monitoring: warn if no deploy for 10 days

This is not entirely accurate — the lastModified attribute of a flake's
self-input gives the date of the last commit, not of the last deploy. But I
figure it's close enough, and less obscure to check than reading the
last deploy date via nix-env.

inspired by: we did no server updates for two weeks.
This commit is contained in:
stuebinm 2024-05-02 22:33:47 +02:00
parent 972a26163a
commit 8c3d3bf6db
2 changed files with 18 additions and 0 deletions

View file

@ -25,4 +25,5 @@ in
# used by monit # used by monit
environment.etc."haccfiles-commit".text = self.rev or self.dirtyRev; environment.etc."haccfiles-commit".text = self.rev or self.dirtyRev;
environment.etc."haccfiles-timestamp".text = builtins.toString self.lastModified;
} }

View file

@ -22,6 +22,20 @@ let
exit 1 exit 1
end end
''; '';
# Monit check program: exits 1 (which the monit rule treats as "alert") when
# the timestamp recorded in /etc/haccfiles-timestamp is at least ten days old,
# or when that timestamp cannot be read at all. Exits 0 otherwise.
#
# The timestamp is the flake's lastModified value, i.e. the date of the last
# commit rather than of the actual deploy; it is used as an approximation.
checkDeployAge = pkgs.writeScriptBin "check-deploy-age" ''
  #!${lib.getExe pkgs.fish}
  # Ten days, in seconds. `math` is a fish builtin, so no external `expr` is needed.
  set maxage (math "3600 * 24 * 10")
  # Absolute store paths: do not rely on whatever PATH the caller provides.
  set now (${pkgs.coreutils}/bin/date +%s)
  # we do this indirection here so monit's config won't change on each deploy
  set deploytimestamp (${pkgs.coreutils}/bin/cat /etc/haccfiles-timestamp)
  # A missing or malformed timestamp must not make the check pass silently:
  # without this guard the arithmetic below fails and the script exits 0.
  if not string match -qr '^[0-9]+$' -- "$deploytimestamp"
    echo "${config.networking.hostName}: could not read a valid deploy timestamp from /etc/haccfiles-timestamp"
    exit 1
  end
  set age (math "$now - $deploytimestamp")
  if test $age -ge $maxage
    echo "${config.networking.hostName} has not been deployed since 10 days, perhaps someone should do updates?"
    exit 1
  end
'';
in in
{ {
mailserver.monitoring = { mailserver.monitoring = {
@ -43,5 +57,8 @@ in
check program is-system-running path ${pkgs.systemd}/bin/systemctl is-system-running check program is-system-running path ${pkgs.systemd}/bin/systemctl is-system-running
if status != 0 then alert if status != 0 then alert
check program check-deploy-age path ${lib.getExe checkDeployAge}
if status == 1 then alert
''; '';
} }