Simon Bruder
444aa962b5
This now mostly replaces ayu, which has been sitting between upstream and the switch for the last 6 months. It now also configures routing over Mullvad, which also finally enables IPv6 (I don’t understand how people get away with not providing it natively in 2023). Once I get a WLAN card, it will also host its own access point, which currently still relies on ayu as a “dumb AP” (as OpenWRT calls it).
193 lines
6.4 KiB
Nix
193 lines
6.4 KiB
Nix
{ config, lib, pkgs, ... }:
|
||
let
|
||
# Shorthand for this host's Prometheus option tree.
cfg = config.services.prometheus;

# Wrap a list of "host:port" strings into a one-element static_configs list.
mkStaticTargets = targets: [{ inherit targets; }];

# Same as mkStaticTargets, for a single "host:port" string.
mkStaticTarget = target: mkStaticTargets [ target ];
|
||
in
|
||
{
|
||
services.prometheus = {
|
||
enable = true;

# Listen on loopback only; the nginx virtual host defined in this file
# proxies to this address.
listenAddress = "127.0.0.1";
webExternalUrl = "https://prometheus.sbruder.de";

# Scrape targets and evaluate rules at the same 15 second cadence.
globalConfig.scrape_interval = "15s";
globalConfig.evaluation_interval = "15s";

# Additional command-line flags handed to the prometheus binary.
extraFlags = [
  "--storage.tsdb.retention.time=90d"
  "--web.enable-admin-api"
];
|
||
|
||
# Tell Prometheus where to deliver alerts: the Alertmanager configured in
# this file, reached under the same path prefix as its external URL.
alertmanagers =
  let
    inherit (cfg.alertmanager) listenAddress port;
  in
  lib.singleton {
    path_prefix = "/alertmanager/";
    static_configs = mkStaticTarget "${listenAddress}:${toString port}";
  };
|
||
# Alertmanager instance. It listens on loopback and is published under
# /alertmanager on the Prometheus virtual host.
alertmanager = {
  enable = true;
  listenAddress = "127.0.0.1";
  webExternalUrl = "https://prometheus.sbruder.de/alertmanager";
  configuration = {
    global.resolve_timeout = "2m";

    # Every alert goes to the single "matrix" receiver, grouped by alert name.
    route = {
      receiver = "matrix";
      group_by = [ "alertname" ];
      group_wait = "3m";
    };

    receivers = [
      {
        name = "matrix";
        webhook_configs = lib.singleton {
          # Reuse the webhook URL of the go-neb service whose ID is
          # "alertmanager_service". findFirst with a throwing default fails
          # evaluation with a descriptive message when that service is not
          # configured, instead of an opaque "list index out of bounds".
          url = (lib.findFirst
            ({ ID, ... }: ID == "alertmanager_service")
            (throw "services.prometheus.alertmanager: no go-neb service with ID \"alertmanager_service\" found in services.go-neb.config.services")
            config.services.go-neb.config.services).Config.webhook_url;
        };
      }
    ];
  };
};
|
||
|
||
# Scrape jobs. Targets on the VPN are addressed as <host>.vpn.sbruder.de.
scrapeConfigs =
  let
    # "host" and port -> "host.vpn.sbruder.de:port"
    vpnTarget = host: port: "${host}.vpn.sbruder.de:${toString port}";

    # relabel_configs that overwrite the instance label with a fixed value.
    fixedInstance = value: [
      {
        target_label = "instance";
        replacement = value;
      }
    ];

    # The first configured Synapse listener; its first bind address and its
    # port form the scrape target of the "synapse" job.
    synapseListener = lib.elemAt config.services.matrix-synapse.settings.listeners 0;
  in
  [
    {
      job_name = "prometheus";
      static_configs = mkStaticTarget "localhost:${toString cfg.port}";
    }
    {
      job_name = "node";
      static_configs = mkStaticTargets (map (host: vpnTarget host 9100) [
        "fuuko"
        "mayushii"
        "nunotaba"
        "renge"
        "hitagi"
        "vueko"
      ]);
    }
    {
      job_name = "qbittorrent";
      static_configs = mkStaticTargets [ (vpnTarget "fuuko" 9561) ];
      # Rewrite the instance label from __address__ using the regex's
      # capture group (the part before ".vpn.sbruder.de:9561").
      relabel_configs = [
        {
          source_labels = [ "__address__" ];
          regex = "(.*)\\.vpn\\.sbruder\\.de:9561";
          target_label = "instance";
        }
      ];
    }
    {
      job_name = "synapse";
      metrics_path = "/_synapse/metrics";
      static_configs = mkStaticTarget "${lib.elemAt synapseListener.bind_addresses 0}:${toString synapseListener.port}";
      relabel_configs = fixedInstance "matrix.sbruder.de";
    }
    {
      job_name = "dnsmasq";
      static_configs = mkStaticTarget (vpnTarget "fuuko" cfg.exporters.dnsmasq.port);
      relabel_configs = fixedInstance "fuuko.home.sbruder.de";
    }
    {
      job_name = "hcloud";
      # listenAddress of the hcloud exporter already has the host:port form.
      static_configs = mkStaticTarget config.services.hcloud_exporter.listenAddress;
    }
    {
      job_name = "co2";
      static_configs = mkStaticTarget (vpnTarget "fuuko" 9672);
    }
  ];
|
||
|
||
# Alerting rules, rendered as one YAML document holding a single rule group.
rules =
  let
    # Build one alerting rule.
    #   name         alert name
    #   expr         PromQL expression that triggers the alert
    #   for          how long expr must hold before firing (default "1m")
    #   description  optional annotation text; omitted entirely when null
    mkAlert = { name, expr, for ? "1m", description ? null }: {
      alert = name;
      inherit expr for;
      annotations = lib.optionalAttrs (description != null) { inherit description; };
    };
  in
  [
    (lib.generators.toYAML { } {
      groups = lib.singleton {
        name = "alert.rules";
        rules = map mkAlert [
          {
            name = "InstanceDown";
            # The listed hosts are excluded from this alert. Dots are escaped
            # so they only match a literal "."; the backslash is doubled
            # because PromQL double-quoted strings process escape sequences.
            expr = ''up{instance!~"(nunotaba|hitagi|mayushii|fuuko)\\.vpn\\.sbruder\\.de:.*"} == 0'';
            description = "Instance {{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.";
          }
          {
            name = "SystemdUnitFailed";
            expr = ''node_systemd_unit_state{state="failed"} == 1'';
            description = "Systemd unit {{ $labels.name }} on {{ $labels.instance }} has state failed.";
          }
          {
            name = "NodeHighLoad";
            # 15-minute load divided by the number of per-CPU "system" series,
            # i.e. load per core.
            expr = ''sum by (instance) (node_load15) / count by (instance) (node_cpu_seconds_total{mode="system"}) > 2'';
            for = "15m";
            description = "Node {{ $labels.instance }} is having a per-core load > 2 for the last 15 minutes.";
          }
          {
            name = "NodeHighMemory";
            expr = ''(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.9'';
            for = "2m";
            description = "Node {{ $labels.instance }} is using more than 90 % of available RAM.";
          }
          {
            name = "TP440ACPIBroken";
            expr = ''node_hwmon_temp_celsius{chip="thermal_thermal_zone0",instance="nunotaba.vpn.sbruder.de:9100",job="node",sensor="temp1"} == 48'';
            for = "10m";
            description = "Thinkpad T440’s ACPI temperature is broken. Its reported temperature is 48 °C for the last 10 minutes. That doesn’t seem right. Try suspending";
          }
          {
            name = "TorrentNoPeers";
            expr = "sum by (instance) (qBittorrent_torrent_connected_leechs) == 0";
            description = "qBittorrent instance {{ $labels.instance }} has no peers. There might be a network connectivity problem";
          }
        ];
      };
    })
  ];
|
||
};
|
||
|
||
# exporters that are not part of nixpkgs’ prometheus infrastructure
|
||
# The exporter reads its environment from a sops-managed secret file.
sops.secrets.hcloud_exporter-environment = {
  sopsFile = ../secrets.yaml;
};
services.hcloud_exporter = {
  enable = true;
  environmentFile = config.sops.secrets.hcloud_exporter-environment.path;
  # Loopback host:port; the "hcloud" scrape job uses this value as its target.
  listenAddress = "127.0.0.1:9501";
};
|
||
|
||
# htpasswd file referenced by basicAuthFile of the Prometheus virtual host;
# owned by the nginx user.
sops.secrets.prometheus-htpasswd.sopsFile = ../secrets.yaml;
sops.secrets.prometheus-htpasswd.owner = "nginx";
|
||
|
||
# TLS-only reverse proxy with basic auth in front of Prometheus ("/") and
# Alertmanager ("/alertmanager/").
services.nginx.virtualHosts."prometheus.sbruder.de" =
  let
    prometheusUpstream = "http://${cfg.listenAddress}:${toString cfg.port}";
    alertmanagerUpstream = "http://${cfg.alertmanager.listenAddress}:${toString cfg.alertmanager.port}";
  in
  {
    forceSSL = true;
    enableACME = true;

    basicAuthFile = config.sops.secrets.prometheus-htpasswd.path;

    locations."/".proxyPass = prometheusUpstream;
    locations."/alertmanager/".proxyPass = alertmanagerUpstream;
  };
|
||
}
|