Update rendered example

Daniel Berteaud 2024-04-20 23:57:30 +02:00
parent ea1dc23cbb
commit b9b1386149
1 changed file with 97 additions and 97 deletions


@@ -579,103 +579,6 @@ _EOT
groups:
  # Prometheus
  - name: Prometheus
    rules:
      - alert: PrometheusTargetMissing
        expr: up{job!~"sftp-PR\\d+"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Prometheus target missing (job {{ $labels.job }}, instance {{ $labels.instance }})
          description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTooManyRestarts
        expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 3
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Prometheus too many restarts (job {{ $labels.job }}, instance {{ $labels.instance }})
          description: "Prometheus has restarted more than 3 times in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusNotConnectedToAlertmanager
        expr: prometheus_notifications_alertmanagers_discovered < 1
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
          description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusRuleEvaluationFailures
        expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus rule evaluation failures (instance {{ $labels.instance }})
          description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusRuleEvaluationSlow
        expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Prometheus rule evaluation slow (instance {{ $labels.instance }})
          description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusNotificationsBacklog
        expr: min_over_time(prometheus_notifications_queue_length[10m]) > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Prometheus notifications backlog (instance {{ $labels.instance }})
          description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusAlertmanagerNotificationFailing
        expr: rate(alertmanager_notifications_failed_total[1m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }})
          description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTargetScrapingSlow
        expr: prometheus_target_interval_length_seconds{quantile="0.9"} / on (interval, instance, job) prometheus_target_interval_length_seconds{quantile="0.5"} > 1.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Prometheus target scraping slow (instance {{ $labels.instance }})
          description: "Prometheus is scraping exporters slowly since it exceeded the requested interval time. Your Prometheus server is under-provisioned.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTsdbWalCorruptions
        expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})
          description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
_EOT
        destination = "local/rules/prometheus.yml"
        left_delimiter = "{{{"
        right_delimiter = "}}}"
      }
      template {
        data = <<_EOT
# vi: syntax=yaml
groups:
  - name: Traefik
    rules:
@@ -829,6 +732,103 @@
        left_delimiter = "{{{"
        right_delimiter = "}}}"
      }
      template {
        data = <<_EOT
# vi: syntax=yaml
groups:
  # Prometheus
  - name: Prometheus
    rules:
      - alert: PrometheusTargetMissing
        expr: up == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: Prometheus target missing (job {{ $labels.job }}, instance {{ $labels.instance }})
          description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTooManyRestarts
        expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 3
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: Prometheus too many restarts (job {{ $labels.job }}, instance {{ $labels.instance }})
          description: "Prometheus has restarted more than 3 times in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusNotConnectedToAlertmanager
        expr: prometheus_notifications_alertmanagers_discovered < 1
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
          description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusRuleEvaluationFailures
        expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus rule evaluation failures (instance {{ $labels.instance }})
          description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusRuleEvaluationSlow
        expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Prometheus rule evaluation slow (instance {{ $labels.instance }})
          description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusNotificationsBacklog
        expr: min_over_time(prometheus_notifications_queue_length[10m]) > 0
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Prometheus notifications backlog (instance {{ $labels.instance }})
          description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusAlertmanagerNotificationFailing
        expr: rate(alertmanager_notifications_failed_total[1m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }})
          description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTargetScrapingSlow
        expr: prometheus_target_interval_length_seconds{quantile="0.9"} / on (interval, instance, job) prometheus_target_interval_length_seconds{quantile="0.5"} > 1.05
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: Prometheus target scraping slow (instance {{ $labels.instance }})
          description: "Prometheus is scraping exporters slowly since it exceeded the requested interval time. Your Prometheus server is under-provisioned.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
      - alert: PrometheusTsdbWalCorruptions
        expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})
          description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
_EOT
        destination = "local/rules/prometheus.yml"
        left_delimiter = "{{{"
        right_delimiter = "}}}"
      }
      template {
        data = <<_EOT