# vi: syntax=yaml
---
# Alerting rule group for HashiCorp Nomad metrics.
#
# Every rule below carries `severity: warning` and renders the same pair of
# annotations: a one-line `summary` built from the alert's labels and a
# multi-line `description` that dumps the triggering value and full label set.
# `for: 0m` means the alert fires on the first evaluation where `expr` matches;
# a non-zero `for` requires the condition to hold continuously for that long.
#
# NOTE(review): the summaries reference the labels `exported_job`,
# `task_group`, `instance` and `task`. Whether each metric actually carries all
# four is not visible from this file; missing labels render as empty strings.
groups:
  - name: Nomad
    rules:

      # Fires when the failed-job summary value is higher at the end of the
      # trailing 30-minute window than at its start (delta > 0).
      - alert: NomadJobFailed
        expr: 'delta(nomad_nomad_job_summary_failed[30m]) > 0'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Nomad job failed (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
          description: "Nomad job failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires as soon as the lost-job summary value is above zero.
      - alert: NomadJobLost
        expr: 'nomad_nomad_job_summary_lost > 0'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Nomad job lost (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
          description: "Nomad job lost\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the queued-job summary value has stayed above zero for
      # 3 minutes, so briefly queued work does not alert.
      - alert: NomadJobQueued
        expr: 'nomad_nomad_job_summary_queued > 0'
        for: 3m
        labels:
          severity: warning
        annotations:
          summary: Nomad job queued (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
          description: "Nomad job queued\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when the blocked-evaluations value has stayed above zero for
      # 2 minutes.
      - alert: NomadBlockedEvaluation
        expr: 'nomad_nomad_blocked_evals_total_blocked > 0'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: Nomad blocked evaluation (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
          description: "Nomad blocked evaluation\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when more than one sample of the OOM-killed series exists in the
      # trailing hour.
      # NOTE(review): count_over_time counts scraped samples, not increments of
      # the metric's value, so the threshold depends on the scrape interval and
      # on how long the series stays exposed after an OOM kill. Confirm this is
      # the intended signal before tuning the `> 1` threshold.
      - alert: NomadTaskOOM
        expr: 'count_over_time(nomad_client_allocs_oom_killed[1h]) > 1'
        for: 0m
        labels:
          severity: warning
        annotations:
          summary: Nomad task killed by OOM (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
          description: "Nomad task killed by OOM \n VALUE = {{ $value }}\n LABELS = {{ $labels }}"