# vi: syntax=yaml
---
# Prometheus alerting rules built on the consul_* metrics referenced below.
groups:
  - name: ConsulExporter
    rules:
      # Fires when a catalog service instance has reported an unhealthy state
      # (metric value 0) continuously for 2 minutes.
      - alert: ConsulServiceHealthcheckFailed
        # Sidecar proxy services are excluded: they can report a critical
        # state while the main task is still pending (e.g. waiting for a
        # volume to become available).
        expr: 'consul_catalog_service_node_healthy{service_name!~".*-sidecar-proxy"} == 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'Consul service healthcheck failed (service {{ $labels.service_name }})'
          description: "Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires as soon as the current raft peer count drops below half of its
      # 6-hour maximum plus one. The threshold follows the observed cluster
      # size rather than a hard-coded peer count.
      # NOTE(review): the summary relies on a `node` label being present on
      # consul_raft_peers - confirm it is provided by the exporter/relabeling.
      - alert: ConsulMissingMasterNode
        expr: 'consul_raft_peers < (max_over_time(consul_raft_peers{}[6h]) / 2) + 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Consul missing master node (node {{ $labels.node }})'
          description: "The number of Consul raft peers is below (6h peak / 2) + 1; quorum is at risk.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires immediately when a node-level health status series with
      # status="critical" has the value 1.
      - alert: ConsulAgentUnhealthy
        expr: 'consul_health_node_status{status="critical"} == 1'
        for: 0m
        labels:
          severity: critical
        annotations:
          summary: 'Consul agent unhealthy (node {{ $labels.node }})'
          description: "A Consul agent is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when a service health status series with status="warning" has
      # had the value 1 for 2 minutes.
      # NOTE(review): unlike the critical service rules, sidecar proxies are
      # not filtered out here - confirm this is intended.
      - alert: ConsulServiceWarning
        expr: 'consul_health_service_status{status="warning"} == 1'
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: 'Service {{ $labels.service_name }} on node {{ $labels.node }} is in warning state'
          description: "Service {{ $labels.service_name }} on node {{ $labels.node }} is in warning state\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

      # Fires when a service health status series with status="critical" has
      # had the value 1 for 2 minutes. Services whose name ends in
      # "-sidecar-proxy" are excluded by the selector.
      - alert: ConsulServiceCritical
        expr: 'consul_health_service_status{status="critical",service_name!~".*-sidecar-proxy"} == 1'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'Service {{ $labels.service_name }} on node {{ $labels.node }} is in critical state'
          description: "Service {{ $labels.service_name }} on node {{ $labels.node }} is in critical state\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"