monitoring/templates/prometheus/rules/traefik.yml

17 lines
542 B
YAML

# vi: syntax=yaml
# Prometheus alerting rules for Traefik.
groups:
  - name: Traefik
    rules:
      # Fires when, for a given Traefik service, the percentage of requests
      # answered with a 5xx status code (measured over a 3-minute rate window)
      # stays above 5% for at least 1 minute.
      - alert: TraefikHighHttp5xxErrorRateService
        # Numerator and denominator are both aggregated `by (service)`, so
        # `service` is the only label left on the resulting series.
        # `* 100` turns the ratio into a percentage before the `> 5` threshold.
        expr: 'sum(rate(traefik_service_requests_total{code=~"5.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5'
        for: 1m
        labels:
          severity: critical
        annotations:
          # Use the `service` label here: `instance` is aggregated away by the
          # expression above and would render as an empty string.
          summary: 'Traefik high HTTP 5xx error rate service (service {{ $labels.service }})'
          description: "Traefik service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"