# vi: syntax=yaml
|
|
|
|
# Prometheus rule file: a single rule group named "Postgres".
# The alerting rules of the group are the entries listed under `rules`.
groups:
  - name: Postgres
    rules:
|
|
|
|
# Critical alert raised when the `pg_up` series equals 0.
# `for: 0m` means there is no hold period: the alert fires on the first
# evaluation in which the expression returns a result.
- alert: PostgresqlDown
  expr: 'pg_up == 0'
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: 'Postgresql down (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      Postgresql instance is down
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Warning raised when `process_start_time_seconds` for job "pg" changed more
# than 3 times within a 15-minute window, and the condition held for 1 minute.
# The job name "pg" is hard-coded in the selector.
- alert: PostgresTooManyRestarts
  expr: 'changes(process_start_time_seconds{job="pg"}[15m]) > 3'
  for: 1m
  labels:
    severity: warning
  annotations:
    summary: 'Postgres too many restarts (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      Postgres server has restarted more than 3 times in the last 15 minutes. It might be crashlooping.
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Warning raised when the summed `pg_stat_activity_count` of a target exceeds
# 80% of `pg_settings_max_connections` for 2 minutes.
#
# Both sides of the comparison are aggregated to the same (instance, job)
# label set. PromQL only compares samples whose label sets match, so the
# earlier form (`sum by (datname) (...)` against the un-aggregated setting)
# paired no series and could not fire; it also dropped the `instance` label
# that the summary template reads.
# `min by` collapses the limit to one series per target, so the comparison
# stays one-to-one even if the setting is exported with extra labels.
# NOTE(review): the `template.*` / `postgres` databases are still excluded,
# as in the original selector — confirm that is intended, since their
# sessions presumably also count against max_connections.
- alert: PostgresqlTooManyConnections
  expr: >-
    sum by (instance, job) (pg_stat_activity_count{datname!~"template.*|postgres"})
    > min by (instance, job) (pg_settings_max_connections * 0.8)
  for: 2m
  labels:
    severity: warning
  annotations:
    summary: 'Postgresql too many connections (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      PostgreSQL instance has too many connections (> 80%).
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Warning raised when `pg_stat_database_deadlocks` increased by more than 5
# over the last minute for any database whose name does not match
# `template.*` or `postgres`. No hold period (`for: 0m`).
- alert: PostgresqlDeadLocks
  expr: 'increase(pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]) > 5'
  for: 0m
  labels:
    severity: warning
  annotations:
    summary: 'Postgresql dead locks (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      PostgreSQL has dead-locks
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Disabled rule (commented out), kept for reference:
# - alert: PostgresqlHighRollbackRate
#   expr: 'rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m]) > 0.05'
#   for: 0m
#   labels:
#     severity: warning
#   annotations:
#     summary: Postgresql high rollback rate (instance {{ $labels.instance }})
#     description: "Ratio of transactions being aborted compared to committed is > 5 %\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
|
|
# Critical alert raised when the per-second rate of
# `postgresql_errors_total{type="statement_timeout"}`, computed over the last
# minute, is above 3. No hold period (`for: 0m`).
- alert: PostgresqlHighRateStatementTimeout
  expr: 'rate(postgresql_errors_total{type="statement_timeout"}[1m]) > 3'
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: 'Postgresql high rate statement timeout (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      Postgres transactions showing high rate of statement timeouts
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Critical alert raised when
# `postgresql_errors_total{type="deadlock_detected"}` increased by more than 1
# over the last minute. No hold period (`for: 0m`).
- alert: PostgresqlHighRateDeadlock
  expr: 'increase(postgresql_errors_total{type="deadlock_detected"}[1m]) > 1'
  for: 0m
  labels:
    severity: critical
  annotations:
    summary: 'Postgresql high rate deadlock (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      Postgres detected deadlocks
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|
|
# Critical alert raised when the number of held locks exceeds 20% of
# max_locks_per_transaction * max_connections for 2 minutes.
#
# Numerator and denominator are both aggregated to (instance, job). The
# earlier form used a bare `sum (pg_locks_count)`, whose result has an empty
# label set; PromQL only divides samples whose label sets match, so it paired
# with no labelled settings series and the alert could not fire. Keeping
# `instance` also lets the summary template resolve.
# `min by` collapses the limit to one series per target, so the division stays
# one-to-one even if the settings are exported with extra labels.
- alert: PostgresqlTooManyLocksAcquired
  expr: >-
    sum by (instance, job) (pg_locks_count)
    / min by (instance, job) (pg_settings_max_locks_per_transaction * pg_settings_max_connections)
    > 0.20
  for: 2m
  labels:
    severity: critical
  annotations:
    summary: 'Postgresql too many locks acquired (instance {{ $labels.instance }})'
    # Literal block scalar; the single leading space on the VALUE/LABELS
    # lines is part of the rendered text and must be kept.
    description: |-
      Too many locks acquired on the database. If this alert happens frequently, we may need to increase the postgres setting max_locks_per_transaction.
       VALUE = {{ $value }}
       LABELS = {{ $labels }}
|
|
|