Remove debug comment and update rendered example

This commit is contained in:
Daniel Berteaud 2024-04-02 12:16:14 +02:00
parent 8b04f6c7b6
commit 5c6a3493c6
13 changed files with 21 additions and 132 deletions

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG AM_VERSION=0.27.0
@@ -13,7 +13,7 @@ RUN set -eux &&\
tar xzf alertmanager-${AM_VERSION}.linux-amd64.tar.gz &&\
mv alertmanager-${AM_VERSION}.linux-amd64 /opt/alertmanager
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/alertmanager:$PATH

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG BLACKBOX_EXPORTER_VERSION=0.24.0
@@ -12,7 +12,7 @@ RUN set -eux &&\
mkdir blackbox &&\
mv blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64/blackbox_exporter /usr/local/bin/blackbox_exporter
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV BLACKBOX_CONF=/etc/blackbox.yml

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG CONSUL_EXPORTER_VERSION=0.11.0
@@ -11,7 +11,7 @@ RUN set -eux &&\
tar xvf consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mv consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64/consul_exporter /usr/local/bin/consul_exporter
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
COPY --from=builder /usr/local/bin/consul_exporter /usr/local/bin/consul_exporter

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG GRAFANA_VERSION=10.4.1 \
GRAFANA_PLUGINS=grafana-clock-panel,grafana-piechart-panel
@@ -23,7 +23,7 @@ RUN set -eux &&\
IFS=',' &&\
for PLUGIN in ${GRAFANA_PLUGINS}; do /opt/grafana/bin/grafana cli --pluginsDir /opt/grafana/plugins plugins install ${PLUGIN}; done
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/grafana/bin/:${PATH} \

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG LOKI_VERSION=2.9.6
@@ -12,7 +12,7 @@ RUN set -eux &&\
mkdir /opt/loki &&\
mv loki-linux-amd64 /opt/loki/loki
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/loki:$PATH

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG EXPORTER_VERSION=1.7.0
@@ -16,7 +16,7 @@ RUN set -euxo pipefail &&\
tar xvzf node_exporter-${EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mv node_exporter-${EXPORTER_VERSION}.linux-amd64/node_exporter /usr/local/bin/node_exporter
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
COPY --from=builder --chown=root:root --chmod=755 /usr/local/bin/node_exporter /usr/local/bin/node_exporter

View File

@@ -7,7 +7,7 @@ RUN set -eux &&\
cd nomad-vector-logger &&\
CGO_ENABLED=0 go build -ldflags="-s -w" -o /nomad-vector-logger
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
RUN set -euxo pipefail &&\

View File

@@ -1,7 +1,7 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
-ARG PING_EXPORTER_VERSION=1.1.2
+ARG PING_EXPORTER_VERSION=1.1.3
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz /tmp
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_checksums.txt /tmp
@@ -15,7 +15,7 @@ RUN set -eux &&\
tar xvf ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz &&\
mv ping_exporter /usr/local/bin/
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
COPY --from=builder /usr/local/bin/ping_exporter /usr/local/bin/ping_exporter

View File

@@ -1,4 +1,4 @@
-FROM danielberteaud/alpine:24.3-1 AS builder
+FROM danielberteaud/alpine:24.4-1 AS builder
ARG PROM_VERSION=2.51.1
@@ -16,7 +16,7 @@ RUN set -eux &&\
rm -f prometheus-${PROM_VERSION}.linux-amd64.tar.gz &&\
mv prometheus-${PROM_VERSION}.linux-amd64 /opt/prometheus
-FROM danielberteaud/alpine:24.3-1
+FROM danielberteaud/alpine:24.4-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/prometheus:$PATH

View File

@@ -405,6 +405,8 @@ _EOT
}
# Alert rules
# monitoring
# if file.Exists monitoring
template {
data = <<_EOT
# vi: syntax=yaml
@@ -488,30 +490,6 @@ _EOT
groups:
- name: JVM
rules:
- alert: JvmMemoryFillingUp
expr: '(sum by (instance)(jvm_memory_used_bytes{area="heap"}) / sum by (instance)(jvm_memory_max_bytes{area="heap"})) * 100 > 90'
for: 2m
labels:
severity: warning
annotations:
summary: JVM memory filling up (instance {{ $labels.instance }})
description: "JVM memory is filling up (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
_EOT
destination = "local/rules/jvm.yml"
left_delimiter = "{{{"
right_delimiter = "}}}"
}
template {
data = <<_EOT
# vi: syntax=yaml
groups:
- name: Nomad
rules:
@@ -604,94 +582,6 @@ _EOT
groups:
- name: Postgres
rules:
- alert: PostgresqlDown
expr: 'pg_up == 0'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql down (instance {{ $labels.instance }})
description: "Postgresql instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresTooManyRestarts
expr: changes(process_start_time_seconds{job="pg"}[15m]) > 3
for: 1m
labels:
severity: warning
annotations:
summary: Postgres too many restarts (instance {{ $labels.instance }})
description: "Postgres server has restarted more than 3 times in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlTooManyConnections
expr: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > pg_settings_max_connections * 0.8'
for: 2m
labels:
severity: warning
annotations:
summary: Postgresql too many connections (instance {{ $labels.instance }})
description: "PostgreSQL instance has too many connections (> 80%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlDeadLocks
expr: 'increase(pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]) > 5'
for: 0m
labels:
severity: warning
annotations:
summary: Postgresql dead locks (instance {{ $labels.instance }})
description: "PostgreSQL has dead-locks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: PostgresqlHighRollbackRate
# expr: 'rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m]) > 0.05'
# for: 0m
# labels:
# severity: warning
# annotations:
# summary: Postgresql high rollback rate (instance {{ $labels.instance }})
# description: "Ratio of transactions being aborted compared to committed is > 5 %\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlHighRateStatementTimeout
expr: 'rate(postgresql_errors_total{type="statement_timeout"}[1m]) > 3'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql high rate statement timeout (instance {{ $labels.instance }})
description: "Postgres transactions showing high rate of statement timeouts\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlHighRateDeadlock
expr: 'increase(postgresql_errors_total{type="deadlock_detected"}[1m]) > 1'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql high rate deadlock (instance {{ $labels.instance }})
description: "Postgres detected deadlocks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlTooManyLocksAcquired
expr: '((sum (pg_locks_count)) / (pg_settings_max_locks_per_transaction * pg_settings_max_connections)) > 0.20'
for: 2m
labels:
severity: critical
annotations:
summary: Postgresql too many locks acquired (instance {{ $labels.instance }})
description: "Too many locks acquired on the database. If this alert happens frequently, we may need to increase the postgres setting max_locks_per_transaction.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
_EOT
destination = "local/rules/postgres.yml"
left_delimiter = "{{{"
right_delimiter = "}}}"
}
template {
data = <<_EOT
# vi: syntax=yaml
groups:
# Prometheus
- name: Prometheus
rules:
@@ -1297,6 +1187,7 @@ _EOT
left_delimiter = "{{{"
right_delimiter = "}}}"
}
# monitoring_common
# A client cert, to connect to the AlertManager API
template {

View File

@@ -84,9 +84,7 @@ _EOT
# Alert rules
[[- range $bundle := file.ReadDir "bundles" ]]
# [[ $bundle ]]
[[- if file.Exists (printf "bundles/%s/templates/prometheus/rules" $bundle) ]]
# if file.Exists [[ $bundle ]]
[[- range $tpl := file.ReadDir (printf "bundles/%s/templates/prometheus/rules" $bundle) ]]
[[- if not (file.Exists (printf "prometheus/rules/%s" $tpl)) ]]
template {