Some refactor, add grafana
This commit is contained in:
parent
7ed40afe9c
commit
a4d66759e0
|
@ -0,0 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "alertmanager[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -0,0 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "grafana[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -0,0 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "loki[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -1,3 +0,0 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -1,3 +0,0 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "[[ .instance ]]-loki[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -1,3 +0,0 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -0,0 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "prometheus[[ .consul.suffix ]]"
|
||||
Protocol = "http"
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
|
||||
Name = "alertmanager[[ .consul.suffix ]]"
|
||||
Sources = [
|
||||
{
|
||||
Name = "[[ (merge .monitoring.alertmanager .).traefik.instance ]]"
|
|
@ -0,0 +1,15 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "grafana[[ .consul.suffix ]]"
|
||||
Sources = [
|
||||
{
|
||||
Name = "[[ (merge .monitoring.grafana .monitoring .).traefik.instance ]]"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
||||
HTTP {
|
||||
PathPrefix = "[[ if eq (urlParse .monitoring.grafana.public_url).Path "" ]]/[[ else ]][[ (urlParse .monitoring.grafana.public_url).Path ]][[ end ]]"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "[[ .instance ]]-loki[[ .consul.suffix ]]"
|
||||
Name = "loki[[ .consul.suffix ]]"
|
||||
Sources = [
|
||||
{
|
||||
Name = "[[ (merge .monitoring.loki .monitoring .).traefik.instance ]]"
|
||||
|
@ -13,7 +13,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "[[ .instance ]]-grafana[[ .consul.suffix ]]"
|
||||
Name = "grafana[[ .consul.suffix ]]"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
||||
|
@ -33,7 +33,7 @@ Sources = [
|
|||
},
|
||||
[[- range $idx, $service := coll.Slice "vector-aggregator" "vector-agent" ]]
|
||||
{
|
||||
Name = "[[ $.instance ]]-[[ $service ]][[ $.consul.suffix ]]"
|
||||
Name = "[[ $service ]][[ $.consul.suffix ]]"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
|
||||
Name = "prometheus[[ .consul.suffix ]]"
|
||||
Sources = [
|
||||
{
|
||||
Name = "[[ (merge .monitoring.prometheus .).traefik.instance ]]"
|
||||
|
@ -13,7 +13,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "[[ .instance ]]-grafana[[ .consul.suffix ]]"
|
||||
Name = "grafana[[ .consul.suffix ]]"
|
||||
Permissions = [
|
||||
{
|
||||
# Deny access to the admin API from Grafana
|
|
@ -1,3 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "monitoring-loki"
|
||||
Name = "alertmanager"
|
||||
Protocol = "http"
|
|
@ -1,3 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "monitoring-prometheus"
|
||||
Name = "grafana"
|
||||
Protocol = "http"
|
|
@ -1,3 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "monitoring-alertmanager"
|
||||
Name = "loki"
|
||||
Protocol = "http"
|
|
@ -0,0 +1,3 @@
|
|||
Kind = "service-defaults"
|
||||
Name = "prometheus"
|
||||
Protocol = "http"
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "monitoring-alertmanager"
|
||||
Name = "alertmanager"
|
||||
Sources = [
|
||||
{
|
||||
Name = "traefik"
|
|
@ -0,0 +1,15 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "grafana"
|
||||
Sources = [
|
||||
{
|
||||
Name = "traefik"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
||||
HTTP {
|
||||
PathPrefix = "/"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "monitoring-loki"
|
||||
Name = "loki"
|
||||
Sources = [
|
||||
{
|
||||
Name = "traefik"
|
||||
|
@ -13,7 +13,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "monitoring-grafana"
|
||||
Name = "grafana"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
||||
|
@ -32,7 +32,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "monitoring-vector-aggregator"
|
||||
Name = "vector-aggregator"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
||||
|
@ -51,7 +51,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "monitoring-vector-agent"
|
||||
Name = "vector-agent"
|
||||
Permissions = [
|
||||
{
|
||||
Action = "allow"
|
|
@ -1,5 +1,5 @@
|
|||
Kind = "service-intentions"
|
||||
Name = "monitoring-prometheus"
|
||||
Name = "prometheus"
|
||||
Sources = [
|
||||
{
|
||||
Name = "traefik"
|
||||
|
@ -13,7 +13,7 @@ Sources = [
|
|||
]
|
||||
},
|
||||
{
|
||||
Name = "monitoring-grafana"
|
||||
Name = "grafana"
|
||||
Permissions = [
|
||||
{
|
||||
# Deny access to the admin API from Grafana
|
|
@ -18,7 +18,7 @@ job "monitoring-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "monitoring-ping-exporter"
|
||||
name = "ping-exporter"
|
||||
port = "ping"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -27,7 +27,7 @@ job "monitoring-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "monitoring-blackbox-exporter"
|
||||
name = "blackbox-exporter"
|
||||
port = "blackbox"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -35,7 +35,7 @@ job "monitoring-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "monitoring-consul-exporter"
|
||||
name = "consul-exporter"
|
||||
port = "ping"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -44,7 +44,7 @@ job "monitoring-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "monitoring-cluster-exporter"
|
||||
name = "cluster-exporter"
|
||||
port = "cluster"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -77,7 +77,7 @@ _EOT
|
|||
|
||||
|
||||
vault {
|
||||
policies = ["monitoring-consul-exporter"]
|
||||
policies = ["consul-exporter"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -102,7 +102,7 @@ _EOT
|
|||
|
||||
template {
|
||||
data = <<_EOT
|
||||
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/monitoring-consul-exporter" }}{{ .Data.token }}{{ end }}
|
||||
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/consul-exporter" }}{{ .Data.token }}{{ end }}
|
||||
_EOT
|
||||
destination = "secrets/.consul.env"
|
||||
uid = 100000
|
||||
|
@ -151,7 +151,7 @@ _EOT
|
|||
|
||||
|
||||
vault {
|
||||
policies = ["monitoring-cluster-exporter", "metrics"]
|
||||
policies = ["cluster-exporter", "metrics"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -187,7 +187,7 @@ server {
|
|||
return 405;
|
||||
}
|
||||
|
||||
set $consul_token "{{ with secret "consul/creds/monitoring-cluster-exporter" }}{{ .Data.token }}{{ end }}";
|
||||
set $consul_token "{{ with secret "consul/creds/cluster-exporter" }}{{ .Data.token }}{{ end }}";
|
||||
|
||||
{{- range service "nomad-client" }}
|
||||
location /nomad-client/{{ .Node }} {
|
||||
|
@ -365,7 +365,7 @@ _EOT
|
|||
# Get a Nomad client certificate
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/nomad/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.nomad.consul" "ttl=24h" }}
|
||||
{{- with pkiCert "pki/nomad/issue/cluster-exporter" "common_name=metrics-proxy.nomad.consul" "ttl=24h" }}
|
||||
{{ .Data.Cert }}
|
||||
{{ .Data.Key }}
|
||||
{{- end }}
|
||||
|
@ -389,7 +389,7 @@ _EOT
|
|||
# Same for Consul
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/consul/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.consul.consul" "ttl=24h" }}
|
||||
{{- with pkiCert "pki/consul/issue/cluster-exporter" "common_name=metrics-proxy.consul.consul" "ttl=24h" }}
|
||||
{{ .Data.Cert }}
|
||||
{{ .Data.Key }}
|
||||
{{- end }}
|
|
@ -0,0 +1,59 @@
|
|||
FROM danielberteaud/alpine:24.3-1 AS builder
|
||||
|
||||
ARG GRAFANA_VERSION=10.4.1 \
|
||||
GRAFANA_PLUGINS=grafana-clock-panel,grafana-piechart-panel
|
||||
|
||||
ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz /tmp
|
||||
ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256 /tmp
|
||||
RUN set -eux &&\
|
||||
apk --no-cache add \
|
||||
tar \
|
||||
curl \
|
||||
ca-certificates \
|
||||
bash \
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
&&\
|
||||
ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\
|
||||
cd /tmp &&\
|
||||
echo "$(cat grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256) grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz" | sha256sum -c &&\
|
||||
tar xzf grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz &&\
|
||||
mv grafana-v${GRAFANA_VERSION} /opt/grafana &&\
|
||||
mkdir /opt/grafana/plugins &&\
|
||||
IFS=',' &&\
|
||||
for PLUGIN in ${GRAFANA_PLUGINS}; do /opt/grafana/bin/grafana cli --pluginsDir /opt/grafana/plugins plugins install ${PLUGIN}; done
|
||||
|
||||
FROM danielberteaud/alpine:24.3-1
|
||||
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
|
||||
|
||||
ENV PATH=/opt/grafana/bin/:${PATH} \
|
||||
GF_PATHS_DATA=/data \
|
||||
GF_PATHS_PLUGINS=/opt/grafana/plugins \
|
||||
GF_LOG_MODE=console
|
||||
|
||||
COPY --from=builder /opt/grafana /opt/grafana
|
||||
RUN set -eux &&\
|
||||
apk --no-cache add \
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
&&\
|
||||
ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\
|
||||
addgroup -g 3000 grafana &&\
|
||||
adduser --system \
|
||||
--ingroup grafana \
|
||||
--disabled-password \
|
||||
--uid 3000 \
|
||||
--home /opt/grafana \
|
||||
--no-create-home \
|
||||
--shell /sbin/nologin \
|
||||
grafana &&\
|
||||
mkdir /data &&\
|
||||
chown -R grafana:grafana /data /opt/grafana/plugins &&\
|
||||
chmod 700 /data
|
||||
|
||||
WORKDIR /opt/grafana
|
||||
USER grafana
|
||||
CMD ["grafana", \
|
||||
"server", \
|
||||
"--homepath=/opt/grafana", \
|
||||
"--packaging=docker"]
|
|
@ -1,6 +1,6 @@
|
|||
FROM danielberteaud/alpine:24.3-1 AS builder
|
||||
|
||||
ARG LOKI_VERSION=2.9.5
|
||||
ARG LOKI_VERSION=2.9.6
|
||||
|
||||
ADD https://github.com/grafana/loki/releases/download/v${LOKI_VERSION}/loki-linux-amd64.zip /tmp
|
||||
ADD https://github.com/grafana/loki/releases/download/v${LOKI_VERSION}/SHA256SUMS /tmp
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
FROM danielberteaud/alpine:24.3-1 AS builder
|
||||
|
||||
ARG PROM_VERSION=2.50.1
|
||||
ARG PROM_VERSION=2.51.0
|
||||
|
||||
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz /tmp
|
||||
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/sha256sums.txt /tmp
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
#!/bin/sh
|
||||
# vim: syntax=sh
|
||||
|
||||
vault write consul/roles/monitoring-prometheus \
|
||||
vault write consul/roles/prometheus \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="monitoring-prometheus"
|
||||
consul_policies="monitoring"
|
||||
|
||||
vault write consul/roles/monitoring-consul-exporter \
|
||||
vault write consul/roles/consul-exporter \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="monitoring-prometheus"
|
||||
consul_policies="monitoring"
|
||||
|
||||
vault write consul/roles/monitoring-cluster-exporter \
|
||||
vault write consul/roles/cluster-exporter \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="monitoring-prometheus"
|
||||
consul_policies="monitoring"
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
vault write database/roles/grafana \
|
||||
db_name="postgres" \
|
||||
creation_statements="CREATE ROLE \"{{name}}\" WITH LOGIN PASSWORD '{{password}}' VALID UNTIL '{{expiration}}'; \
|
||||
GRANT \"grafana\" TO \"{{name}}\"; \
|
||||
ALTER ROLE \"{{name}}\" SET role = \"grafana\"" \
|
||||
default_ttl="12h" \
|
||||
max_ttl="720h"
|
||||
|
|
@ -93,8 +93,8 @@ rm -rf ${TMP}
|
|||
|
||||
|
||||
# Create a role for alertmanager
|
||||
vault write pki/monitoring/roles/monitoring-alertmanager \
|
||||
allowed_domains="monitoring" \
|
||||
vault write pki/monitoring/roles/alertmanager \
|
||||
allowed_domains="monitoring.consul" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -106,8 +106,8 @@ vault write pki/monitoring/roles/monitoring-alertmanager \
|
|||
ou="Monitoring"
|
||||
|
||||
# Create a role for prometheus (which will only be a client, for AlertManager)
|
||||
vault write pki/monitoring/roles/monitoring-prometheus \
|
||||
allowed_domains="monitoring" \
|
||||
vault write pki/monitoring/roles/prometheus \
|
||||
allowed_domains="monitoring.consul" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -119,8 +119,8 @@ vault write pki/monitoring/roles/monitoring-prometheus \
|
|||
ou="Monitoring"
|
||||
|
||||
# Create a role for loki (which will only be a client, for AlertManager)
|
||||
vault write pki/monitoring/roles/monitoring-loki \
|
||||
allowed_domains="monitoring" \
|
||||
vault write pki/monitoring/roles/loki \
|
||||
allowed_domains="monitoring.consul" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -133,7 +133,7 @@ vault write pki/monitoring/roles/monitoring-loki \
|
|||
|
||||
# Create a role for metrics exporters (server only)
|
||||
vault write pki/monitoring/roles/metrics \
|
||||
allowed_domains="monitoring" \
|
||||
allowed_domains="monitoring.consul" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -147,7 +147,7 @@ vault write pki/monitoring/roles/metrics \
|
|||
ou="Monitoring"
|
||||
|
||||
# Create a role on the Nomad PKI for the cluster exporter
|
||||
vault write pki/nomad/roles/monitoring-cluster-exporter \
|
||||
vault write pki/nomad/roles/cluster-exporter \
|
||||
allowed_domains='nomad.consul' \
|
||||
allow_subdomains=true \
|
||||
allow_wildcard_certificates=false \
|
||||
|
@ -158,7 +158,7 @@ vault write pki/nomad/roles/monitoring-cluster-exporter \
|
|||
ou="Cluster metrics exporter"
|
||||
|
||||
# Create a role on the Consul PKI for the cluster exporter
|
||||
vault write pki/consul/roles/monitoring-cluster-exporter \
|
||||
vault write pki/consul/roles/cluster-exporter \
|
||||
allowed_domains="consul.consul" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# vim: syntax=sh
|
||||
|
||||
export LC_ALL=C
|
||||
VAULT_KV_PATH=kv/service/monitoring/grafana
|
||||
RAND_CMD="tr -dc A-Za-z0-9\-_\/=~\.+ < /dev/urandom | head -c 50"
|
||||
if ! vault kv list $(dirname ${VAULT_KV_PATH}) 2>/dev/null | grep -q -E "^$(basename ${VAULT_KV_PATH})\$"; then
|
||||
vault kv put ${VAULT_KV_PATH} \
|
||||
secret_key="$(sh -c "${RAND_CMD}")" \
|
||||
|
||||
fi
|
||||
for SECRET in secret_key; do
|
||||
if ! vault kv get -field ${SECRET} ${VAULT_KV_PATH} >/dev/null 2>&1; then
|
||||
vault kv patch ${VAULT_KV_PATH} \
|
||||
${SECRET}=$(sh -c "${RAND_CMD}")
|
||||
fi
|
||||
done
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ job "monitoring-services" {
|
|||
|
||||
|
||||
volume "data" {
|
||||
source = "monitoring-prometheus-data"
|
||||
source = "prometheus-data"
|
||||
type = "csi"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
@ -27,7 +27,7 @@ job "monitoring-services" {
|
|||
|
||||
|
||||
service {
|
||||
name = "monitoring-prometheus"
|
||||
name = "prometheus"
|
||||
port = 9090
|
||||
|
||||
meta {
|
||||
|
@ -173,7 +173,7 @@ _EOT
|
|||
leader = true
|
||||
|
||||
config {
|
||||
image = "danielberteaud/prometheus:2.50.1-1"
|
||||
image = "danielberteaud/prometheus:2.51.0-1"
|
||||
readonly_rootfs = true
|
||||
pids_limit = 200
|
||||
command = "prometheus"
|
||||
|
@ -192,7 +192,7 @@ _EOT
|
|||
|
||||
|
||||
vault {
|
||||
policies = ["monitoring-prometheus"]
|
||||
policies = ["prometheus"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -225,13 +225,13 @@ alerting:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: dc1
|
||||
relabel_configs:
|
||||
# Only keep alertmanagers
|
||||
- source_labels: [__meta_consul_service]
|
||||
action: keep
|
||||
regex: monitoring-alertmanager-tls
|
||||
regex: alertmanager-tls
|
||||
|
||||
scrape_configs:
|
||||
|
||||
|
@ -245,7 +245,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: dc1
|
||||
relabel_configs:
|
||||
|
||||
|
@ -262,7 +262,7 @@ scrape_configs:
|
|||
|
||||
- source_labels: [__meta_consul_service]
|
||||
regex: (.+)
|
||||
replacement: {{ range $idx, $instance := service "monitoring-cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
replacement: {{ range $idx, $instance := service "cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
target_label: __address__
|
||||
|
||||
# Rewrite the job labels to the name of the service
|
||||
|
@ -288,7 +288,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: dc1
|
||||
|
||||
relabel_configs:
|
||||
|
@ -348,7 +348,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: dc1
|
||||
|
||||
relabel_configs:
|
||||
|
@ -884,8 +884,8 @@ _EOT
|
|||
# A client cert, to connect to the AlertManager API
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/monitoring/issue/monitoring-prometheus"
|
||||
(printf "common_name=prometheus-%s.monitoring" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "pki/monitoring/issue/prometheus"
|
||||
(printf "common_name=prometheus-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}}
|
||||
{{ .Cert }}
|
||||
{{ .Key }}
|
||||
|
@ -940,7 +940,7 @@ _EOT
|
|||
|
||||
|
||||
volume "data" {
|
||||
source = "monitoring-alertmanager-data"
|
||||
source = "alertmanager-data"
|
||||
type = "csi"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
@ -950,7 +950,7 @@ _EOT
|
|||
|
||||
# This service is used for the different instances of alertmanager to communicate
|
||||
service {
|
||||
name = "monitoring-alertmanager-gossip"
|
||||
name = "alertmanager-gossip"
|
||||
port = "cluster"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -960,7 +960,7 @@ _EOT
|
|||
# This service is used by prometheus. As it needs to be able to reach every instance, it cannot use
|
||||
# the service mesh. The exposed port uses mTLS, so it's safe to expose it outside of the mesh
|
||||
service {
|
||||
name = "monitoring-alertmanager-tls"
|
||||
name = "alertmanager-tls"
|
||||
port = "web-tls"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -970,7 +970,7 @@ _EOT
|
|||
# This service is exposed through the service mesh
|
||||
# and can be used to reach the web interface through Traefik
|
||||
service {
|
||||
name = "monitoring-alertmanager"
|
||||
name = "alertmanager"
|
||||
port = 9093
|
||||
meta {
|
||||
metrics-port = "${NOMAD_HOST_PORT_metrics}"
|
||||
|
@ -1111,7 +1111,7 @@ _EOT
|
|||
|
||||
# This task will handle mTLS to the AlertManager API
|
||||
# And expose it as plain http on 127.0.0.1 for Traefik (through the service mesh) and for the metrics proxy
|
||||
task "tls-proxy" {
|
||||
task "untls-proxy" {
|
||||
driver = "docker"
|
||||
user = 9093
|
||||
|
||||
|
@ -1135,7 +1135,7 @@ _EOT
|
|||
|
||||
|
||||
vault {
|
||||
policies = ["metrics", "monitoring-alertmanager"]
|
||||
policies = ["metrics", "alertmanager"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -1156,7 +1156,7 @@ server {
|
|||
proxy_ssl_certificate /secrets/alertmanager.bundle.pem;
|
||||
proxy_ssl_certificate_key /secrets/alertmanager.bundle.pem;
|
||||
proxy_ssl_verify on;
|
||||
proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring;
|
||||
proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring.consul;
|
||||
proxy_ssl_trusted_certificate /local/monitoring.ca.pem;
|
||||
allow 127.0.0.1;
|
||||
deny all;
|
||||
|
@ -1170,8 +1170,8 @@ _EOT
|
|||
# Certificate used by AlertManager
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/monitoring/issue/monitoring-alertmanager"
|
||||
(printf "common_name=alertmanager-%s.monitoring" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "pki/monitoring/issue/alertmanager"
|
||||
(printf "common_name=alertmanager-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
|
||||
{{ .Cert }}
|
||||
|
@ -1214,7 +1214,7 @@ _EOT
|
|||
|
||||
|
||||
vault {
|
||||
policies = ["metrics", "monitoring-alertmanager"]
|
||||
policies = ["metrics", "alertmanager"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -1288,7 +1288,7 @@ exec alertmanager \
|
|||
--web.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_web-tls" }} \
|
||||
--cluster.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_cluster" }} \
|
||||
--cluster.advertise-address={{ env "NOMAD_HOST_ADDR_cluster" }} \
|
||||
{{- range service "monitoring-am-gossip" -}}
|
||||
{{- range service "alertmanager-gossip" -}}
|
||||
{{- if not (eq (env "NOMAD_ALLOC_INDEX") (index .ServiceMeta "alloc")) }}
|
||||
--cluster.peer={{ .Address }}:{{ .Port }} \
|
||||
{{ end -}}
|
||||
|
@ -1306,8 +1306,8 @@ _EOT
|
|||
# Certificate used by AlertManager
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/monitoring/issue/monitoring-alertmanager"
|
||||
(printf "common_name=alertmanager-%s.monitoring" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "pki/monitoring/issue/alertmanager"
|
||||
(printf "common_name=alertmanager-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
|
||||
{{ .Cert }}
|
||||
|
@ -1356,7 +1356,7 @@ _EOT
|
|||
|
||||
|
||||
volume "data" {
|
||||
source = "monitoring-loki-data"
|
||||
source = "loki-data"
|
||||
type = "csi"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
@ -1364,7 +1364,7 @@ _EOT
|
|||
|
||||
|
||||
service {
|
||||
name = "monitoring-loki"
|
||||
name = "loki"
|
||||
port = 3100
|
||||
meta {
|
||||
metrics-port = "${NOMAD_HOST_PORT_metrics}"
|
||||
|
@ -1507,14 +1507,14 @@ _EOT
|
|||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "danielberteaud/loki:2.9.5-1"
|
||||
image = "danielberteaud/loki:2.9.6-1"
|
||||
command = "loki"
|
||||
args = ["--config.file=/local/loki.yml"]
|
||||
}
|
||||
|
||||
|
||||
vault {
|
||||
policies = ["monitoring-loki"]
|
||||
policies = ["loki"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
|
@ -1570,7 +1570,7 @@ ruler:
|
|||
tls_cert_path: /secrets/loki.bundle.pem
|
||||
tls_key_path: /secrets/loki.bundle.pem
|
||||
tls_server_name: alertmanager.monitoring
|
||||
alertmanager_url: monitoring-alertmanager-tls
|
||||
alertmanager_url: alertmanager-tls
|
||||
enable_alertmanager_discovery: true
|
||||
enable_alertmanager_v2: true
|
||||
enable_api: true
|
||||
|
@ -1609,8 +1609,8 @@ _EOT
|
|||
# A client cert, to connect to the AlertManager API
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/monitoring/issue/monitoring-loki"
|
||||
(printf "common_name=loki-%s.monitoring" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "pki/monitoring/issue/loki"
|
||||
(printf "common_name=loki-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}}
|
||||
{{ .Cert }}
|
||||
{{ .Key }}
|
||||
|
@ -1666,7 +1666,7 @@ _EOT
|
|||
# The main service is the vector source
|
||||
# It will provide access to other services through the mesh (like loki)
|
||||
service {
|
||||
name = "monitoring-vector-aggregator"
|
||||
name = "vector-aggregator"
|
||||
port = 9000
|
||||
meta {
|
||||
metrics-port = "${NOMAD_HOST_PORT_metrics}"
|
||||
|
@ -1678,7 +1678,7 @@ _EOT
|
|||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "monitoring-loki"
|
||||
destination_name = "loki"
|
||||
local_bind_port = 3100
|
||||
# Workaround, see https://github.com/hashicorp/nomad/issues/18538
|
||||
destination_type = "service"
|
||||
|
@ -1909,4 +1909,300 @@ _EOT
|
|||
|
||||
}
|
||||
}
|
||||
|
||||
group "interface" {
|
||||
|
||||
shutdown_delay = "6s"
|
||||
|
||||
network {
|
||||
mode = "bridge"
|
||||
port "metrics" {}
|
||||
}
|
||||
|
||||
|
||||
volume "data" {
|
||||
source = "grafana-data"
|
||||
type = "csi"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
}
|
||||
|
||||
|
||||
service {
|
||||
name = "grafana"
|
||||
port = 3000
|
||||
|
||||
|
||||
meta {
|
||||
metrics-port = "${NOMAD_HOST_PORT_metrics}"
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
}
|
||||
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "postgres"
|
||||
local_bind_port = 5432
|
||||
# Workaround, see https://github.com/hashicorp/nomad/issues/18538
|
||||
destination_type = "service"
|
||||
}
|
||||
}
|
||||
}
|
||||
sidecar_task {
|
||||
config {
|
||||
args = [
|
||||
"-c",
|
||||
"${NOMAD_SECRETS_DIR}/envoy_bootstrap.json",
|
||||
"-l",
|
||||
"${meta.connect.log_level}",
|
||||
"--concurrency",
|
||||
"${meta.connect.proxy_concurrency}",
|
||||
"--disable-hot-restart"
|
||||
]
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 50
|
||||
memory = 64
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
check {
|
||||
name = "health"
|
||||
type = "http"
|
||||
path = "/api/health"
|
||||
expose = true
|
||||
interval = "30s"
|
||||
timeout = "8s"
|
||||
}
|
||||
|
||||
tags = [
|
||||
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.monitoring-grafana.entrypoints=https",
|
||||
"traefik.http.routers.monitoring-grafana.rule=Host(`grafana.example.org`)",
|
||||
"traefik.http.middlewares.csp-monitoring-grafana.headers.contentsecuritypolicy=default-src 'self';font-src 'self' data:;img-src 'self' data:;script-src 'self' 'unsafe-inline' 'unsafe-eval';style-src 'self' 'unsafe-inline';",
|
||||
"traefik.http.routers.monitoring-grafana.middlewares=security-headers@file,rate-limit-std@file,forward-proto@file,inflight-std@file,hsts@file,compression@file,csp-monitoring-grafana",
|
||||
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
# The prometheus metrics proxy, adding mTLS to the metrics endpoint
|
||||
task "metrics-proxy" {
|
||||
driver = "docker"
|
||||
user = 8995
|
||||
|
||||
config {
|
||||
image = "nginxinc/nginx-unprivileged:alpine"
|
||||
force_pull = true
|
||||
volumes = [
|
||||
"local/default.conf:/etc/nginx/conf.d/default.conf:ro"
|
||||
]
|
||||
pids_limit = 100
|
||||
}
|
||||
|
||||
lifecycle {
|
||||
hook = "poststart"
|
||||
sidecar = true
|
||||
}
|
||||
|
||||
vault {
|
||||
policies = ["metrics"]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/monitoring/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_metrics")) }}
|
||||
{{ .Cert }}
|
||||
{{ .Key }}{{ end -}}
|
||||
_EOT
|
||||
destination = "secrets/metrics.bundle.pem"
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{ with secret "pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
|
||||
_EOT
|
||||
destination = "local/monitoring.ca.pem"
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<_EOT
|
||||
server {
|
||||
listen {{ env "NOMAD_ALLOC_PORT_metrics" }} ssl;
|
||||
http2 on;
|
||||
|
||||
ssl_certificate /secrets/metrics.bundle.pem;
|
||||
ssl_certificate_key /secrets/metrics.bundle.pem;
|
||||
ssl_client_certificate /local/monitoring.ca.pem;
|
||||
ssl_verify_client on;
|
||||
ssl_protocols TLSv1.2 TLSv1.3;
|
||||
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
|
||||
ssl_session_cache shared:SSL:10m;
|
||||
ssl_session_timeout 1h;
|
||||
ssl_session_tickets off;
|
||||
gzip on;
|
||||
gzip_types
|
||||
text/plain;
|
||||
gzip_vary on;
|
||||
|
||||
server_tokens off;
|
||||
|
||||
if ($request_method !~ ^(GET|HEAD)$ ) {
|
||||
return 405;
|
||||
}
|
||||
location /metrics {
|
||||
proxy_pass http://localhost:3000/metrics;
|
||||
}
|
||||
}
|
||||
_EOT
|
||||
destination = "local/default.conf"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 10
|
||||
memory = 10
|
||||
memory_max = 20
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
# Local memcached instance
|
||||
task "memcached" {
|
||||
driver = "docker"
|
||||
user = 11211
|
||||
|
||||
lifecycle {
|
||||
hook = "prestart"
|
||||
sidecar = true
|
||||
}
|
||||
|
||||
config {
|
||||
image = "memcached:alpine"
|
||||
readonly_rootfs = true
|
||||
force_pull = true
|
||||
entrypoint = ["/local/memcached"]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<_EOT
|
||||
#!/bin/sh
|
||||
|
||||
set -eu
|
||||
exec memcached -l 127.0.0.1 -p 11211 -m {{ env "NOMAD_MEMORY_LIMIT" | parseInt | subtract 5 }}
|
||||
_EOT
|
||||
destination = "local/memcached"
|
||||
perms = 755
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 10
|
||||
memory = 20
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
task "grafana" {
|
||||
|
||||
driver = "docker"
|
||||
leader = true
|
||||
|
||||
config {
|
||||
image = "danielberteaud/grafana:10.4.1-1"
|
||||
readonly_rootfs = true
|
||||
pids_limit = 100
|
||||
command = "grafana"
|
||||
args = [
|
||||
"server",
|
||||
"--homepath=/opt/grafana",
|
||||
"--config=/secrets/grafana.ini",
|
||||
"--packaging=docker"
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
vault {
|
||||
policies = ["grafana"]
|
||||
env = false
|
||||
disable_file = true
|
||||
change_mode = "noop"
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Use a template block instead of env {} so we can fetch values from vault
|
||||
template {
|
||||
data = <<_EOT
|
||||
LANG=fr_FR.utf8
|
||||
TZ=Europe/Paris
|
||||
_EOT
|
||||
destination = "secrets/.env"
|
||||
perms = 400
|
||||
env = true
|
||||
}
|
||||
|
||||
|
||||
# Basic grafana configuration file
|
||||
template {
|
||||
data = <<_EOT
|
||||
[server]
|
||||
http_addr = 127.0.0.1
|
||||
http_port = 3000
|
||||
root_url = https://grafana.example.org
|
||||
serve_from_sub_path = false
|
||||
|
||||
[database]
|
||||
type = postgres
|
||||
name = grafana
|
||||
host = 127.0.0.1:5432
|
||||
user = {{ with secret "database/creds/grafana" }}{{ .Data.username }}{{ end }}
|
||||
password = {{ with secret "database/creds/grafana" }}{{ .Data.password }}{{ end }}
|
||||
|
||||
|
||||
[remote_cache]
|
||||
type = memcached
|
||||
connstr = 127.0.0.1:11211
|
||||
|
||||
[analytics]
|
||||
reporting_enabled = false
|
||||
check_for_updates = false
|
||||
check_for_plugin_updates = false
|
||||
|
||||
[security]
|
||||
cookie_secure = true
|
||||
cookie_samesite = strict
|
||||
x_xss_protection = true
|
||||
secret_key = {{ with secret "kv/service/monitoring/grafana" }}{{ .Data.data.secret_key }}{{ end }}
|
||||
|
||||
[dataproxy]
|
||||
timeout = 120
|
||||
|
||||
_EOT
|
||||
destination = "secrets/grafana.ini"
|
||||
uid = 103000
|
||||
perms = 400
|
||||
}
|
||||
|
||||
# Mount volume in /data for persistence
|
||||
volume_mount {
|
||||
volume = "data"
|
||||
destination = "/data"
|
||||
}
|
||||
|
||||
|
||||
resources {
|
||||
cpu = 100
|
||||
memory = 256
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
path "pki/monitoring/issue/monitoring-alertmanager" {
|
||||
path "pki/monitoring/issue/alertmanager" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
|
@ -1,20 +1,19 @@
|
|||
|
||||
# Read vault metrics
|
||||
path "sys/metrics" {
|
||||
capabilities = ["read", "list"]
|
||||
}
|
||||
|
||||
# Get a cert for Nomad
|
||||
path "pki/nomad/issue/monitoring-cluster-exporter" {
|
||||
path "pki/nomad/issue/cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a cert for Consul
|
||||
path "pki/consul/issue/monitoring-cluster-exporter" {
|
||||
path "pki/consul/issue/cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a consul token
|
||||
path "consul/creds/monitoring-cluster-exporter" {
|
||||
path "consul/creds/cluster-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
path "consul/creds/consul-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
path "database/creds/grafana" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
|
||||
path "kv/data/service/monitoring/grafana" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
path "pki/monitoring/issue/monitoring-loki" {
|
||||
path "pki/monitoring/issue/loki" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
|
||||
path "consul/creds/monitoring-consul-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
path "pki/monitoring/issue/monitoring-prometheus" {
|
||||
path "pki/monitoring/issue/prometheus" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,6 @@ path "kv/service/monitoring/prometheus" {
|
|||
capabilities = ["read"]
|
||||
}
|
||||
|
||||
path "consul/creds/monitoring-prometheus" {
|
||||
path "consul/creds/prometheus" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -17,7 +17,7 @@ job "[[ .instance ]]-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-ping-exporter[[ .consul.suffix ]]"
|
||||
name = "ping-exporter[[ .consul.suffix ]]"
|
||||
port = "ping"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -26,7 +26,7 @@ job "[[ .instance ]]-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-blackbox-exporter[[ .consul.suffix ]]"
|
||||
name = "blackbox-exporter[[ .consul.suffix ]]"
|
||||
port = "blackbox"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -34,7 +34,7 @@ job "[[ .instance ]]-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-consul-exporter[[ .consul.suffix ]]"
|
||||
name = "consul-exporter[[ .consul.suffix ]]"
|
||||
port = "ping"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -43,7 +43,7 @@ job "[[ .instance ]]-exporters" {
|
|||
}
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-cluster-exporter[[ .consul.suffix ]]"
|
||||
name = "cluster-exporter[[ .consul.suffix ]]"
|
||||
port = "cluster"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -129,7 +129,7 @@ _EOT
|
|||
|
||||
template {
|
||||
data = <<_EOT
|
||||
CONSUL_HTTP_TOKEN={{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" }}{{ .Data.token }}{{ end }}
|
||||
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/consul-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
|
||||
_EOT
|
||||
destination = "secrets/.consul.env"
|
||||
uid = 100000
|
||||
|
@ -204,7 +204,7 @@ _EOT
|
|||
# Get a Nomad client certificate
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/nomad/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }}
|
||||
{{- with pkiCert "pki/nomad/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }}
|
||||
{{ .Data.Cert }}
|
||||
{{ .Data.Key }}
|
||||
{{- end }}
|
||||
|
@ -228,7 +228,7 @@ _EOT
|
|||
# Same for Consul
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "pki/consul/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }}
|
||||
{{- with pkiCert "pki/consul/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }}
|
||||
{{ .Data.Cert }}
|
||||
{{ .Data.Key }}
|
||||
{{- end }}
|
|
@ -0,0 +1,59 @@
|
|||
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
|
||||
|
||||
ARG GRAFANA_VERSION=[[ .monitoring.grafana.version ]] \
|
||||
GRAFANA_PLUGINS=[[ join .monitoring.grafana.plugins "," ]]
|
||||
|
||||
ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz /tmp
|
||||
ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256 /tmp
|
||||
RUN set -eux &&\
|
||||
apk --no-cache add \
|
||||
tar \
|
||||
curl \
|
||||
ca-certificates \
|
||||
bash \
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
&&\
|
||||
ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\
|
||||
cd /tmp &&\
|
||||
echo "$(cat grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256) grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz" | sha256sum -c &&\
|
||||
tar xzf grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz &&\
|
||||
mv grafana-v${GRAFANA_VERSION} /opt/grafana &&\
|
||||
mkdir /opt/grafana/plugins &&\
|
||||
IFS=',' &&\
|
||||
for PLUGIN in ${GRAFANA_PLUGINS}; do /opt/grafana/bin/grafana cli --pluginsDir /opt/grafana/plugins plugins install ${PLUGIN}; done
|
||||
|
||||
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
|
||||
MAINTAINER [[ .docker.maintainer ]]
|
||||
|
||||
ENV PATH=/opt/grafana/bin/:${PATH} \
|
||||
GF_PATHS_DATA=/data \
|
||||
GF_PATHS_PLUGINS=/opt/grafana/plugins \
|
||||
GF_LOG_MODE=console
|
||||
|
||||
COPY --from=builder /opt/grafana /opt/grafana
|
||||
RUN set -eux &&\
|
||||
apk --no-cache add \
|
||||
gcompat \
|
||||
libc6-compat \
|
||||
&&\
|
||||
ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\
|
||||
addgroup -g 3000 grafana &&\
|
||||
adduser --system \
|
||||
--ingroup grafana \
|
||||
--disabled-password \
|
||||
--uid 3000 \
|
||||
--home /opt/grafana \
|
||||
--no-create-home \
|
||||
--shell /sbin/nologin \
|
||||
grafana &&\
|
||||
mkdir /data &&\
|
||||
chown -R grafana:grafana /data /opt/grafana/plugins &&\
|
||||
chmod 700 /data
|
||||
|
||||
WORKDIR /opt/grafana
|
||||
USER grafana
|
||||
CMD ["grafana", \
|
||||
"server", \
|
||||
"--homepath=/opt/grafana", \
|
||||
"--packaging=docker"]
|
12
init/consul
12
init/consul
|
@ -1,17 +1,17 @@
|
|||
#!/bin/sh
|
||||
# vim: syntax=sh
|
||||
|
||||
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-prometheus \
|
||||
vault write consul/roles/prometheus[[ .consul.suffix ]] \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="[[ .instance ]]-prometheus"
|
||||
consul_policies="[[ .instance ]]"
|
||||
|
||||
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-consul-exporter \
|
||||
vault write consul/roles/consul-exporter[[ .consul.suffix ]] \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="[[ .instance ]]-prometheus"
|
||||
consul_policies="[[ .instance ]]"
|
||||
|
||||
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-cluster-exporter \
|
||||
vault write consul/roles/cluster-exporter \
|
||||
ttl=720h \
|
||||
max_ttl=720h \
|
||||
consul_policies="[[ .instance ]]-prometheus"
|
||||
consul_policies="[[ .instance ]]"
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
[[ template "common/vault.mkpgrole.sh" merge .monitoring.grafana . ]]
|
20
init/pki
20
init/pki
|
@ -6,8 +6,8 @@ set -euo pipefail
|
|||
[[ template "common/vault.mkpki.sh" $c ]]
|
||||
|
||||
# Create a role for alertmanager
|
||||
vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-alertmanager \
|
||||
allowed_domains="[[ .instance ]]" \
|
||||
vault write [[ $c.vault.pki.path ]]/roles/alertmanager[[ .consul.suffix ]] \
|
||||
allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -19,8 +19,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-alertmanager \
|
|||
ou="[[ $c.vault.pki.ou ]]"
|
||||
|
||||
# Create a role for prometheus (which will only be a client, for AlertManager)
|
||||
vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-prometheus \
|
||||
allowed_domains="[[ .instance ]]" \
|
||||
vault write [[ $c.vault.pki.path ]]/roles/prometheus[[ .consul.suffix ]] \
|
||||
allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -32,8 +32,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-prometheus \
|
|||
ou="[[ $c.vault.pki.ou ]]"
|
||||
|
||||
# Create a role for loki (which will only be a client, for AlertManager)
|
||||
vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-loki \
|
||||
allowed_domains="[[ .instance ]]" \
|
||||
vault write [[ $c.vault.pki.path ]]/roles/loki[[ .consul.suffix ]] \
|
||||
allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -45,8 +45,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-loki \
|
|||
ou="[[ $c.vault.pki.ou ]]"
|
||||
|
||||
# Create a role for metrics exporters (server only)
|
||||
vault write [[ $c.vault.pki.path ]]/roles/metrics \
|
||||
allowed_domains="[[ .instance ]]" \
|
||||
vault write [[ $c.vault.pki.path ]]/roles/metrics[[ .consul.suffix ]] \
|
||||
allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
allow_localhost=false \
|
||||
|
@ -60,7 +60,7 @@ vault write [[ $c.vault.pki.path ]]/roles/metrics \
|
|||
ou="[[ $c.vault.pki.ou ]]"
|
||||
|
||||
# Create a role on the Nomad PKI for the cluster exporter
|
||||
vault write pki/nomad/roles/[[ .instance ]]-cluster-exporter \
|
||||
vault write pki/nomad/roles/cluster-exporter[[ .consul.suffix ]] \
|
||||
allowed_domains='nomad.[[ .consul.domain ]]' \
|
||||
allow_subdomains=true \
|
||||
allow_wildcard_certificates=false \
|
||||
|
@ -71,7 +71,7 @@ vault write pki/nomad/roles/[[ .instance ]]-cluster-exporter \
|
|||
ou="Cluster metrics exporter"
|
||||
|
||||
# Create a role on the Consul PKI for the cluster exporter
|
||||
vault write pki/consul/roles/[[ .instance ]]-cluster-exporter \
|
||||
vault write pki/consul/roles/cluster-exporter[[ .consul.suffix ]] \
|
||||
allowed_domains="consul.[[ .consul.domain ]]" \
|
||||
allow_bare_domains=false \
|
||||
allow_subdomains=true \
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
#!/bin/sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
[[ template "common/vault.rand_secrets" merge .monitoring . ]]
|
|
@ -17,7 +17,7 @@ job "[[ .instance ]]-services" {
|
|||
[[ template "common/volumes" $c ]]
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
|
||||
name = "prometheus[[ .consul.suffix ]]"
|
||||
port = 9090
|
||||
|
||||
[[ template "common/service_meta" $c ]]
|
||||
|
@ -122,8 +122,8 @@ _EOT
|
|||
# A client cert, to connect to the AlertManager API
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus"
|
||||
(printf "common_name=prometheus-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/prometheus"
|
||||
(printf "common_name=prometheus-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}}
|
||||
{{ .Cert }}
|
||||
{{ .Key }}
|
||||
|
@ -177,7 +177,7 @@ _EOT
|
|||
|
||||
# This service is used for the different instances of alertmanager to communicate
|
||||
service {
|
||||
name = "[[ .instance ]]-alertmanager-gossip[[ .consul.suffix ]]"
|
||||
name = "alertmanager-gossip[[ .consul.suffix ]]"
|
||||
port = "cluster"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -187,7 +187,7 @@ _EOT
|
|||
# This service is used by prometheus. As it needs to be able to reach every instance, it cannot use
|
||||
# the service mesh. The exposed port uses mTLS, so it's safe to expose it outside of the mesh
|
||||
service {
|
||||
name = "[[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]"
|
||||
name = "alertmanager-tls[[ .consul.suffix ]]"
|
||||
port = "web-tls"
|
||||
meta {
|
||||
alloc = "${NOMAD_ALLOC_INDEX}"
|
||||
|
@ -197,7 +197,7 @@ _EOT
|
|||
# This service is exposed through the service mesh
|
||||
# and can be used to reach the web interface through Traefik
|
||||
service {
|
||||
name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
|
||||
name = "alertmanager[[ .consul.suffix ]]"
|
||||
port = 9093
|
||||
[[ template "common/service_meta" $c ]]
|
||||
[[ template "common/connect" $c ]]
|
||||
|
@ -224,7 +224,7 @@ _EOT
|
|||
|
||||
# This task will handle mTLS to the AlertManager API
|
||||
# And expose it as plain http on 127.0.0.1 for Traefik (through the service mesh) and for the metrics proxy
|
||||
task "tls-proxy" {
|
||||
task "untls-proxy" {
|
||||
driver = "[[ $c.nomad.driver ]]"
|
||||
user = 9093
|
||||
|
||||
|
@ -256,8 +256,8 @@ _EOT
|
|||
# Certificate used by AlertManager
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager"
|
||||
(printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/alertmanager"
|
||||
(printf "common_name=alertmanager-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
|
||||
{{ .Cert }}
|
||||
|
@ -342,8 +342,8 @@ _EOT
|
|||
# Certificate used by AlertManager
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager"
|
||||
(printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/alertmanager"
|
||||
(printf "common_name=alertmanager-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
|
||||
{{ .Cert }}
|
||||
|
@ -389,7 +389,7 @@ _EOT
|
|||
[[ template "common/volumes" $c ]]
|
||||
|
||||
service {
|
||||
name = "[[ .instance ]]-loki[[ .consul.suffix ]]"
|
||||
name = "loki[[ .consul.suffix ]]"
|
||||
port = 3100
|
||||
[[ template "common/service_meta" $c ]]
|
||||
[[ template "common/connect" $c ]]
|
||||
|
@ -443,8 +443,8 @@ _EOT
|
|||
# A client cert, to connect to the AlertManager API
|
||||
template {
|
||||
data = <<_EOT
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-loki"
|
||||
(printf "common_name=loki-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/loki"
|
||||
(printf "common_name=loki-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX"))
|
||||
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}}
|
||||
{{ .Cert }}
|
||||
{{ .Key }}
|
||||
|
@ -496,7 +496,7 @@ _EOT
|
|||
# The main service is the vector source
|
||||
# It will provide access to other services through the mesh (like loki)
|
||||
service {
|
||||
name = "[[ .instance ]]-vector-aggregator[[ .consul.suffix ]]"
|
||||
name = "vector-aggregator[[ .consul.suffix ]]"
|
||||
port = 9000
|
||||
[[ template "common/service_meta" $c ]]
|
||||
[[ template "common/connect" $c ]]
|
||||
|
@ -509,7 +509,7 @@ _EOT
|
|||
# The syslog UDP service can be used to ingest standard syslog logs from other
|
||||
# devices, and can be exposed by Traefik for this
|
||||
service {
|
||||
name = "[[ .instance ]]-syslog-udp[[ .consul.suffix ]]"
|
||||
name = "syslog-udp[[ .consul.suffix ]]"
|
||||
port = "syslog-udp"
|
||||
tags = [
|
||||
[[ template "common/traefik_tags" merge $c.syslog_udp $c ]]
|
||||
|
@ -522,7 +522,7 @@ _EOT
|
|||
[[- if $c.fluentd.enabled ]]
|
||||
# The fluentd service can be used to ingest fluentd logs
|
||||
service {
|
||||
name = "[[ .instance ]]-syslog-udp[[ .consul.suffix ]]"
|
||||
name = "syslog-udp[[ .consul.suffix ]]"
|
||||
port = 24224
|
||||
tags = [
|
||||
[[ template "common/traefik_tags" merge $c.fluentd $c ]]
|
||||
|
@ -555,6 +555,84 @@ _EOT
|
|||
change_signal = "SIGHUP"
|
||||
}
|
||||
|
||||
[[ template "common/resources" $c ]]
|
||||
}
|
||||
}
|
||||
|
||||
group "interface" {
|
||||
[[- $c := merge .monitoring.grafana .monitoring . ]]
|
||||
|
||||
shutdown_delay = "6s"
|
||||
|
||||
network {
|
||||
mode = "bridge"
|
||||
port "metrics" {}
|
||||
}
|
||||
|
||||
[[ template "common/volumes" $c ]]
|
||||
|
||||
service {
|
||||
name = "grafana[[ .consul.suffix ]]"
|
||||
port = 3000
|
||||
|
||||
[[ template "common/metrics_meta" $c ]]
|
||||
[[ template "common/connect" $c ]]
|
||||
|
||||
check {
|
||||
name = "health"
|
||||
type = "http"
|
||||
path = "/api/health"
|
||||
expose = true
|
||||
interval = "30s"
|
||||
timeout = "8s"
|
||||
}
|
||||
|
||||
tags = [
|
||||
[[ template "common/traefik_tags" $c ]]
|
||||
]
|
||||
}
|
||||
|
||||
[[ template "common/task.metrics_proxy" $c ]]
|
||||
[[ template "common/task.pgpooler" $c ]]
|
||||
[[ template "common/task.memcached" ]]
|
||||
|
||||
task "grafana" {
|
||||
|
||||
driver = "[[ $c.nomad.driver ]]"
|
||||
leader = true
|
||||
|
||||
config {
|
||||
image = "[[ $c.image ]]"
|
||||
readonly_rootfs = true
|
||||
pids_limit = 100
|
||||
command = "grafana"
|
||||
args = [
|
||||
"server",
|
||||
"--homepath=/opt/grafana",
|
||||
"--config=/secrets/grafana.ini",
|
||||
"--packaging=docker"
|
||||
]
|
||||
}
|
||||
|
||||
[[ template "common/vault.policies" $c ]]
|
||||
[[ template "common/file_env" $c ]]
|
||||
|
||||
# Basic grafana configuration file
|
||||
template {
|
||||
data = <<_EOT
|
||||
[[ template "monitoring/grafana/grafana.ini" $c ]]
|
||||
_EOT
|
||||
destination = "secrets/grafana.ini"
|
||||
uid = 103000
|
||||
perms = 400
|
||||
}
|
||||
|
||||
# Mount volume in /data for persistence
|
||||
volume_mount {
|
||||
volume = "data"
|
||||
destination = "/data"
|
||||
}
|
||||
|
||||
[[ template "common/resources" $c ]]
|
||||
}
|
||||
}
|
|
@ -5,7 +5,7 @@ server {
|
|||
proxy_ssl_certificate /secrets/alertmanager.bundle.pem;
|
||||
proxy_ssl_certificate_key /secrets/alertmanager.bundle.pem;
|
||||
proxy_ssl_verify on;
|
||||
proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring;
|
||||
proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.[[ .instance ]].[[ .consul.domain ]];
|
||||
proxy_ssl_trusted_certificate /local/monitoring.ca.pem;
|
||||
allow 127.0.0.1;
|
||||
deny all;
|
||||
|
|
|
@ -10,7 +10,7 @@ exec alertmanager \
|
|||
--web.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_web-tls" }} \
|
||||
--cluster.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_cluster" }} \
|
||||
--cluster.advertise-address={{ env "NOMAD_HOST_ADDR_cluster" }} \
|
||||
{{- range service "[[ .instance ]]-am-gossip[[ .consul.suffix ]]" -}}
|
||||
{{- range service "alertmanager-gossip[[ .consul.suffix ]]" -}}
|
||||
{{- if not (eq (env "NOMAD_ALLOC_INDEX") (index .ServiceMeta "alloc")) }}
|
||||
--cluster.peer={{ .Address }}:{{ .Port }} \
|
||||
{{ end -}}
|
||||
|
|
|
@ -24,7 +24,7 @@ server {
|
|||
return 405;
|
||||
}
|
||||
|
||||
set $consul_token "{{ with secret "consul/creds/[[ .instance ]]-cluster-exporter" }}{{ .Data.token }}{{ end }}";
|
||||
set $consul_token "{{ with secret "consul/creds/cluster-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}";
|
||||
|
||||
{{- range service "nomad-client" }}
|
||||
location /nomad-client/{{ .Node }} {
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
[server]
|
||||
http_addr = 127.0.0.1
|
||||
http_port = 3000
|
||||
root_url = [[ .monitoring.grafana.public_url ]]
|
||||
serve_from_sub_path = [[ if eq (urlParse .monitoring.grafana.public_url).Path "" ]]false[[ else ]]true[[ end ]]
|
||||
|
||||
[database]
|
||||
type = postgres
|
||||
name = [[ .postgres.database ]]
|
||||
[[- if ne .postgres.pooler.engine "none" ]]
|
||||
host = 127.0.0.1:[[ .postgres.pooler.port ]]
|
||||
user = [[ .instance ]]
|
||||
password = {{ env "NOMAD_ALLOC_ID" }}
|
||||
ssl_mode = disable
|
||||
[[- else ]]
|
||||
host = [[ .postgres.host ]]:[[ .postgres.port ]]
|
||||
user = [[ .postgres.user ]]
|
||||
password = [[ .postgres.password ]]
|
||||
[[ end ]]
|
||||
|
||||
[remote_cache]
|
||||
type = memcached
|
||||
connstr = 127.0.0.1:11211
|
||||
|
||||
[analytics]
|
||||
reporting_enabled = false
|
||||
check_for_updates = false
|
||||
check_for_plugin_updates = false
|
||||
|
||||
[security]
|
||||
cookie_secure = true
|
||||
cookie_samesite = strict
|
||||
x_xss_protection = true
|
||||
secret_key = {{ with secret "[[ .vault.root ]]kv/service/[[ .instance ]]/grafana" }}{{ .Data.data.secret_key }}{{ end }}
|
||||
|
||||
[dataproxy]
|
||||
timeout = 120
|
|
@ -53,7 +53,7 @@ limits_config:
|
|||
max_query_parallelism: 128
|
||||
|
||||
ruler:
|
||||
alertmanager_url: [[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]
|
||||
alertmanager_url: alertmanager-tls[[ .consul.suffix ]]
|
||||
enable_alertmanager_discovery: true
|
||||
alertmanager_client:
|
||||
tls_cert_path: /secrets/loki.bundle.pem
|
||||
|
|
|
@ -19,13 +19,13 @@ alerting:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: [[ .consul.datacenter ]]
|
||||
relabel_configs:
|
||||
# Only keep alertmanagers
|
||||
- source_labels: [__meta_consul_service]
|
||||
action: keep
|
||||
regex: [[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]
|
||||
regex: alertmanager-tls[[ .consul.suffix ]]
|
||||
|
||||
scrape_configs:
|
||||
|
||||
|
@ -40,7 +40,7 @@ scrape_configs:
|
|||
[[- end ]]
|
||||
|
||||
[[- if gt (len .exporters.blackbox.http_probes) 0 ]]
|
||||
|
||||
{{- if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }}
|
||||
# Blackbox Exporter HTTP targets
|
||||
- job_name: http_probe
|
||||
metrics_path: /probe
|
||||
|
@ -52,6 +52,7 @@ scrape_configs:
|
|||
params:
|
||||
module: ["http_2xx"]
|
||||
static_configs:
|
||||
{{ range $idx, $instance := service "blackbox-exporter[[ .consul.suffix ]]" }}
|
||||
- targets:
|
||||
[[- range $http_probe := .exporters.blackbox.http_probes ]]
|
||||
- [[ $http_probe ]]
|
||||
|
@ -62,11 +63,13 @@ scrape_configs:
|
|||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
replacement: {{ .Address }}:{{ .Port }}
|
||||
{{ end }}
|
||||
{{- end }}
|
||||
[[- end ]]
|
||||
|
||||
[[- if gt (len .exporters.blackbox.tcp_probes) 0 ]]
|
||||
|
||||
{{ if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }}
|
||||
# Blackbox Exporter TCP targets
|
||||
- job_name: tcp_probe
|
||||
metrics_path: /probe
|
||||
|
@ -78,6 +81,7 @@ scrape_configs:
|
|||
params:
|
||||
module: ["tcp_connect"]
|
||||
static_configs:
|
||||
{{ range $idx, $instance := service "blackbox-exporter[[ .consul.suffix ]]" }}
|
||||
[[- range $target := .exporters.blackbox.tcp_probes ]]
|
||||
- [[ $target ]]
|
||||
[[- end ]]
|
||||
|
@ -87,7 +91,9 @@ scrape_configs:
|
|||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
replacement: {{ .Address }}:{{ .Port }}
|
||||
{{ end }}
|
||||
{{- end }}
|
||||
[[- end ]]
|
||||
|
||||
# Cluster services
|
||||
|
@ -100,7 +106,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: [[ .consul.datacenter ]]
|
||||
relabel_configs:
|
||||
|
||||
|
@ -117,7 +123,7 @@ scrape_configs:
|
|||
|
||||
- source_labels: [__meta_consul_service]
|
||||
regex: (.+)
|
||||
replacement: {{ range $idx, $instance := service "[[ .instance ]]-cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
replacement: {{ range $idx, $instance := service "cluster-exporter[[ .consul.suffix ]]" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
|
||||
target_label: __address__
|
||||
|
||||
# Rewrite the job labels to the name of the service
|
||||
|
@ -143,7 +149,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: [[ .consul.datacenter ]]
|
||||
|
||||
relabel_configs:
|
||||
|
@ -203,7 +209,7 @@ scrape_configs:
|
|||
consul_sd_configs:
|
||||
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
||||
scheme: http
|
||||
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
|
||||
token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
|
||||
datacenter: [[ .consul.datacenter ]]
|
||||
|
||||
relabel_configs:
|
||||
|
|
|
@ -6,6 +6,10 @@ vault:
|
|||
pki:
|
||||
path: '[[ .prometheus.vault_pki ]]'
|
||||
ou: Monitoring
|
||||
rand_secrets:
|
||||
- path: grafana
|
||||
fields:
|
||||
- secret_key
|
||||
|
||||
monitoring:
|
||||
|
||||
|
@ -40,7 +44,7 @@ monitoring:
|
|||
memory: 32
|
||||
vault:
|
||||
policies:
|
||||
- '[[ .instance ]]-consul-exporter[[ .consul.suffix ]]'
|
||||
- 'consul-exporter[[ .consul.suffix ]]'
|
||||
|
||||
cluster:
|
||||
image: nginxinc/nginx-unprivileged:alpine
|
||||
|
@ -50,12 +54,12 @@ monitoring:
|
|||
memory: 15
|
||||
vault:
|
||||
policies:
|
||||
- '[[ .instance ]]-cluster-exporter[[ .consul.suffix ]]'
|
||||
- 'cluster-exporter[[ .consul.suffix ]]'
|
||||
- metrics
|
||||
|
||||
prometheus:
|
||||
|
||||
version: 2.50.1
|
||||
version: 2.51.0
|
||||
|
||||
count: 1
|
||||
|
||||
|
@ -70,12 +74,12 @@ monitoring:
|
|||
volumes:
|
||||
data:
|
||||
type: csi
|
||||
source: '[[ .instance ]]-prometheus-data[[ .consul.suffix ]]'
|
||||
source: 'prometheus-data'
|
||||
per_alloc: true
|
||||
|
||||
vault:
|
||||
policies:
|
||||
- '[[ .instance ]]-prometheus[[ .consul.suffix ]]'
|
||||
- 'prometheus[[ .consul.suffix ]]'
|
||||
|
||||
jobs: {}
|
||||
alert_rules: {}
|
||||
|
@ -110,7 +114,7 @@ monitoring:
|
|||
strip_prefix: false
|
||||
volumes:
|
||||
data:
|
||||
source: '[[ .instance ]]-alertmanager-data[[ .consul.suffix ]]'
|
||||
source: 'alertmanager-data'
|
||||
type: csi
|
||||
per_alloc: true
|
||||
prometheus:
|
||||
|
@ -118,13 +122,13 @@ monitoring:
|
|||
vault:
|
||||
policies:
|
||||
- metrics
|
||||
- '[[ .instance ]]-alertmanager[[ .consul.suffix ]]'
|
||||
- 'alertmanager[[ .consul.suffix ]]'
|
||||
email:
|
||||
from: alertmanager@[[ .consul.domain ]]
|
||||
custom_config: {}
|
||||
|
||||
loki:
|
||||
version: 2.9.5
|
||||
version: 2.9.6
|
||||
image: '[[ .docker.repo ]]loki:[[ .monitoring.loki.version ]]-1'
|
||||
env: {}
|
||||
resources:
|
||||
|
@ -132,7 +136,7 @@ monitoring:
|
|||
memory: 512
|
||||
vault:
|
||||
policies:
|
||||
- '[[ .instance ]]-loki[[ .consul.suffix ]]'
|
||||
- 'loki[[ .consul.suffix ]]'
|
||||
public_url: https://loki.example.org
|
||||
traefik:
|
||||
router: loki
|
||||
|
@ -143,7 +147,7 @@ monitoring:
|
|||
volumes:
|
||||
data:
|
||||
type: csi
|
||||
source: '[[ .instance ]]-loki-data[[ .consul.suffix ]]'
|
||||
source: 'loki-data'
|
||||
|
||||
vector:
|
||||
version: 0.36.1
|
||||
|
@ -159,7 +163,7 @@ monitoring:
|
|||
consul:
|
||||
connect:
|
||||
upstreams:
|
||||
- destination_name: '[[ .instance ]]-loki[[ .consul.suffix ]]'
|
||||
- destination_name: 'loki[[ .consul.suffix ]]'
|
||||
local_bind_port: 3100
|
||||
fluentd:
|
||||
enabled: false
|
||||
|
@ -181,6 +185,46 @@ monitoring:
|
|||
prometheus:
|
||||
metrics_url: http://127.0.0.1:9001/metrics
|
||||
|
||||
grafana:
|
||||
version: 10.4.1
|
||||
image: '[[ .docker.repo ]]grafana:[[ .monitoring.grafana.version ]]-1'
|
||||
env: {}
|
||||
resources:
|
||||
cpu: 100
|
||||
memory: 256
|
||||
public_url: https://grafana.example.org
|
||||
plugins:
|
||||
#- alexanderzobnin-zabbix-app
|
||||
#- ddurieux-glpi-app
|
||||
- grafana-clock-panel
|
||||
- grafana-piechart-panel
|
||||
traefik:
|
||||
enabled: true
|
||||
router: grafana
|
||||
strip_prefix: false
|
||||
consul:
|
||||
connect:
|
||||
upstreams:
|
||||
- destination_name: postgres[[ .consul.suffix ]]
|
||||
local_bind_port: 5432
|
||||
volumes:
|
||||
data:
|
||||
type: csi
|
||||
source: 'grafana-data'
|
||||
vault:
|
||||
policies:
|
||||
- 'grafana[[ .consul.suffix ]]'
|
||||
database:
|
||||
role: grafana
|
||||
pgrole: grafana
|
||||
postgres:
|
||||
database: grafana
|
||||
user: '{{ with secret "[[ .vault.root ]]database/creds/grafana" }}{{ .Data.username }}{{ end }}'
|
||||
password: '{{ with secret "[[ .vault.root ]]database/creds/grafana" }}{{ .Data.password }}{{ end }}'
|
||||
pooler:
|
||||
mode: session
|
||||
prometheus:
|
||||
metrics_url: http://localhost:3000[[ (urlParse .monitoring.grafana.public_url).Path ]]/metrics
|
||||
|
||||
prometheus:
|
||||
enabled: true
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[[- $c := merge .monitoring.alertmanager .monitoring . ]]
|
||||
path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager" {
|
||||
path "[[ $c.vault.pki.path ]]/issue/alertmanager" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
# Read vault metrics
|
||||
path "sys/metrics" {
|
||||
capabilities = ["read", "list"]
|
||||
}
|
||||
|
||||
# Get a cert for Nomad
|
||||
path "pki/nomad/issue/cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a cert for Consul
|
||||
path "pki/consul/issue/cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a consul token
|
||||
path "consul/creds/cluster-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
path "consul/creds/consul-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
path "[[ .vault.root ]]database/creds/[[ .monitoring.grafana.vault.database.role ]]" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
|
||||
path "[[ .vault.root ]]kv/data/service/[[ .instance ]]/grafana" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
[[- $c := merge .monitoring.loki .monitoring . ]]
|
||||
path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-loki" {
|
||||
path "[[ $c.vault.pki.path ]]/issue/loki" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
[[- $c := merge .monitoring.exporters.cluster .monitoring.exporters .monitoring . ]]
|
||||
# Read vault metrics
|
||||
path "sys/metrics" {
|
||||
capabilities = ["read", "list"]
|
||||
}
|
||||
|
||||
# Get a cert for Nomad
|
||||
path "pki/nomad/issue/[[ .instance ]]-cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a cert for Consul
|
||||
path "pki/consul/issue/[[ .instance ]]-cluster-exporter" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
# Get a consul token
|
||||
path "consul/creds/[[ .instance ]]-cluster-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -1,4 +0,0 @@
|
|||
[[- $c := merge .monitoring.exporters.consul .monitoring.exporters .monitoring . ]]
|
||||
path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" {
|
||||
capabilities = ["read"]
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
[[- $c := merge .monitoring.prometheus .monitoring . ]]
|
||||
path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus" {
|
||||
path "[[ $c.vault.pki.path ]]/issue/prometheus" {
|
||||
capabilities = ["update"]
|
||||
}
|
||||
|
||||
|
@ -7,6 +7,6 @@ path "[[ $c.vault.root ]]kv/service/[[ .instance ]]/prometheus" {
|
|||
capabilities = ["read"]
|
||||
}
|
||||
|
||||
path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-prometheus" {
|
||||
path "[[ $c.vault.root ]]consul/creds/prometheus" {
|
||||
capabilities = ["read"]
|
||||
}
|
Loading…
Reference in New Issue