Start: prometheus + alertmanager + exporters

Daniel Berteaud 2024-03-19 13:53:28 +01:00
parent b32fe575af
commit 65441a4a9e
67 changed files with 4446 additions and 0 deletions

43
TODO.md Normal file
View File

@ -0,0 +1,43 @@
- ~~Split exporters into a dedicated job (so they can run on a specific node_pool)~~
- Create a system-type monitoring-agent job with vector + node-exporter
- images
- ~~prometheus~~
- ~~ping-exporter~~
- ~~blackbox-exporter~~
- ~~consul-exporter~~
- vector
- loki
- grafana
- nomad-vector-logger
- pki roles:
- ~~monitoring -> prom~~
- ~~consul -> prom~~
- ~~monitoring -> am~~
- vault pol
- ~~prometheus~~
- ~~issue prom on monitoring~~
- ~~issue prom on consul~~
- ~~consul-exporter~~
- ~~issue consul-exporter on consul~~
- ~~alertmanager~~
- ~~issue alertmanager on monitoring~~
- consul defaults & intentions
- ~~prometheus~~
- ~~alertmanager~~
- loki
- tasks
- ~~alertmanager~~
- vector-aggregator
- vector-agent (in the agent job)
- loki (modularize, or keep it monolithic?)
- grafana
- ~~cluster-metrics (job exporters)~~
- questions
- prom rules: keep them here, or move them to a -conf bundle?
- ~~am alert config (recipients + routing)~~
- ~~http and tcp probes, as exporters are now in a dedicated job~~

4
bundles.yml Normal file
View File

@ -0,0 +1,4 @@
---
dependencies:
- url: ../common.git

View File

@ -0,0 +1,3 @@
Kind = "service-defaults"
Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
Protocol = "http"

View File

@ -0,0 +1,3 @@
Kind = "service-defaults"
Name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
Protocol = "http"

View File

@ -0,0 +1,16 @@
Kind = "service-intentions"
Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
Sources = [
{
Name = "[[ (merge .monitoring.alertmanager .).traefik.instance ]]"
Permissions = [
{
Action = "allow"
HTTP {
PathPrefix = "[[ if eq "" (urlParse .monitoring.alertmanager.public_url).Path ]]/[[ else ]](urlParse .monitoring.alertmanager.public_url).Path[[ end ]]"
Methods = ["GET", "HEAD", "POST", "PUT", "DELETE", "PATCH"]
}
}
]
}
]
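These service-defaults and service-intentions entries are plain Consul config entries, so once rendered they can be loaded with the Consul CLI. A minimal sketch, assuming the rendered output is saved as alertmanager-defaults.hcl and alertmanager-intentions.hcl (hypothetical names) and that CONSUL_HTTP_TOKEN holds a token allowed to write config entries:

# Hypothetical file names; use the actual rendered output paths
consul config write alertmanager-defaults.hcl
consul config write alertmanager-intentions.hcl
# Verify what was stored (name shown for instance=monitoring)
consul config read -kind service-intentions -name monitoring-alertmanager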

View File

@ -0,0 +1,34 @@
Kind = "service-intentions"
Name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
Sources = [
{
Name = "[[ (merge .monitoring.prometheus .).traefik.instance ]]"
Permissions = [
{
Action = "allow"
HTTP {
Methods = ["GET", "HEAD", "POST"]
}
}
]
},
{
Name = "[[ .instance ]]-grafana[[ .consul.suffix ]]"
Permissions = [
{
# Deny access to the admin API from Grafana
Action = "deny"
HTTP {
PathPrefix = "/api/v1/admin"
}
},
{
Action = "allow"
HTTP {
PathPrefix = "/api/v1"
Methods = ["GET", "HEAD", "POST", "PUT"]
}
}
]
}
]

View File

@ -0,0 +1,9 @@
agent_prefix "" {
policy = "read"
}
node_prefix "" {
policy = "read"
}
service_prefix "" {
policy = "read"
}
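This read-only ACL policy is what the Prometheus Consul token relies on for service discovery. In this bundle, token issuance goes through Vault's Consul secrets engine (see init/consul), but the policy itself can be registered by hand. A small sketch, assuming the rules above are saved locally as monitoring-prometheus.hcl:

# Hypothetical local file name
consul acl policy create -name "monitoring-prometheus" -rules @monitoring-prometheus.hcl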

View File

BIN
example/.variables.yml.swp Normal file

Binary file not shown.

3
example/README.md Normal file
View File

@ -0,0 +1,3 @@
# monitoring
Monitoring stack

43
example/TODO.md Normal file
View File

@ -0,0 +1,43 @@
- ~~Split exporters into a dedicated job (so they can run on a specific node_pool)~~
- Create a system-type monitoring-agent job with vector + node-exporter
- images
- prometheus
- ping-exporter
- blackbox-exporter
- consul-exporter
- vector
- loki
- grafana
- nomad-vector-logger
- pki roles:
- ~~monitoring -> prom~~
- ~~consul -> prom~~
- ~~monitoring -> am~~
- vault pol
- prometheus
- ~~issue prom on monitoring~~
- ~~issue prom on consul~~
- consul-exporter
- issue consul-exporter on consul
- alertmanager
- ~~issue alertmanager on monitoring~~
- consul defaults & intentions
- ~~prometheus~~
- ~~alertmanager~~
- loki
- tasks
- ~~alertmanager~~
- vector-aggregator
- vector-agent (in the agent job)
- loki (modularize, or keep it monolithic?)
- grafana
- cluster-metrics (job exporters)
- questions
- prom rules: keep them here, or move them to a -conf bundle?
- ~~am alert config (recipients + routing)~~
- http and tcp probes, as exporters are now in a dedicated job

View File

@ -0,0 +1,3 @@
Kind = "service-defaults"
Name = "monitoring-alertmanager"
Protocol = "http"

View File

@ -0,0 +1,3 @@
Kind = "service-defaults"
Name = "monitoring-prometheus"
Protocol = "http"

View File

@ -0,0 +1,16 @@
Kind = "service-intentions"
Name = "monitoring-alertmanager"
Sources = [
{
Name = "traefik"
Permissions = [
{
Action = "allow"
HTTP {
PathPrefix = "/"
Methods = ["GET", "HEAD", "POST", "PUT", "DELETE", "PATCH"]
}
}
]
}
]

View File

@ -0,0 +1,34 @@
Kind = "service-intentions"
Name = "monitoring-prometheus"
Sources = [
{
Name = "traefik"
Permissions = [
{
Action = "allow"
HTTP {
Methods = ["GET", "HEAD", "POST"]
}
}
]
},
{
Name = "monitoring-grafana"
Permissions = [
{
# Deny access to the admin API from Grafana
Action = "deny"
HTTP {
PathPrefix = "/api/v1/admin"
}
},
{
Action = "allow"
HTTP {
PathPrefix = "/api/v1"
Methods = ["GET", "HEAD", "POST", "PUT"]
}
}
]
}
]

View File

@ -0,0 +1,9 @@
agent_prefix "" {
policy = "read"
}
node_prefix "" {
policy = "read"
}
service_prefix "" {
policy = "read"
}

View File

@ -0,0 +1,41 @@
FROM danielberteaud/alpine:24.3-1 AS builder
ARG AM_VERSION=0.27.0
ADD https://github.com/prometheus/alertmanager/releases/download/v${AM_VERSION}/alertmanager-${AM_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/alertmanager/releases/download/v${AM_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
tar \
&&\
cd /tmp &&\
grep "alertmanager-${AM_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xzf alertmanager-${AM_VERSION}.linux-amd64.tar.gz &&\
mv alertmanager-${AM_VERSION}.linux-amd64 /opt/alertmanager
FROM danielberteaud/alpine:24.3-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/alertmanager:$PATH
COPY --from=builder /opt/alertmanager /opt/alertmanager
RUN set -eux &&\
addgroup -g 9093 alertmanager &&\
adduser --system \
--disabled-password \
--uid 9093 \
--ingroup alertmanager \
--home /opt/alertmanager \
--no-create-home \
--shell /sbin/nologin \
alertmanager &&\
mkdir /data &&\
chown alertmanager:alertmanager /data &&\
chmod 700 /data
WORKDIR /opt/alertmanager
USER alertmanager
EXPOSE 9093
CMD [ "alertmanager", \
"--config.file=/opt/alertmanager/alertmanager.yml", \
"--storage.path=/data" ]

View File

@ -0,0 +1,29 @@
FROM danielberteaud/alpine:24.3-1 AS builder
ARG BLACKBOX_EXPORTER_VERSION=0.24.0
ADD https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_EXPORTER_VERSION}/blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_EXPORTER_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add tar gzip &&\
cd /tmp &&\
grep "blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvf blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mkdir blackbox &&\
mv blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64/blackbox_exporter /usr/local/bin/blackbox_exporter
FROM danielberteaud/alpine:24.3-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV BLACKBOX_CONF=/etc/blackbox.yml
COPY --from=builder /usr/local/bin/blackbox_exporter /usr/local/bin/blackbox_exporter
RUN set -eux &&\
apk --no-cache upgrade &&\
apk --no-cache add ca-certificates curl
COPY root/ /
EXPOSE 9115
CMD ["sh", "-c", "exec blackbox_exporter --config.file=${BLACKBOX_CONF}"]

View File

@ -0,0 +1,65 @@
modules:
http_2xx:
prober: http
http:
preferred_ip_protocol: "ip4"
http_ssl_no_check:
prober: http
http:
preferred_ip_protocol: "ip4"
tls_config:
insecure_skip_verify: true
http_post_2xx:
prober: http
http:
method: POST
preferred_ip_protocol: "ip4"
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
pop3s_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- expect: "^+OK"
tls: true
tls_config:
insecure_skip_verify: false
grpc:
prober: grpc
grpc:
tls: true
preferred_ip_protocol: "ip4"
grpc_plain:
prober: grpc
grpc:
preferred_ip_protocol: "ip4"
tls: false
service: "service1"
ssh_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- expect: "^SSH-2.0-"
- send: "SSH-2.0-blackbox-ssh-check"
irc_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
icmp_ttl5:
prober: icmp
timeout: 5s
icmp:
ttl: 5
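Once the exporter is running, individual modules can be exercised directly against its /probe endpoint, which is exactly how Prometheus drives it later in prometheus.yml. A quick manual check, assuming the exporter is reachable locally on its default port 9115 and example.org is just a placeholder target:

curl 'http://127.0.0.1:9115/probe?module=http_2xx&target=https://example.org'
curl 'http://127.0.0.1:9115/probe?module=tcp_connect&target=example.org:443'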

View File

@ -0,0 +1,21 @@
FROM danielberteaud/alpine:24.3-1 AS builder
ARG CONSUL_EXPORTER_VERSION=0.11.0
ADD https://github.com/prometheus/consul_exporter/releases/download/v${CONSUL_EXPORTER_VERSION}/consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/consul_exporter/releases/download/v${CONSUL_EXPORTER_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add tar gzip &&\
cd /tmp &&\
grep "consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvf consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mv consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64/consul_exporter /usr/local/bin/consul_exporter
FROM danielberteaud/alpine:24.3-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
COPY --from=builder /usr/local/bin/consul_exporter /usr/local/bin/consul_exporter
USER 9107
EXPOSE 9107
CMD ["consul_exporter"]

View File

@ -0,0 +1,24 @@
FROM danielberteaud/alpine:24.3-1 AS builder
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ARG PING_EXPORTER_VERSION=1.1.0
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz /tmp
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_checksums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
tar \
gzip \
&&\
cd /tmp &&\
grep "ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz" ping_exporter_${PING_EXPORTER_VERSION}_checksums.txt | sha256sum -c &&\
tar xvf ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz &&\
mv ping_exporter /usr/local/bin/
FROM danielberteaud/alpine:24.3-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
COPY --from=builder /usr/local/bin/ping_exporter /usr/local/bin/ping_exporter
EXPOSE 9427
CMD ["ping_exporter", "--config.path=/config.yml"]

View File

@ -0,0 +1,4 @@
# targets:
# - foo.bar
# - acme.com
targets: []

View File

@ -0,0 +1,48 @@
FROM danielberteaud/alpine:24.3-1 AS builder
ARG PROM_VERSION=2.50.1
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
curl \
tar \
ca-certificates \
&&\
cd /tmp &&\
grep "prometheus-${PROM_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz &&\
rm -f prometheus-${PROM_VERSION}.linux-amd64.tar.gz &&\
mv prometheus-${PROM_VERSION}.linux-amd64 /opt/prometheus
FROM danielberteaud/alpine:24.3-1
MAINTAINER Daniel Berteaud <dbd@ehtrace.com>
ENV PATH=/opt/prometheus:$PATH
COPY --from=builder /opt/prometheus /opt/prometheus
RUN set -eux &&\
addgroup -g 9090 prometheus &&\
adduser --system \
--disabled-password \
--uid 9090 \
--ingroup prometheus \
--home /opt/prometheus \
--no-create-home \
--shell /sbin/nologin \
prometheus &&\
mkdir /data &&\
chown prometheus:prometheus /data &&\
chmod 700 /data
WORKDIR /opt/prometheus
USER prometheus
EXPOSE 9090
CMD [ "/opt/prometheus/prometheus", \
"--config.file=/opt/prometheus/prometheus.yml", \
"--storage.tsdb.path=/data", \
"--storage.tsdb.wal-compression", \
"--storage.tsdb.wal-compression-type=zstd", \
"--web.console.libraries=/opt/prometheus/console_libraries", \
"--web.console.templates=/opt/prometheus/consoles" ]

17
example/init/consul Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
# vim: syntax=sh
vault write consul/roles/monitoring-prometheus \
ttl=720h \
max_ttl=720h \
consul_policies="monitoring-prometheus"
vault write consul/roles/monitoring-consul-exporter \
ttl=720h \
max_ttl=720h \
consul_policies="monitoring-prometheus"
vault write consul/roles/monitoring-cluster-exporter \
ttl=720h \
max_ttl=720h \
consul_policies="monitoring-prometheus"
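These roles only define how Vault mints Consul tokens; nothing is issued at this point. A quick way to check the wiring after running this script (the monitoring-prometheus policy must already exist in Consul):

# Each read returns a short-lived Consul token carrying the monitoring-prometheus policy
vault read consul/creds/monitoring-prometheus
vault read consul/creds/monitoring-consul-exporter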

156
example/init/pki Executable file
View File

@ -0,0 +1,156 @@
#!/bin/sh
# vim: syntax=sh
set -euo pipefail
TMP=$(mktemp -d)
INITIAL_SETUP=false
if [ "$(vault secrets list -format json | jq -r '.["pki/monitoring/"].type')" != "pki" ]; then
INITIAL_SETUP=true
fi
if [ "${INITIAL_SETUP}" = "true" ]; then
# Enable the secret engine
echo "Mounting new PKI secret engine at pki/monitoring"
vault secrets enable -path=pki/monitoring pki
else
echo "Secret engine already mounted at pki/monitoring"
fi
# Configure max-lease-ttl
echo "Tune PKI secret engine"
vault secrets tune -max-lease-ttl=131400h pki/monitoring
# Configure PKI URLs
echo "Configure URL endpoints"
vault write pki/monitoring/config/urls \
issuing_certificates="${VAULT_ADDR}/v1pki/monitoring/ca" \
crl_distribution_points="${VAULT_ADDR}/v1pki/monitoring/crl" \
ocsp_servers="${VAULT_ADDR}/v1pki/monitoring/ocsp"
vault write pki/monitoring/config/cluster \
path="${VAULT_ADDR}/v1pki/monitoring"
vault write pki/monitoring/config/crl \
auto_rebuild=true \
enable_delta=true
# Configure tidy
echo "Configure auto tidy for the PKI"
vault write pki/monitoring/config/auto-tidy \
enabled=true \
tidy_cert_store=true \
tidy_expired_issuers=true \
tidy_revocation_queue=true \
tidy_revoked_cert_issuer_associations=true \
tidy_revoked_certs=true \
tidy_acme=true \
tidy_cross_cluster_revoked_certs=true \
tidy_move_legacy_ca_bundle=true \
maintain_stored_certificate_counts=true
if [ "${INITIAL_SETUP}" = "true" ]; then
# Generate an internal CA
echo "Generating an internal CA"
vault write -format=json pki/monitoring/intermediate/generate/internal \
common_name="monitoring Certificate Authority" \
ttl="131400h" \
organization="ACME Corp" \
ou="Monitoring" \
locality="FooBar Ville" \
key_type=rsa \
key_bits=4096 \
| jq -r '.data.csr' > ${TMP}/monitoring.csr
# Sign this PKI with a root PKI
echo "Signing the new CA with the authority from pki/root"
vault write -format=json pki/root/root/sign-intermediate \
csr=@${TMP}/monitoring.csr \
format=pem_bundle \
ttl="131400h" \
| jq -r '.data.certificate' > ${TMP}/monitoring.crt
# Update the intermediate CA with the signed one
echo "Update the new CA with the signed version"
vault write pki/monitoring/intermediate/set-signed \
certificate=@${TMP}/monitoring.crt
fi
# Remove temp files
echo "Cleaning temp files"
rm -rf ${TMP}
# Create a role for alertmanager
vault write pki/monitoring/roles/monitoring-alertmanager \
allowed_domains="monitoring" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=true \
server_flag=true \
client_flag=true \
allow_wildcard_certificates=false \
max_ttl=100h \
ou="Monitoring"
# Create a role for prometheus (which will only be a client, for AlertManager)
vault write pki/monitoring/roles/monitoring-prometheus \
allowed_domains="monitoring" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=false \
server_flag=false \
client_flag=true \
allow_wildcard_certificates=false \
max_ttl=100h \
ou="Monitoring"
# Create a role for metrics exporters (server only)
vault write pki/monitoring/roles/metrics \
allowed_domains="monitoring" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=true \
server_flag=true \
client_flag=false \
allow_wildcard_certificates=false \
require_cn=false \
max_ttl=72h \
no_store=true \
ou="Monitoring"
# Create a role on the Nomad PKI for the cluster exporter
vault write pki/nomad/roles/monitoring-cluster-exporter \
allowed_domains='nomad.consul' \
allow_subdomains=true \
allow_wildcard_certificates=false \
max_ttl=168h \
allow_ip_sans=false \
server_flag=false \
client_flag=true \
ou="Cluster metrics exporter"
# Create a role on the Consul PKI for the cluster exporter
vault write pki/consul/roles/monitoring-cluster-exporter \
allowed_domains="consul.consul" \
allow_bare_domains=false \
allow_subdomains=true \
allow_wildcard_certificates=false \
max_ttl=168h \
server_flag=false \
client_flag=true \
ou="Cluster metrics exporter"

View File

@ -0,0 +1,419 @@
job "monitoring-exporters" {
datacenters = ["dc1"]
region = "global"
# Run exporters. Use a separate job so exporters can run in a distinct node_pool
group "exporters" {
count = 1
network {
mode = "bridge"
port "ping" {}
port "blackbox" {}
port "consul" {}
port "cluster" {}
}
service {
name = "monitoring-ping-exporter"
port = "ping"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
metrics-port = "${NOMAD_HOST_PORT_ping}"
}
}
service {
name = "monitoring-blackbox-exporter"
port = "blackbox"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
service {
name = "monitoring-consul-exporter"
port = "ping"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
metrics-port = "${NOMAD_HOST_PORT_consul}"
}
}
service {
name = "monitoring-cluster-exporter"
port = "cluster"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
# Export consul services status to prometheus
task "consul-exporter" {
driver = "docker"
config {
image = "danielberteaud/consul-exporter:0.11.0-2"
readonly_rootfs = true
pids_limit = 30
command = "/local/consul-exporter"
}
# Use a template block instead of env {} so we can fetch values from vault
template {
data = <<_EOT
LANG=fr_FR.utf8
TZ=Europe/Paris
_EOT
destination = "secrets/.env"
perms = 400
env = true
}
vault {
policies = ["monitoring-consul-exporter"]
env = false
disable_file = true
change_mode = "noop"
}
template {
data = <<_EOT
#!/bin/sh
set -euo pipefail
exec consul_exporter \
--web.listen-address=127.0.0.1:9107 \
--consul.server=http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 \
--consul.request-limit=20
_EOT
destination = "local/consul-exporter"
perms = 755
}
template {
data = <<_EOT
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/monitoring-consul-exporter" }}{{ .Data.token }}{{ end }}
_EOT
destination = "secrets/.consul.env"
uid = 100000
gid = 100000
perms = 400
env = true
}
resources {
cpu = 20
memory = 64
}
}
# The cluster metrics proxy exposes Prometheus metrics from the various nodes of the cluster
# (Nomad, Consul and Vault)
# It also exposes the other exporters' metrics with mTLS
task "cluster-metrics-proxy" {
driver = "docker"
user = 8685
lifecycle {
hook = "poststart"
sidecar = true
}
config {
image = "nginxinc/nginx-unprivileged:alpine"
readonly_rootfs = true
pids_limit = 30
# Mount the config in nginx conf dir
volumes = [
"secrets/metrics.conf:/etc/nginx/conf.d/default.conf"
]
mount {
type = "tmpfs"
target = "/tmp"
tmpfs_options {
size = 3000000
}
}
}
vault {
policies = ["monitoring-cluster-exporter", "metrics"]
env = false
disable_file = true
change_mode = "noop"
}
# This is the main nginx configuration, which will proxypass requests to the real metrics endpoints
template {
data = <<_EOT
# Cluster exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_cluster" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
set $consul_token "{{ with secret "consul/creds/monitoring-cluster-exporter" }}{{ .Data.token }}{{ end }}";
{{- range service "nomad-client" }}
location /nomad-client/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/metrics?format=prometheus;
proxy_ssl_certificate /secrets/nomad_client_bundle.pem;
proxy_ssl_certificate_key /secrets/nomad_client_bundle.pem;
proxy_ssl_verify on;
proxy_ssl_name client.{{ env "NOMAD_REGION" }}.nomad;
proxy_ssl_trusted_certificate /local/nomad_ca.crt;
}
{{- end }}
{{- range service "nomad" }}
{{- if .Tags | contains "http" }}
location /nomad/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/metrics?format=prometheus;
proxy_ssl_certificate /secrets/nomad_client_bundle.pem;
proxy_ssl_certificate_key /secrets/nomad_client_bundle.pem;
proxy_ssl_verify on;
proxy_ssl_name server.{{ env "NOMAD_REGION" }}.nomad;
proxy_ssl_trusted_certificate /local/nomad_ca.crt;
}
{{- end }}
{{- end }}
{{- range service "consul" }}
location /consul/{{ .Node }} {
proxy_pass https://{{ .Address }}:8501/v1/agent/metrics?format=prometheus;
proxy_set_header X-Consul-Token $consul_token;
proxy_ssl_certificate /secrets/consul_client_bundle.pem;
proxy_ssl_certificate_key /secrets/consul_client_bundle.pem;
proxy_ssl_verify off;
proxy_ssl_trusted_certificate /local/consul_ca.crt;
}
{{- end }}
{{- range service "vault" }}
location /vault/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/sys/metrics?format=prometheus;
proxy_ssl_verify on;
proxy_ssl_trusted_certificate /etc/ssl/cert.pem;
proxy_set_header X-Forwarded-For "$proxy_add_x_forwarded_for";
proxy_set_header X-Real-IP "$remote_addr";
proxy_set_header X-Forwarded-Proto "$scheme";
proxy_set_header X-Scheme "$scheme";
proxy_set_header X-Forwarded-Host "$host";
proxy_set_header X-Forwarded-Port "$server_port";
}
{{- end }}
location / {
root /usr/share/nginx/html;
index index.html;
}
}
# Ping exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_ping" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location /metrics {
proxy_pass http://127.0.0.1:9427;
}
}
# Blackbox exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_blackbox" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location / {
proxy_pass http://127.0.0.1:9115;
}
}
# Consul exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_consul" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location /metrics {
proxy_pass http://127.0.0.1:9107;
}
}
_EOT
destination = "secrets/metrics.conf"
perms = "0440"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
# Get certificate to add mTLS to metrics endpoints
template {
data = <<_EOT
{{- with pkiCert "pki/monitoring/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
destination = "secrets/metrics.bundle.pem"
change_mode = "signal"
change_signal = "SIGHUP"
}
# Get the CA for the monitoring PKI
template {
data = <<_EOT
{{ with secret "pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/monitoring.ca.pem"
}
# Get a Nomad client certificate
template {
data = <<_EOT
{{- with pkiCert "pki/nomad/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.nomad.consul" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
destination = "secrets/nomad_client_bundle.pem"
perms = "0400"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
# The CA chain for Nomad
template {
data = <<_EOT
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/nomad_ca.crt"
}
# Same for Consul
template {
data = <<_EOT
{{- with pkiCert "pki/consul/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.consul.consul" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
destination = "secrets/consul_client_bundle.pem"
perms = "0400"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
template {
data = <<_EOT
{{ with secret "pki/consul/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/consul_ca.crt"
}
resources {
cpu = 10
memory = 18
}
}
}
}

1344
example/monitoring.nomad.hcl Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,3 @@
path "pki/monitoring/issue/metrics" {
capabilities = ["update"]
}

View File

@ -0,0 +1,8 @@
path "pki/monitoring/issue/monitoring-alertmanager" {
capabilities = ["update"]
}
path "kv/service/monitoring/alertmanager" {
capabilities = ["read"]
}

View File

@ -0,0 +1,20 @@
# Read vault metrics
path "sys/metrics" {
capabilities = ["read", "list"]
}
# Get a cert for Nomad
path "pki/nomad/issue/monitoring-cluster-exporter" {
capabilities = ["update"]
}
# Get a cert for Consul
path "pki/consul/issue/monitoring-cluster-exporter" {
capabilities = ["update"]
}
# Get a consul token
path "consul/creds/monitoring-cluster-exporter" {
capabilities = ["read"]
}

View File

@ -0,0 +1,4 @@
path "consul/creds/monitoring-consul-exporter" {
capabilities = ["read"]
}

View File

@ -0,0 +1,12 @@
path "pki/monitoring/issue/monitoring-prometheus" {
capabilities = ["update"]
}
path "kv/service/monitoring/prometheus" {
capabilities = ["read"]
}
path "consul/creds/monitoring-prometheus" {
capabilities = ["read"]
}
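These files are plain Vault ACL policies; once rendered they are registered with vault policy write and then referenced by name in the jobs' vault {} blocks. A minimal sketch, with hypothetical local file names:

vault policy write monitoring-prometheus ./vault/prometheus.hcl
vault policy write monitoring-consul-exporter ./vault/consul-exporter.hcl
vault policy write monitoring-cluster-exporter ./vault/cluster-exporter.hcl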

View File

@ -0,0 +1,41 @@
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
ARG AM_VERSION=[[ .monitoring.alertmanager.version ]]
ADD https://github.com/prometheus/alertmanager/releases/download/v${AM_VERSION}/alertmanager-${AM_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/alertmanager/releases/download/v${AM_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
tar \
&&\
cd /tmp &&\
grep "alertmanager-${AM_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xzf alertmanager-${AM_VERSION}.linux-amd64.tar.gz &&\
mv alertmanager-${AM_VERSION}.linux-amd64 /opt/alertmanager
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
MAINTAINER [[ .docker.maintainer ]]
ENV PATH=/opt/alertmanager:$PATH
COPY --from=builder /opt/alertmanager /opt/alertmanager
RUN set -eux &&\
addgroup -g 9093 alertmanager &&\
adduser --system \
--disabled-password \
--uid 9093 \
--ingroup alertmanager \
--home /opt/alertmanager \
--no-create-home \
--shell /sbin/nologin \
alertmanager &&\
mkdir /data &&\
chown alertmanager:alertmanager /data &&\
chmod 700 /data
WORKDIR /opt/alertmanager
USER alertmanager
EXPOSE 9093
CMD [ "alertmanager", \
"--config.file=/opt/alertmanager/alertmanager.yml", \
"--storage.path=/data" ]

View File

@ -0,0 +1,29 @@
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
ARG BLACKBOX_EXPORTER_VERSION=[[ .monitoring.exporters.blackbox.version ]]
ADD https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_EXPORTER_VERSION}/blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/blackbox_exporter/releases/download/v${BLACKBOX_EXPORTER_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add tar gzip &&\
cd /tmp &&\
grep "blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvf blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mkdir blackbox &&\
mv blackbox_exporter-${BLACKBOX_EXPORTER_VERSION}.linux-amd64/blackbox_exporter /usr/local/bin/blackbox_exporter
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
MAINTAINER [[ .docker.maintainer ]]
ENV BLACKBOX_CONF=/etc/blackbox.yml
COPY --from=builder /usr/local/bin/blackbox_exporter /usr/local/bin/blackbox_exporter
RUN set -eux &&\
apk --no-cache upgrade &&\
apk --no-cache add ca-certificates curl
COPY root/ /
EXPOSE 9115
CMD ["sh", "-c", "exec blackbox_exporter --config.file=${BLACKBOX_CONF}"]

View File

@ -0,0 +1,65 @@
modules:
http_2xx:
prober: http
http:
preferred_ip_protocol: "ip4"
http_ssl_no_check:
prober: http
http:
preferred_ip_protocol: "ip4"
tls_config:
insecure_skip_verify: true
http_post_2xx:
prober: http
http:
method: POST
preferred_ip_protocol: "ip4"
tcp_connect:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
pop3s_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- expect: "^+OK"
tls: true
tls_config:
insecure_skip_verify: false
grpc:
prober: grpc
grpc:
tls: true
preferred_ip_protocol: "ip4"
grpc_plain:
prober: grpc
grpc:
preferred_ip_protocol: "ip4"
tls: false
service: "service1"
ssh_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- expect: "^SSH-2.0-"
- send: "SSH-2.0-blackbox-ssh-check"
irc_banner:
prober: tcp
tcp:
preferred_ip_protocol: "ip4"
query_response:
- send: "NICK prober"
- send: "USER prober prober prober :prober"
- expect: "PING :([^ ]+)"
send: "PONG ${1}"
- expect: "^:[^ ]+ 001"
icmp:
prober: icmp
icmp_ttl5:
prober: icmp
timeout: 5s
icmp:
ttl: 5

View File

@ -0,0 +1,21 @@
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
ARG CONSUL_EXPORTER_VERSION=[[ .monitoring.exporters.consul.version ]]
ADD https://github.com/prometheus/consul_exporter/releases/download/v${CONSUL_EXPORTER_VERSION}/consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/consul_exporter/releases/download/v${CONSUL_EXPORTER_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add tar gzip &&\
cd /tmp &&\
grep "consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvf consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64.tar.gz &&\
mv consul_exporter-${CONSUL_EXPORTER_VERSION}.linux-amd64/consul_exporter /usr/local/bin/consul_exporter
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
MAINTAINER [[ .docker.maintainer ]]
COPY --from=builder /usr/local/bin/consul_exporter /usr/local/bin/consul_exporter
USER 9107
EXPOSE 9107
CMD ["consul_exporter"]

View File

@ -0,0 +1,24 @@
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
MAINTAINER [[ .docker.maintainer ]]
ARG PING_EXPORTER_VERSION=[[ .monitoring.exporters.ping.version ]]
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz /tmp
ADD https://github.com/czerwonk/ping_exporter/releases/download/${PING_EXPORTER_VERSION}/ping_exporter_${PING_EXPORTER_VERSION}_checksums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
tar \
gzip \
&&\
cd /tmp &&\
grep "ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz" ping_exporter_${PING_EXPORTER_VERSION}_checksums.txt | sha256sum -c &&\
tar xvf ping_exporter_${PING_EXPORTER_VERSION}_linux_amd64.tar.gz &&\
mv ping_exporter /usr/local/bin/
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
MAINTAINER [[ .docker.maintainer ]]
COPY --from=builder /usr/local/bin/ping_exporter /usr/local/bin/ping_exporter
EXPOSE 9427
CMD ["ping_exporter", "--config.path=/config.yml"]

View File

@ -0,0 +1,4 @@
# targets:
# - foo.bar
# - acme.com
targets: []

View File

@ -0,0 +1,48 @@
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder
ARG PROM_VERSION=[[ .monitoring.prometheus.version ]]
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz /tmp
ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/sha256sums.txt /tmp
RUN set -eux &&\
apk --no-cache add \
curl \
tar \
ca-certificates \
&&\
cd /tmp &&\
grep "prometheus-${PROM_VERSION}.linux-amd64.tar.gz" sha256sums.txt | sha256sum -c &&\
tar xvzf prometheus-${PROM_VERSION}.linux-amd64.tar.gz &&\
rm -f prometheus-${PROM_VERSION}.linux-amd64.tar.gz &&\
mv prometheus-${PROM_VERSION}.linux-amd64 /opt/prometheus
FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]]
MAINTAINER [[ .docker.maintainer ]]
ENV PATH=/opt/prometheus:$PATH
COPY --from=builder /opt/prometheus /opt/prometheus
RUN set -eux &&\
addgroup -g 9090 prometheus &&\
adduser --system \
--disabled-password \
--uid 9090 \
--ingroup prometheus \
--home /opt/prometheus \
--no-create-home \
--shell /sbin/nologin \
prometheus &&\
mkdir /data &&\
chown prometheus:prometheus /data &&\
chmod 700 /data
WORKDIR /opt/prometheus
USER prometheus
EXPOSE 9090
CMD [ "/opt/prometheus/prometheus", \
"--config.file=/opt/prometheus/prometheus.yml", \
"--storage.tsdb.path=/data", \
"--storage.tsdb.wal-compression", \
"--storage.tsdb.wal-compression-type=zstd", \
"--web.console.libraries=/opt/prometheus/console_libraries", \
"--web.console.templates=/opt/prometheus/consoles" ]

17
init/consul Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
# vim: syntax=sh
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-prometheus \
ttl=720h \
max_ttl=720h \
consul_policies="[[ .instance ]]-prometheus"
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-consul-exporter \
ttl=720h \
max_ttl=720h \
consul_policies="[[ .instance ]]-prometheus"
vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-cluster-exporter \
ttl=720h \
max_ttl=720h \
consul_policies="[[ .instance ]]-prometheus"

69
init/pki Executable file
View File

@ -0,0 +1,69 @@
#!/bin/sh
set -euo pipefail
[[ $c := merge .monitoring . ]]
[[ template "common/vault.mkpki.sh" $c ]]
# Create a role for alertmanager
vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-alertmanager \
allowed_domains="[[ .instance ]]" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=true \
server_flag=true \
client_flag=true \
allow_wildcard_certificates=false \
max_ttl=100h \
ou="[[ $c.vault.pki.ou ]]"
# Create a role for prometheus (which will only be a client, for AlertManager)
vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-prometheus \
allowed_domains="[[ .instance ]]" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=false \
server_flag=false \
client_flag=true \
allow_wildcard_certificates=false \
max_ttl=100h \
ou="[[ $c.vault.pki.ou ]]"
# Create a role for metrics exporters (server only)
vault write [[ $c.vault.pki.path ]]/roles/metrics \
allowed_domains="[[ .instance ]]" \
allow_bare_domains=false \
allow_subdomains=true \
allow_localhost=false \
allow_ip_sans=true \
server_flag=true \
client_flag=false \
allow_wildcard_certificates=false \
require_cn=false \
max_ttl=72h \
no_store=true \
ou="[[ $c.vault.pki.ou ]]"
# Create a role on the Nomad PKI for the cluster exporter
vault write pki/nomad/roles/[[ .instance ]]-cluster-exporter \
allowed_domains='nomad.[[ .consul.domain ]]' \
allow_subdomains=true \
allow_wildcard_certificates=false \
max_ttl=168h \
allow_ip_sans=false \
server_flag=false \
client_flag=true \
ou="Cluster metrics exporter"
# Create a role on the Consul PKI for the cluster exporter
vault write pki/consul/roles/[[ .instance ]]-cluster-exporter \
allowed_domains="consul.[[ .consul.domain ]]" \
allow_bare_domains=false \
allow_subdomains=true \
allow_wildcard_certificates=false \
max_ttl=168h \
server_flag=false \
client_flag=true \
ou="Cluster metrics exporter"

View File

@ -0,0 +1,253 @@
job "[[ .instance ]]-exporters" {
[[- $c := merge .monitoring.exporters . ]]
[[ template "common/job_start" $c ]]
# Run exporters. Use a separate job so exporters can run in a distinct node_pool
group "exporters" {
count = [[ $c.count ]]
network {
mode = "bridge"
port "ping" {}
port "blackbox" {}
port "consul" {}
port "cluster" {}
}
service {
name = "[[ .instance ]]-ping-exporter[[ .consul.suffix ]]"
port = "ping"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
metrics-port = "${NOMAD_HOST_PORT_ping}"
}
}
service {
name = "[[ .instance ]]-blackbox-exporter[[ .consul.suffix ]]"
port = "blackbox"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
service {
name = "[[ .instance ]]-consul-exporter[[ .consul.suffix ]]"
port = "ping"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
metrics-port = "${NOMAD_HOST_PORT_consul}"
}
}
service {
name = "[[ .instance ]]-cluster-exporter[[ .consul.suffix ]]"
port = "cluster"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
[[- if gt (len $c.ping.probes) 0 ]]
[[- $e := merge $c.ping $c ]]
# Ping exporter will collect ICMP ping stats and expose them
# Note: we could do it with blackbox, but as pings require privileges, it's better to grant them
# to a smaller, more focused container. This one only handles ICMP ping checks, and only from the configuration file
task "ping-exporter" {
driver = "[[ $e.nomad.driver ]]"
config {
image = "[[ $e.image ]]"
readonly_rootfs = true
pids_limit = 30
# Pings require privileges
privileged = true
userns_mode = "host"
command = "ping_exporter"
args = [
"--web.listen-address=127.0.0.1:9427",
"--config.path=/local/config.yml"
]
}
[[ template "common/file_env" $e ]]
template {
data = <<_EOT
[[ template "monitoring/ping_exporter/config.yml" $e ]]
_EOT
destination = "local/config.yml"
}
[[ template "common/resources" $e ]]
}
[[- end ]]
[[- if or (gt (len $c.blackbox.tcp_probes) 0) (gt (len $c.blackbox.http_probes) 0) ]]
[[- $e := merge $c.blackbox $c ]]
# Blackbox exporter will probe http/tcp targets and expose them
# for prometheus
task "blackbox-exporter" {
driver = "[[ $e.nomad.driver ]]"
config {
image = "[[ $e.image ]]"
readonly_rootfs = true
pids_limit = 30
}
[[ template "common/file_env" $e ]]
[[ template "common/resources" $e ]]
}
[[- end ]]
# Export consul services status to prometheus
task "consul-exporter" {
[[- $e := merge $c.consul $c ]]
driver = "[[ $e.nomad.driver ]]"
config {
image = "[[ $e.image ]]"
readonly_rootfs = true
pids_limit = 30
command = "/local/consul-exporter"
}
[[ template "common/file_env" $e ]]
[[ template "common/vault.policies" $e ]]
template {
data = <<_EOT
[[ template "monitoring/consul-exporter/start.sh" $e ]]
_EOT
destination = "local/consul-exporter"
perms = 755
}
template {
data = <<_EOT
CONSUL_HTTP_TOKEN={{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" }}{{ .Data.token }}{{ end }}
_EOT
destination = "secrets/.consul.env"
uid = 100000
gid = 100000
perms = 400
env = true
}
[[ template "common/resources" $e ]]
}
# The cluster metrics proxy exposes Prometheus metrics from the various nodes of the cluster
# (Nomad, Consul and Vault)
# It also exposes the other exporters' metrics with mTLS
task "cluster-metrics-proxy" {
[[- $e := merge $c.cluster $c ]]
driver = "[[ $e.nomad.driver ]]"
user = 8685
lifecycle {
hook = "poststart"
sidecar = true
}
config {
image = "[[ $e.image ]]"
readonly_rootfs = true
pids_limit = 30
# Mount the config in nginx conf dir
volumes = [
"secrets/metrics.conf:/etc/nginx/conf.d/default.conf"
]
[[ template "common/tmpfs" "/tmp" ]]
}
[[ template "common/vault.policies" $e ]]
# This is the main nginx configuration, which will proxypass requests to the real metrics endpoints
template {
data =<<_EOT
[[ template "monitoring/cluster-exporter/nginx.conf" $e ]]
_EOT
destination = "secrets/metrics.conf"
perms = "0440"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
# Get certificate to add mTLS to metrics endpoints
template {
data =<<_EOT
{{- with pkiCert "[[ .prometheus.vault_pki ]]/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
destination = "secrets/metrics.bundle.pem"
change_mode = "signal"
change_signal = "SIGHUP"
}
# Get the CA for the monitoring PKI
template {
data =<<_EOT
{{ with secret "[[ .vault.root ]]pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/monitoring.ca.pem"
}
# Get a Nomad client certificate
template {
data = <<_EOT
{{- with pkiCert "pki/nomad/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
destination = "secrets/nomad_client_bundle.pem"
perms = "0400"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
# The CA chain for Nomad
template {
data = <<_EOT
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/nomad_ca.crt"
}
# Same for Consul
template {
data = <<_EOT
{{- with pkiCert "pki/consul/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
destination = "secrets/consul_client_bundle.pem"
perms = "0400"
uid = 108685
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
template {
data = <<_EOT
{{ with secret "pki/consul/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/consul_ca.crt"
}
[[ template "common/resources" $e ]]
}
}
}

376
monitoring.nomad.hcl Normal file
View File

@ -0,0 +1,376 @@
job "[[ .instance ]]" {
[[ template "common/job_start" . ]]
# Metrics is running prometheus and various exporters
group "metrics" {
[[- $c := merge .monitoring.prometheus .monitoring . ]]
shutdown_delay = "6s"
count = [[ $c.count ]]
network {
mode = "bridge"
port "metrics" {}
}
[[ template "common/volumes" $c ]]
service {
name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]"
port = 9090
[[ template "common/service_meta" $c ]]
[[ template "common/connect" $c ]]
check {
name = "health"
type = "http"
expose = true
path = "/-/healthy"
interval = "15s"
timeout = "8s"
check_restart {
limit = 10
grace = "5m"
}
}
tags = [
[[ template "common/traefik_tags" $c ]]
]
}
[[ template "common/task.metrics_proxy" $c ]]
# The main prometheus task
task "prometheus" {
driver = "[[ $c.nomad.driver ]]"
leader = true
config {
image = "[[ $c.image ]]"
readonly_rootfs = true
pids_limit = 200
command = "prometheus"
args = [
"--config.file=/local/prometheus.yml",
"--log.level=debug",
"--web.listen-address=127.0.0.1:9090",
"--storage.tsdb.path=/data",
"--storage.tsdb.retention.time=[[ $c.retention ]]",
"--web.console.libraries=/opt/prometheus/console_libraries",
"--web.console.templates=/opt/prometheus/consoles",
"--web.external-url=[[ $c.public_url ]]",
"--web.route-prefix=[[ if eq "" (urlParse $c.public_url).Path ]]/[[ else ]](urlParse $c.public_url).Path[[ end ]]"
]
}
[[ template "common/vault.policies" $c ]]
[[ template "common/artifacts" $c ]]
# Main configuration for prometheus
template {
data = <<_EOT
[[ tmpl.Exec "monitoring/prometheus/prometheus.yml" $c | replaceAll "${" "$${" ]]
_EOT
destination = "local/prometheus.yml"
uid = 100000
gid = 109090
perms = 640
change_mode = "signal"
change_signal = "SIGHUP"
}
# Alert rules
[[- range (file.ReadDir "bundles/monitoring/templates/prometheus/rules") ]]
[[- if not (file.Exists (printf "prometheus/rules/%s" .)) ]]
template {
data = <<_EOT
[[ file.Read (printf "bundles/monitoring/templates/prometheus/rules/%s" .) ]]
_EOT
destination = "local/rules/[[ . ]]"
left_delimiter = "{{{"
right_delimiter = "}}}"
}
[[- end ]]
[[- end ]]
[[- if file.Exists "prometheus/rules" ]]
[[- range (file.ReadDir "prometheus/rules") ]]
template {
data = <<_EOT
[[ file.Read (printf "prometheus/rules/%s" .) ]]
_EOT
destination = "local/rules/[[ . ]]"
left_delimiter = "{{{"
right_delimiter = "}}}"
}
[[- end ]]
[[- end ]]
[[- range $k, $v := $c.alert_rules ]]
artifact {
source = "[[ $v.url ]]"
destination = "local/rules/[[ $k ]].yml"
mode = "file"
}
[[- end ]]
# A client cert, to connect to the AlertManager API
template {
data = <<_EOT
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus"
(printf "common_name=prometheus-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}}
{{ .Cert }}
{{ .Key }}
{{- end -}}
_EOT
destination = "secrets/prometheus.bundle.pem"
uid = 100000
gid = 109090
perms = "0440"
change_mode = "signal"
change_signal = "SIGHUP"
}
# The monitoring CA chain, to validate AlertManager cert
template {
data = <<_EOT
{{ with secret "[[ $c.vault.pki.path ]]/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/monitoring.ca.pem"
uid = 100000
gid = 100000
change_mode = "signal"
change_signal = "SIGHUP"
}
# Persistent data
volume_mount {
volume = "data"
destination = "/data"
}
[[ template "common/resources" $c ]]
}
}
group "alerts" {
[[- $c := merge .monitoring.alertmanager .monitoring . ]]
count = [[ $c.count ]]
network {
mode = "bridge"
port "web-tls" {}
port "cluster" {}
port "metrics" {}
}
[[ template "common/volumes" $c ]]
# This service is used for the different instances of alertmanager to communicate
service {
name = "[[ .instance ]]-alertmanager-gossip[[ .consul.suffix ]]"
port = "cluster"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
# This service is used by prometheus. As it needs to be able to reach every instance, it cannot use
# the service mesh. The exposed port uses mTLS, so it's safe to expose it outside of the mesh
service {
name = "[[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]"
port = "web-tls"
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
# This service is exposed through the service mesh
# and can be used to reach the web interface through Traefik
service {
name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]"
port = 9093
[[ template "common/service_meta" $c ]]
[[ template "common/connect" $c ]]
check {
name = "health"
type = "http"
expose = true
path = "/-/healthy"
interval = "20s"
timeout = "8s"
check_restart {
limit = 12
grace = "30s"
}
}
tags = [
[[ template "common/traefik_tags" $c ]]
]
}
[[ template "common/task.metrics_proxy" $c ]]
# This task will handle mTLS to the AlertManager API
# And expose it as plain http on 127.0.0.1 for Traefik (through the service mesh) and for the metrics proxy
task "tls-proxy" {
driver = "[[ $c.nomad.driver ]]"
user = 9093
config {
image = "nginxinc/nginx-unprivileged:alpine"
force_pull = true
readonly_rootfs = true
pids_limit = 30
volumes = [
"local/alertmanager.conf:/etc/nginx/conf.d/default.conf:ro",
]
[[ template "common/tmpfs" "/tmp" ]]
}
[[ template "common/vault.policies" $c ]]
lifecycle {
hook = "poststart"
sidecar = true
}
template {
data = <<_EOT
[[ template "monitoring/alertmanager/nginx.conf" $c ]]
_EOT
destination = "local/alertmanager.conf"
}
# Certificate used by AlertManager
template {
data = <<_EOT
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager"
(printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
destination = "secrets/alertmanager.bundle.pem"
uid = 109093
gid = 100000
perms = "0440"
change_mode = "signal"
change_signal = "SIGHUP"
}
# The trusted CA
template {
data = <<_EOT
{{ with secret "[[ $c.vault.pki.path ]]/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/monitoring.ca.pem"
}
resources {
cpu = 10
memory = 18
}
}
# The main alertmanager task
task "alertmanager" {
driver = "[[ $c.nomad.driver ]]"
leader = true
config {
image = "[[ $c.image ]]"
readonly_rootfs = true
pids_limit = 200
command = "/local/alertmanager"
}
[[ template "common/vault.policies" $c ]]
[[ template "common/file_env" $c ]]
template {
data = <<_EOT
[[- if isKind "map" $c.custom_config ]]
[[ merge $c.custom_config (tmpl.Exec "monitoring/alertmanager/alertmanager.yml" $c | yaml) | toYAML ]]
[[- else if isKind "string" $c.custom_config ]]
[[ merge ($c.custom_config | yaml) (tmpl.Exec "monitoring/alertmanager/alertmanager.yml" $c | yaml) | toYAML ]]
[[- else ]]
# Invalid custom config, using template only
[[ template "monitoring/alertmanager/alertmanager.yml" $c ]]
[[- end ]]
_EOT
destination = "secrets/alertmanager.yml"
}
template {
data = <<_EOT
[[ template "monitoring/alertmanager/cluster_tls.yml" $c ]]
_EOT
destination = "local/cluster_tls.yml"
}
template {
data = <<_EOT
[[ template "monitoring/alertmanager/web_tls.yml" $c ]]
_EOT
destination = "local/web_tls.yml"
}
template {
data = <<_EOT
[[ template "monitoring/alertmanager/start.sh" $c ]]
_EOT
destination = "local/alertmanager"
uid = 100000
gid = 100000
perms = "0755"
}
# Certificate used by AlertManager
template {
data = <<_EOT
{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager"
(printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX"))
(printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster"))
(printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
destination = "secrets/alertmanager.bundle.pem"
uid = 109093
gid = 109090
perms = "0440"
change_mode = "signal"
change_signal = "SIGHUP"
}
# The trusted CA
template {
data = <<_EOT
{{ with secret "[[ $c.vault.pki.path ]]/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/monitoring.ca.pem"
}
volume_mount {
volume = "data"
destination = "/data"
}
[[ template "common/resources" $c ]]
}
}
}
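This job file still contains [[ ]] template placeholders, so it has to be rendered by the bundle tooling before Nomad will accept it. Once rendered, the usual workflow applies; file and job names below assume instance=monitoring and are only illustrative:

# monitoring.rendered.nomad.hcl is a hypothetical name for the rendered output
nomad job plan monitoring.rendered.nomad.hcl
nomad job run monitoring.rendered.nomad.hcl
# Check that both groups (metrics and alerts) got placed
nomad job status monitoring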

View File

@ -0,0 +1,5 @@
global:
smtp_from: '[[ .email.from ]]'
smtp_smarthost: localhost:25
smtp_require_tls: false

View File

@ -0,0 +1,10 @@
tls_server_config:
cert_file: /secrets/alertmanager.bundle.pem
key_file: /secrets/alertmanager.bundle.pem
client_auth_type: RequireAndVerifyClientCert
client_ca_file: /local/monitoring.ca.pem
tls_client_config:
cert_file: /secrets/alertmanager.bundle.pem
key_file: /secrets/alertmanager.bundle.pem
ca_file: /local/monitoring.ca.pem

View File

@ -0,0 +1,13 @@
server {
listen 127.0.0.1:9093;
location / {
proxy_pass https://localhost:{{ env "NOMAD_ALLOC_PORT_web-tls" }};
proxy_ssl_certificate /secrets/alertmanager.bundle.pem;
proxy_ssl_certificate_key /secrets/alertmanager.bundle.pem;
proxy_ssl_verify on;
proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring;
proxy_ssl_trusted_certificate /local/monitoring.ca.pem;
allow 127.0.0.1;
deny all;
}
}

View File

@ -0,0 +1,19 @@
#!/bin/sh
set -euo pipefail
exec alertmanager \
--config.file=/secrets/alertmanager.yml \
--storage.path=/data \
--web.external-url=[[ .public_url ]] \
--web.route-prefix=[[ if eq "" (urlParse .public_url).Path ]]/[[ else ]][[ (urlParse .public_url).Path ]][[ end ]] \
--web.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_web-tls" }} \
--cluster.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_cluster" }} \
--cluster.advertise-address={{ env "NOMAD_HOST_ADDR_cluster" }} \
{{- range service "[[ .instance ]]-am-gossip[[ .consul.suffix ]]" -}}
{{- if not (eq (env "NOMAD_ALLOC_INDEX") (index .ServiceMeta "alloc")) }}
--cluster.peer={{ .Address }}:{{ .Port }} \
{{ end -}}
{{- end -}}
--cluster.tls-config=/local/cluster_tls.yml \
--web.config.file=/local/web_tls.yml
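Before shipping changes to the alertmanager.yml template, the rendered result can be validated with amtool, which is included in the AlertManager release tarball used by the image above (the file path is a placeholder):

amtool check-config ./rendered/alertmanager.yml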

View File

@ -0,0 +1,5 @@
tls_server_config:
cert_file: /secrets/alertmanager.bundle.pem
key_file: /secrets/alertmanager.bundle.pem
client_auth_type: RequireAndVerifyClientCert
client_ca_file: /local/monitoring.ca.pem

View File

@ -0,0 +1,170 @@
# Cluster exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_cluster" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
set $consul_token "{{ with secret "consul/creds/[[ .instance ]]-cluster-exporter" }}{{ .Data.token }}{{ end }}";
{{- range service "nomad-client" }}
location /nomad-client/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/metrics?format=prometheus;
proxy_ssl_certificate /secrets/nomad_client_bundle.pem;
proxy_ssl_certificate_key /secrets/nomad_client_bundle.pem;
proxy_ssl_verify on;
proxy_ssl_name client.{{ env "NOMAD_REGION" }}.nomad;
proxy_ssl_trusted_certificate /local/nomad_ca.crt;
}
{{- end }}
{{- range service "nomad" }}
{{- if .Tags | contains "http" }}
location /nomad/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/metrics?format=prometheus;
proxy_ssl_certificate /secrets/nomad_client_bundle.pem;
proxy_ssl_certificate_key /secrets/nomad_client_bundle.pem;
proxy_ssl_verify on;
proxy_ssl_name server.{{ env "NOMAD_REGION" }}.nomad;
proxy_ssl_trusted_certificate /local/nomad_ca.crt;
}
{{- end }}
{{- end }}
{{- range service "consul" }}
location /consul/{{ .Node }} {
proxy_pass https://{{ .Address }}:8501/v1/agent/metrics?format=prometheus;
proxy_set_header X-Consul-Token $consul_token;
proxy_ssl_certificate /secrets/consul_client_bundle.pem;
proxy_ssl_certificate_key /secrets/consul_client_bundle.pem;
proxy_ssl_verify off;
proxy_ssl_trusted_certificate /local/consul_ca.crt;
}
{{- end }}
{{- range service "vault" }}
location /vault/{{ .Node }} {
proxy_pass https://{{ .Address }}:{{ .Port }}/v1/sys/metrics?format=prometheus;
proxy_ssl_verify on;
proxy_ssl_trusted_certificate /etc/ssl/cert.pem;
proxy_set_header X-Forwarded-For "$proxy_add_x_forwarded_for";
proxy_set_header X-Real-IP "$remote_addr";
proxy_set_header X-Forwarded-Proto "$scheme";
proxy_set_header X-Scheme "$scheme";
proxy_set_header X-Forwarded-Host "$host";
proxy_set_header X-Forwarded-Port "$server_port";
}
{{- end }}
location / {
root /usr/share/nginx/html;
index index.html;
}
}
# Ping exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_ping" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location /metrics {
proxy_pass http://127.0.0.1:9427;
}
}
# Blackbox exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_blackbox" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location / {
proxy_pass http://127.0.0.1:9115;
}
}
# Consul exporter
server {
listen {{ env "NOMAD_ALLOC_PORT_consul" }} ssl;
http2 on;
ssl_certificate /secrets/metrics.bundle.pem;
ssl_certificate_key /secrets/metrics.bundle.pem;
ssl_client_certificate /local/monitoring.ca.pem;
ssl_verify_client on;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 1h;
ssl_session_tickets off;
gzip on;
gzip_types
text/plain;
gzip_vary on;
server_tokens off;
if ($request_method !~ ^(GET|HEAD)$ ) {
return 405;
}
location /metrics {
proxy_pass http://127.0.0.1:9107;
}
}

View File

@ -0,0 +1,8 @@
#!/bin/sh
set -euo pipefail
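# Listen on localhost only: the nginx task of the exporters job terminates mTLS
# on the public port and proxies /metrics to 127.0.0.1:9107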
exec consul_exporter \
--web.listen-address=127.0.0.1:9107 \
--consul.server=http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 \
--consul.request-limit=20

View File

@ -0,0 +1,4 @@
targets:
[[- range $idx, $probe := .probes ]]
- [[ $probe ]]
[[- end ]]

View File

@ -0,0 +1,237 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
#query_log_file: /dev/stdout
external_labels:
cluster: [[ .consul.domain ]]
env: [[ getenv "NOMAD_NAMESPACE" ]]
rule_files:
- /local/rules/*.yml
alerting:
alertmanagers:
- scheme: https
tls_config:
ca_file: /local/monitoring.ca.pem
cert_file: /secrets/prometheus.bundle.pem
key_file: /secrets/prometheus.bundle.pem
consul_sd_configs:
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
scheme: http
datacenter: [[ .consul.datacenter ]]
relabel_configs:
# Only keep alertmanagers
- source_labels: [__meta_consul_service]
action: keep
regex: [[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]
scrape_configs:
[[- range $k, $v := .jobs ]]
- job_name: [[ $k ]]
static_configs:
- targets:
[[- range $target := $v.targets ]]
- [[ $target ]]
[[- end ]]
[[- end ]]
[[- if gt (len .exporters.blackbox.http_probes) 0 ]]
# Blackbox Exporter HTTP targets
- job_name: http_probe
metrics_path: /probe
scheme: https
tls_config:
ca_file: /local/monitoring.ca.pem
cert_file: /secrets/prometheus.bundle.pem
key_file: /secrets/prometheus.bundle.pem
params:
module: ["http_2xx"]
static_configs:
- targets:
[[- range $http_probe := .exporters.blackbox.http_probes ]]
- [[ $http_probe ]]
[[- end ]]
relabel_configs:
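# Blackbox indirection: the probed URL becomes the "target" query parameter
# and the instance label, while the scrape itself is sent to the
# blackbox-exporter address discovered from Consul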
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
[[- end ]]
[[- if gt (len .exporters.blackbox.tcp_probes) 0 ]]
# Blackbox Exporter TCP targets
- job_name: tcp_probe
metrics_path: /probe
scheme: https
tls_config:
ca_file: /local/monitoring.ca.pem
cert_file: /secrets/prometheus.bundle.pem
key_file: /secrets/prometheus.bundle.pem
params:
module: ["tcp_connect"]
static_configs:
- targets:
[[- range $target := .exporters.blackbox.tcp_probes ]]
- [[ $target ]]
[[- end ]]
relabel_configs:
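# Same indirection as for the HTTP probes: the probed endpoint becomes the
# target parameter and the instance label, the scrape goes to the blackbox-exporter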
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
[[- end ]]
# Cluster services
- job_name: cluster-services
scheme: https
tls_config:
ca_file: /local/monitoring.ca.pem
cert_file: /secrets/prometheus.bundle.pem
key_file: /secrets/prometheus.bundle.pem
consul_sd_configs:
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
scheme: http
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
datacenter: [[ .consul.datacenter ]]
relabel_configs:
# Drop anything that is not Nomad, Consul or Vault
# Other services are monitored by another job
- source_labels: [__meta_consul_service]
action: keep
regex: (nomad(\-client)?|consul|vault)
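# Scrape through the cluster-exporter proxy, which exposes
# /nomad/<node>, /consul/<node> and /vault/<node> endpoints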
- source_labels: [__meta_consul_service,__meta_consul_node]
regex: (.+);(.+)
replacement: ${1}/${2}
target_label: __metrics_path__
- source_labels: [__meta_consul_service]
regex: (.+)
replacement: {{ range $idx, $instance := service "[[ .instance ]]-cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }}
target_label: __address__
# Rewrite the job label to the name of the service
- source_labels: [__meta_consul_service]
regex: (.+)
replacement: ${1}
target_label: job
# Rewrite the instance label
- source_labels: [__meta_consul_node]
regex: (.+)
replacement: ${1}
target_label: instance
# regular services discovered from the Consul Catalog
- job_name: consul-services
scheme: https
tls_config:
ca_file: /local/monitoring.ca.pem
cert_file: /secrets/prometheus.bundle.pem
key_file: /secrets/prometheus.bundle.pem
consul_sd_configs:
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
scheme: http
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
datacenter: [[ .consul.datacenter ]]
relabel_configs:
# Drop sidecar services to prevent duplicates. Sidecars themselves are handled in another job
- source_labels: [__meta_consul_service]
action: drop
regex: (.+)-sidecar-proxy
# Drop Nomad, Consul and Vault, which are already handled by the cluster-services job
- source_labels: [__meta_consul_service]
action: drop
regex: (nomad(\-client)?|consul|vault)
# Only keep services having a metrics-port set
- source_labels: [__meta_consul_service_metadata_metrics_port]
regex: \d+
action: keep
# Get metrics path from metadata
- source_labels: [__meta_consul_service_metadata_metrics_path]
target_label: __metrics_path__
regex: (.+)
# Rewrite the scheme if needed
- source_labels: [__meta_consul_service_metadata_metrics_scheme]
regex: (https?)
replacement: ${1}
target_label: __scheme__
# Rewrite the address to use the metrics port
- source_labels: [__address__, __meta_consul_service_metadata_metrics_port]
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: ${1}:${2}
target_label: __address__
# Rewrite the job label to the name of the service
- source_labels: [__meta_consul_service]
regex: (.+)
replacement: ${1}
target_label: job
# Set the default alloc to 0 if not set
- source_labels: [__meta_consul_service_metadata_alloc]
regex: ^$
replacement: 0
target_label: __meta_consul_service_metadata_alloc
# Rewrite the instance label to be service-alloc
- source_labels: [__meta_consul_service, __meta_consul_service_metadata_alloc]
regex: (.+);([a-zA-Z\d\-\.]+)
replacement: ${1}-${2}
target_label: instance
# Envoy sidecar proxies discovered from Consul
- job_name: consul-envoy-services
consul_sd_configs:
- server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
scheme: http
token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }}
datacenter: [[ .consul.datacenter ]]
relabel_configs:
# Only keep sidecar services with an envoy-metrics-port defined
- source_labels: [__meta_consul_service, __meta_consul_service_metadata_envoy_metrics_port]
action: keep
regex: (.+)-sidecar-proxy;\d+
# Rewrite the address to use the envoy-metrics-port
- source_labels: [__address__, __meta_consul_service_metadata_envoy_metrics_port]
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: ${1}:${2}
target_label: __address__
# Rewrite the job label
- source_labels: [__meta_consul_service]
regex: (.+)
replacement: ${1}
target_label: job
# Set the default alloc to 0 if not set
- source_labels: [__meta_consul_service_metadata_alloc]
regex: ^$
replacement: 0
target_label: __meta_consul_service_metadata_alloc
# Rewrite the instance label to be service-alloc
- source_labels: [__meta_consul_service, __meta_consul_service_metadata_alloc]
regex: (.+);([a-zA-Z\d\-\.]+)
replacement: ${1}-${2}
target_label: instance

View File

@ -0,0 +1,69 @@
# vi: syntax=yaml
groups:
- name: Blackbox
rules:
- alert: BlackboxProbeFailed
expr: probe_success == 0
for: 0m
labels:
severity: critical
annotations:
summary: Blackbox probe failed (instance {{ $labels.instance }})
description: "Probe failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSlowProbe
expr: avg_over_time(probe_duration_seconds[1m]) > 1
for: 1m
labels:
severity: warning
annotations:
summary: Blackbox slow probe (instance {{ $labels.instance }})
description: "Blackbox probe took more than 1s to complete\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxProbeHttpFailure
expr: probe_http_status_code <= 199 OR probe_http_status_code >= 400
for: 0m
labels:
severity: critical
annotations:
summary: Blackbox probe HTTP failure (instance {{ $labels.instance }})
description: "HTTP status code is not 200-399\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSslCertificateWillExpireSoon
expr: '3 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 20'
for: 0m
labels:
severity: warning
annotations:
summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
description: "SSL certificate expires in less than 20 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSslCertificateWillExpireSoon
expr: '0 <= round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 3'
for: 0m
labels:
severity: critical
annotations:
summary: Blackbox SSL certificate will expire soon (instance {{ $labels.instance }})
description: "SSL certificate expires in less than 3 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxSslCertificateExpired
expr: 'round((last_over_time(probe_ssl_earliest_cert_expiry[10m]) - time()) / 86400, 0.1) < 0'
for: 0m
labels:
severity: critical
annotations:
summary: Blackbox SSL certificate expired (instance {{ $labels.instance }})
description: "SSL certificate has expired already\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: BlackboxProbeSlowHttp
expr: 'avg_over_time(probe_http_duration_seconds[1m]) > 1'
for: 1m
labels:
severity: warning
annotations:
summary: Blackbox probe slow HTTP (instance {{ $labels.instance }})
description: "HTTP request took more than 1s\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,54 @@
# vi: syntax=yaml
groups:
- name: ConsulExporter
rules:
- alert: ConsulServiceHealthcheckFailed
# Note: don't check sidecar service health, as sidecars can report a critical state while the main task is pending (e.g. waiting for a volume to be available)
expr: 'consul_catalog_service_node_healthy{service_name!~".*-sidecar-proxy"} == 0'
for: 2m
labels:
severity: critical
annotations:
summary: Consul service healthcheck failed (service {{ $labels.service_name }})
description: "Service: `{{ $labels.service_name }}` Healthcheck: `{{ $labels.service_id }}`\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ConsulMissingMasterNode
expr: 'consul_raft_peers < (max_over_time(consul_raft_peers{}[6h]) / 2) + 1'
for: 0m
labels:
severity: critical
annotations:
summary: Consul missing master node (node {{ $labels.node }})
description: "Numbers of consul raft peers should be 3, in order to preserve quorum.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ConsulAgentUnhealthy
expr: 'consul_health_node_status{status="critical"} == 1'
for: 0m
labels:
severity: critical
annotations:
summary: Consul agent unhealthy (node {{ $labels.node }})
description: "A Consul agent is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ConsulServiceWarning
expr: 'consul_health_service_status{status="warning"} == 1'
for: 2m
labels:
severity: warning
annotations:
summary: Service {{ $labels.service_name }} on node {{ $labels.node }} is in warning state
description: "Service {{ $labels.service_name }} on node {{ $labels.node }} is in warning state\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: ConsulServiceCritical
expr: 'consul_health_service_status{status="critical",service_name!~".*-sidecar-proxy"} == 1'
for: 2m
labels:
severity: critical
annotations:
summary: Service {{ $labels.service_name }} on node {{ $labels.node }} is in critical state
description: "Service {{ $labels.service_name }} on node {{ $labels.node }} is in critical state\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,16 @@
# vi: syntax=yaml
groups:
- name: JVM
rules:
- alert: JvmMemoryFillingUp
expr: '(sum by (instance)(jvm_memory_used_bytes{area="heap"}) / sum by (instance)(jvm_memory_max_bytes{area="heap"})) * 100 > 90'
for: 2m
labels:
severity: warning
annotations:
summary: JVM memory filling up (instance {{ $labels.instance }})
description: "JVM memory is filling up (> 90%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,51 @@
# vi: syntax=yaml
groups:
- name: Nomad
rules:
- alert: NomadJobFailed
expr: 'delta(nomad_nomad_job_summary_failed[30m]) > 0'
for: 0m
labels:
severity: warning
annotations:
summary: Nomad job failed (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
description: "Nomad job failed\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: NomadJobLost
expr: 'nomad_nomad_job_summary_lost > 0'
for: 0m
labels:
severity: warning
annotations:
summary: Nomad job lost (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
description: "Nomad job lost\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: NomadJobQueued
expr: 'nomad_nomad_job_summary_queued > 0'
for: 3m
labels:
severity: warning
annotations:
summary: Nomad job queued (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
description: "Nomad job queued\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: NomadBlockedEvaluation
expr: 'nomad_nomad_blocked_evals_total_blocked > 0'
for: 2m
labels:
severity: warning
annotations:
summary: Nomad blocked evaluation (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
description: "Nomad blocked evaluation\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: NomadTaskOOM
expr: 'count_over_time(nomad_client_allocs_oom_killed[1h]) > 1'
for: 0m
labels:
severity: warning
annotations:
summary: Nomad task killed by OOM (job {{ $labels.exported_job }}, group {{ $labels.task_group }}, instance {{ $labels.instance }}, task {{ $labels.task }})
description: "Nomad task killed by OOM \n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,25 @@
# vi: syntax=yaml
groups:
- name: Ping
rules:
- alert: HostDown
expr: ping_loss_ratio == 1
for: 3m
labels:
severity: critical
annotations:
summary: Host down (host {{ $labels.target }})
description: "Host {{ $labels.target }} doesn't respond to ICMP pings, VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PingLoss
expr: |
avg_over_time(ping_loss_ratio[10m]) > 0.1 and min_over_time(ping_loss_ratio[10m]) < 1
for: 0m
labels:
severity: warning
annotations:
summary: High packet loss (host {{ $labels.target }})
description: "ICMP pings have a loss ratio > 10%, VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,80 @@
# vi: syntax=yaml
groups:
- name: Postgres
rules:
- alert: PostgresqlDown
expr: 'pg_up == 0'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql down (instance {{ $labels.instance }})
description: "Postgresql instance is down\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresTooManyRestarts
expr: changes(process_start_time_seconds{job="pg"}[15m]) > 3
for: 1m
labels:
severity: warning
annotations:
summary: Postgres too many restarts (instance {{ $labels.instance }})
description: "Postgres server has restarted more than 3 times in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlTooManyConnections
expr: 'sum by (datname) (pg_stat_activity_count{datname!~"template.*|postgres"}) > pg_settings_max_connections * 0.8'
for: 2m
labels:
severity: warning
annotations:
summary: Postgresql too many connections (instance {{ $labels.instance }})
description: "PostgreSQL instance has too many connections (> 80%).\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlDeadLocks
expr: 'increase(pg_stat_database_deadlocks{datname!~"template.*|postgres"}[1m]) > 5'
for: 0m
labels:
severity: warning
annotations:
summary: Postgresql dead locks (instance {{ $labels.instance }})
description: "PostgreSQL has dead-locks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: PostgresqlHighRollbackRate
# expr: 'rate(pg_stat_database_xact_rollback{datname!~"template.*"}[3m]) / rate(pg_stat_database_xact_commit{datname!~"template.*"}[3m]) > 0.05'
# for: 0m
# labels:
# severity: warning
# annotations:
# summary: Postgresql high rollback rate (instance {{ $labels.instance }})
# description: "Ratio of transactions being aborted compared to committed is > 5 %\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlHighRateStatementTimeout
expr: 'rate(postgresql_errors_total{type="statement_timeout"}[1m]) > 3'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql high rate statement timeout (instance {{ $labels.instance }})
description: "Postgres transactions showing high rate of statement timeouts\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlHighRateDeadlock
expr: 'increase(postgresql_errors_total{type="deadlock_detected"}[1m]) > 1'
for: 0m
labels:
severity: critical
annotations:
summary: Postgresql high rate deadlock (instance {{ $labels.instance }})
description: "Postgres detected deadlocks\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PostgresqlTooManyLocksAcquired
expr: '((sum (pg_locks_count)) / (pg_settings_max_locks_per_transaction * pg_settings_max_connections)) > 0.20'
for: 2m
labels:
severity: critical
annotations:
summary: Postgresql too many locks acquired (instance {{ $labels.instance }})
description: "Too many locks acquired on the database. If this alert happens frequently, we may need to increase the postgres setting max_locks_per_transaction.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,89 @@
# vi: syntax=yaml
groups:
# Prometheus
- name: Prometheus
rules:
- alert: PrometheusTargetMissing
expr: up{job!~"sftp-PR\\d+"} == 0
for: 5m
labels:
severity: critical
annotations:
summary: Prometheus target missing (job {{ $labels.job }}, instance {{ $labels.instance }})
description: "A Prometheus target has disappeared. An exporter might be crashed.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusTooManyRestarts
expr: changes(process_start_time_seconds{job=~"prometheus|pushgateway|alertmanager"}[15m]) > 3
for: 1m
labels:
severity: warning
annotations:
summary: Prometheus too many restarts (job {{ $labels.job }}, instance {{ $labels.instance }})
description: "Prometheus has restarted more than 3 times in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusNotConnectedToAlertmanager
expr: prometheus_notifications_alertmanagers_discovered < 1
for: 2m
labels:
severity: critical
annotations:
summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusRuleEvaluationFailures
expr: increase(prometheus_rule_evaluation_failures_total[3m]) > 0
for: 0m
labels:
severity: critical
annotations:
summary: Prometheus rule evaluation failures (instance {{ $labels.instance }})
description: "Prometheus encountered {{ $value }} rule evaluation failures, leading to potentially ignored alerts.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusRuleEvaluationSlow
expr: prometheus_rule_group_last_duration_seconds > prometheus_rule_group_interval_seconds
for: 5m
labels:
severity: warning
annotations:
summary: Prometheus rule evaluation slow (instance {{ $labels.instance }})
description: "Prometheus rule evaluation took more time than the scheduled interval. It indicates a slower storage backend access or too complex query.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusNotificationsBacklog
expr: min_over_time(prometheus_notifications_queue_length[10m]) > 0
for: 0m
labels:
severity: warning
annotations:
summary: Prometheus notifications backlog (instance {{ $labels.instance }})
description: "The Prometheus notification queue has not been empty for 10 minutes\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusAlertmanagerNotificationFailing
expr: rate(alertmanager_notifications_failed_total[1m]) > 0
for: 0m
labels:
severity: critical
annotations:
summary: Prometheus AlertManager notification failing (instance {{ $labels.instance }})
description: "Alertmanager is failing sending notifications\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusTargetScrapingSlow
expr: prometheus_target_interval_length_seconds{quantile="0.9"} / on (interval, instance, job) prometheus_target_interval_length_seconds{quantile="0.5"} > 1.05
for: 5m
labels:
severity: warning
annotations:
summary: Prometheus target scraping slow (instance {{ $labels.instance }})
description: "Prometheus is scraping exporters slowly since it exceeded the requested interval time. Your Prometheus server is under-provisioned.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusTsdbWalCorruptions
expr: increase(prometheus_tsdb_wal_corruptions_total[1m]) > 0
for: 0m
labels:
severity: critical
annotations:
summary: Prometheus TSDB WAL corruptions (instance {{ $labels.instance }})
description: "Prometheus encountered {{ $value }} TSDB WAL corruptions\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,16 @@
# vi: syntax=yaml
groups:
- name: Traefik
rules:
- alert: TraefikHighHttp5xxErrorRateService
expr: 'sum(rate(traefik_service_requests_total{code=~"5.*"}[3m])) by (service) / sum(rate(traefik_service_requests_total[3m])) by (service) * 100 > 5'
for: 1m
labels:
severity: critical
annotations:
summary: Traefik high HTTP 5xx error rate service (instance {{ $labels.instance }})
description: "Traefik service 5xx error rate is above 5%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

View File

@ -0,0 +1,16 @@
# vi: syntax=yaml
groups:
- name: HashicorpVault
rules:
- alert: VaultSealed
expr: 'vault_core_unsealed == 0'
for: 0m
labels:
severity: critical
annotations:
summary: Vault sealed (instance {{ $labels.instance }})
description: "Vault instance is sealed on {{ $labels.instance }}\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"

127
variables.yml Normal file
View File

@ -0,0 +1,127 @@
---
instance: monitoring
vault:
pki:
path: '[[ .prometheus.vault_pki ]]'
ou: Monitoring
monitoring:
exporters:
count: 1
ping:
version: 1.1.0
image: '[[ .docker.repo ]]ping-exporter:[[ .monitoring.exporters.ping.version ]]-1'
env: {}
resources:
cpu: 10
memory: 30
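# Hosts to ping, rendered into the ping-exporter target list (e.g. probes: ['192.0.2.1', 'gw.example.org'])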
probes: []
blackbox:
version: 0.24.0
image: '[[ .docker.repo ]]blackbox-exporter:[[ .monitoring.exporters.blackbox.version ]]-1'
env: {}
resources:
cpu: 10
memory: 50
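# Endpoints probed by the blackbox-exporter: http_probes are URLs checked with the
# http_2xx module, tcp_probes are host:port pairs checked with tcp_connect
# (e.g. http_probes: ['https://www.example.org'])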
tcp_probes: []
http_probes: []
consul:
version: 0.11.0
image: '[[ .docker.repo ]]consul-exporter:[[ .monitoring.exporters.consul.version ]]-2'
env: {}
resources:
cpu: 20
memory: 64
vault:
policies:
- '[[ .instance ]]-consul-exporter'
cluster:
image: nginxinc/nginx-unprivileged:alpine
env: {}
resources:
cpu: 10
memory: 18
vault:
policies:
- '[[ .instance ]]-cluster-exporter'
- metrics
prometheus:
version: 2.50.1
count: 1
image: '[[ .docker.repo ]]prometheus:[[ .monitoring.prometheus.version ]]-1'
env: {}
resources:
cpu: 200
memory: 768
volumes:
data:
type: csi
source: '[[ .instance ]]-prometheus-data'
per_alloc: true
vault:
policies:
- '[[ .instance ]]-prometheus'
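# Additional static scrape jobs rendered into prometheus.yml, for example:
# jobs:
#   node:
#     targets:
#       - 192.0.2.10:9100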
jobs: {}
alert_rules: {}
# alert_rules:
#   prometheus:
# url: https://raw.githubusercontent.com/samber/awesome-prometheus-alerts/master/dist/rules/prometheus-self-monitoring/embedded-exporter.yml
public_url: https://prometheus.example.org
traefik:
enabled: true
router: prometheus
retention: 30d
prometheus:
enabled: true
metrics_url: http://localhost:9090/metrics
alertmanager:
count: 1
version: 0.27.0
image: '[[ .docker.repo ]]alertmanager:[[ .monitoring.alertmanager.version ]]-1'
env: {}
resources:
cpu: 50
memory: 80
public_url: https://alerte.example.org
traefik:
enabled: true
router: alertmanager
strip_prefix: false
volumes:
data:
source: '[[ .instance ]]-alertmanager-data'
type: csi
per_alloc: true
prometheus:
metrics_url: http://127.0.0.1:9093/metrics
vault:
policies:
- metrics
- '[[ .instance ]]-alertmanager'
email:
from: alertmanager@[[ .consul.domain ]]
custom_config: ""
prometheus:
enabled: true

View File

@ -0,0 +1,3 @@
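# Issue certificates from the metrics role of the monitoring PKI
# (used by tasks exposing their metrics endpoint over TLS)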
path "[[ .prometheus.vault_pki ]]/issue/metrics" {
capabilities = ["update"]
}

View File

@ -0,0 +1,8 @@
[[- $c := merge .monitoring.alertmanager .monitoring . ]]
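# Issue the alertmanager certificate from the monitoring PKI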
path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager" {
capabilities = ["update"]
}
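# Read alertmanager secrets from the KV store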
path "[[ .vault.root ]]kv/service/[[ .instance ]]/alertmanager" {
capabilities = ["read"]
}

View File

@ -0,0 +1,20 @@
[[- $c := merge .monitoring.exporters.cluster .monitoring.exporters .monitoring . ]]
# Read vault metrics
path "sys/metrics" {
capabilities = ["read", "list"]
}
# Get a cert for Nomad
path "pki/nomad/issue/[[ .instance ]]-cluster-exporter" {
capabilities = ["update"]
}
# Get a cert for Consul
path "pki/consul/issue/[[ .instance ]]-cluster-exporter" {
capabilities = ["update"]
}
# Get a consul token
path "consul/creds/[[ .instance ]]-cluster-exporter" {
capabilities = ["read"]
}

View File

@ -0,0 +1,4 @@
[[- $c := merge .monitoring.exporters.consul .monitoring.exporters .monitoring . ]]
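# Get a Consul token for consul_exporter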
path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" {
capabilities = ["read"]
}

View File

@ -0,0 +1,12 @@
[[- $c := merge .monitoring.prometheus .monitoring . ]]
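# Issue the prometheus certificate from the monitoring PKI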
path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus" {
capabilities = ["update"]
}
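# Read prometheus secrets from the KV store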
path "[[ $c.vault.root ]]kv/service/[[ .instance ]]/prometheus" {
capabilities = ["read"]
}
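# Get a Consul token, used by the consul_sd_configs in prometheus.yml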
path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-prometheus" {
capabilities = ["read"]
}