diff --git a/consul/config/service-defaults/alertmanager.hcl b/consul/config/service-defaults/alertmanager.hcl new file mode 100644 index 0000000..09ca817 --- /dev/null +++ b/consul/config/service-defaults/alertmanager.hcl @@ -0,0 +1,3 @@ +Kind = "service-defaults" +Name = "alertmanager[[ .consul.suffix ]]" +Protocol = "http" diff --git a/consul/config/service-defaults/grafana.hcl b/consul/config/service-defaults/grafana.hcl new file mode 100644 index 0000000..c8a5234 --- /dev/null +++ b/consul/config/service-defaults/grafana.hcl @@ -0,0 +1,3 @@ +Kind = "service-defaults" +Name = "grafana[[ .consul.suffix ]]" +Protocol = "http" diff --git a/consul/config/service-defaults/loki.hcl b/consul/config/service-defaults/loki.hcl new file mode 100644 index 0000000..63c7629 --- /dev/null +++ b/consul/config/service-defaults/loki.hcl @@ -0,0 +1,3 @@ +Kind = "service-defaults" +Name = "loki[[ .consul.suffix ]]" +Protocol = "http" diff --git a/consul/config/service-defaults/monitoring-alertmanager.hcl b/consul/config/service-defaults/monitoring-alertmanager.hcl deleted file mode 100644 index c25108a..0000000 --- a/consul/config/service-defaults/monitoring-alertmanager.hcl +++ /dev/null @@ -1,3 +0,0 @@ -Kind = "service-defaults" -Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]" -Protocol = "http" diff --git a/consul/config/service-defaults/monitoring-loki.hcl b/consul/config/service-defaults/monitoring-loki.hcl deleted file mode 100644 index b63d91f..0000000 --- a/consul/config/service-defaults/monitoring-loki.hcl +++ /dev/null @@ -1,3 +0,0 @@ -Kind = "service-defaults" -Name = "[[ .instance ]]-loki[[ .consul.suffix ]]" -Protocol = "http" diff --git a/consul/config/service-defaults/monitoring-prometheus.hcl b/consul/config/service-defaults/monitoring-prometheus.hcl deleted file mode 100644 index 2b6d3cf..0000000 --- a/consul/config/service-defaults/monitoring-prometheus.hcl +++ /dev/null @@ -1,3 +0,0 @@ -Kind = "service-defaults" -Name = "[[ .instance ]]-prometheus[[ 
.consul.suffix ]]" -Protocol = "http" diff --git a/consul/config/service-defaults/prometheus.hcl b/consul/config/service-defaults/prometheus.hcl new file mode 100644 index 0000000..c4955e3 --- /dev/null +++ b/consul/config/service-defaults/prometheus.hcl @@ -0,0 +1,3 @@ +Kind = "service-defaults" +Name = "prometheus[[ .consul.suffix ]]" +Protocol = "http" diff --git a/consul/config/service-intentions/monitoring-alertmanager.hcl b/consul/config/service-intentions/alertmanager.hcl similarity index 88% rename from consul/config/service-intentions/monitoring-alertmanager.hcl rename to consul/config/service-intentions/alertmanager.hcl index 8a29522..09588dd 100644 --- a/consul/config/service-intentions/monitoring-alertmanager.hcl +++ b/consul/config/service-intentions/alertmanager.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]" +Name = "alertmanager[[ .consul.suffix ]]" Sources = [ { Name = "[[ (merge .monitoring.alertmanager .).traefik.instance ]]" diff --git a/consul/config/service-intentions/grafana.hcl b/consul/config/service-intentions/grafana.hcl new file mode 100644 index 0000000..059cd9f --- /dev/null +++ b/consul/config/service-intentions/grafana.hcl @@ -0,0 +1,15 @@ +Kind = "service-intentions" +Name = "grafana[[ .consul.suffix ]]" +Sources = [ + { + Name = "[[ (merge .monitoring.grafana .monitoring .).traefik.instance ]]" + Permissions = [ + { + Action = "allow" + HTTP { + PathPrefix = "[[ if eq (urlParse .monitoring.grafana.public_url).Path "" ]]/[[ else ]][[ (urlParse .monitoring.grafana.public_url).Path ]][[ end ]]" + } + } + ] + } +] diff --git a/consul/config/service-intentions/monitoring-loki.hcl b/consul/config/service-intentions/loki.hcl similarity index 84% rename from consul/config/service-intentions/monitoring-loki.hcl rename to consul/config/service-intentions/loki.hcl index d7d533e..b2df884 100644 --- a/consul/config/service-intentions/monitoring-loki.hcl +++ 
b/consul/config/service-intentions/loki.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "[[ .instance ]]-loki[[ .consul.suffix ]]" +Name = "loki[[ .consul.suffix ]]" Sources = [ { Name = "[[ (merge .monitoring.loki .monitoring .).traefik.instance ]]" @@ -13,7 +13,7 @@ Sources = [ ] }, { - Name = "[[ .instance ]]-grafana[[ .consul.suffix ]]" + Name = "grafana[[ .consul.suffix ]]" Permissions = [ { Action = "allow" @@ -33,7 +33,7 @@ Sources = [ }, [[- range $idx, $service := coll.Slice "vector-aggregator" "vector-agent" ]] { - Name = "[[ $.instance ]]-[[ $service ]][[ $.consul.suffix ]]" + Name = "[[ $service ]][[ $.consul.suffix ]]" Permissions = [ { Action = "allow" diff --git a/consul/config/service-intentions/monitoring-prometheus.hcl b/consul/config/service-intentions/prometheus.hcl similarity index 84% rename from consul/config/service-intentions/monitoring-prometheus.hcl rename to consul/config/service-intentions/prometheus.hcl index 65f4f23..ba041b1 100644 --- a/consul/config/service-intentions/monitoring-prometheus.hcl +++ b/consul/config/service-intentions/prometheus.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]" +Name = "prometheus[[ .consul.suffix ]]" Sources = [ { Name = "[[ (merge .monitoring.prometheus .).traefik.instance ]]" @@ -13,7 +13,7 @@ Sources = [ ] }, { - Name = "[[ .instance ]]-grafana[[ .consul.suffix ]]" + Name = "grafana[[ .consul.suffix ]]" Permissions = [ { # Deny access to the admin API from Grafana diff --git a/consul/policies/monitoring-prometheus.hcl b/consul/policies/monitoring.hcl similarity index 100% rename from consul/policies/monitoring-prometheus.hcl rename to consul/policies/monitoring.hcl diff --git a/example/consul/config/service-defaults/monitoring-loki.hcl b/example/consul/config/service-defaults/alertmanager.hcl similarity index 63% rename from example/consul/config/service-defaults/monitoring-loki.hcl rename to 
example/consul/config/service-defaults/alertmanager.hcl index 69e7b61..489254c 100644 --- a/example/consul/config/service-defaults/monitoring-loki.hcl +++ b/example/consul/config/service-defaults/alertmanager.hcl @@ -1,3 +1,3 @@ Kind = "service-defaults" -Name = "monitoring-loki" +Name = "alertmanager" Protocol = "http" diff --git a/example/consul/config/service-defaults/monitoring-prometheus.hcl b/example/consul/config/service-defaults/grafana.hcl similarity index 58% rename from example/consul/config/service-defaults/monitoring-prometheus.hcl rename to example/consul/config/service-defaults/grafana.hcl index e4820a0..69f1301 100644 --- a/example/consul/config/service-defaults/monitoring-prometheus.hcl +++ b/example/consul/config/service-defaults/grafana.hcl @@ -1,3 +1,3 @@ Kind = "service-defaults" -Name = "monitoring-prometheus" +Name = "grafana" Protocol = "http" diff --git a/example/consul/config/service-defaults/monitoring-alertmanager.hcl b/example/consul/config/service-defaults/loki.hcl similarity index 57% rename from example/consul/config/service-defaults/monitoring-alertmanager.hcl rename to example/consul/config/service-defaults/loki.hcl index 0908bb8..30fecc1 100644 --- a/example/consul/config/service-defaults/monitoring-alertmanager.hcl +++ b/example/consul/config/service-defaults/loki.hcl @@ -1,3 +1,3 @@ Kind = "service-defaults" -Name = "monitoring-alertmanager" +Name = "loki" Protocol = "http" diff --git a/example/consul/config/service-defaults/prometheus.hcl b/example/consul/config/service-defaults/prometheus.hcl new file mode 100644 index 0000000..1203385 --- /dev/null +++ b/example/consul/config/service-defaults/prometheus.hcl @@ -0,0 +1,3 @@ +Kind = "service-defaults" +Name = "prometheus" +Protocol = "http" diff --git a/example/consul/config/service-intentions/monitoring-alertmanager.hcl b/example/consul/config/service-intentions/alertmanager.hcl similarity index 88% rename from 
example/consul/config/service-intentions/monitoring-alertmanager.hcl rename to example/consul/config/service-intentions/alertmanager.hcl index fedec67..7fc5075 100644 --- a/example/consul/config/service-intentions/monitoring-alertmanager.hcl +++ b/example/consul/config/service-intentions/alertmanager.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "monitoring-alertmanager" +Name = "alertmanager" Sources = [ { Name = "traefik" diff --git a/example/consul/config/service-intentions/grafana.hcl b/example/consul/config/service-intentions/grafana.hcl new file mode 100644 index 0000000..5be630f --- /dev/null +++ b/example/consul/config/service-intentions/grafana.hcl @@ -0,0 +1,15 @@ +Kind = "service-intentions" +Name = "grafana" +Sources = [ + { + Name = "traefik" + Permissions = [ + { + Action = "allow" + HTTP { + PathPrefix = "/" + } + } + ] + } +] diff --git a/example/consul/config/service-intentions/monitoring-loki.hcl b/example/consul/config/service-intentions/loki.hcl similarity index 88% rename from example/consul/config/service-intentions/monitoring-loki.hcl rename to example/consul/config/service-intentions/loki.hcl index 02d055d..a4e50fd 100644 --- a/example/consul/config/service-intentions/monitoring-loki.hcl +++ b/example/consul/config/service-intentions/loki.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "monitoring-loki" +Name = "loki" Sources = [ { Name = "traefik" @@ -13,7 +13,7 @@ Sources = [ ] }, { - Name = "monitoring-grafana" + Name = "grafana" Permissions = [ { Action = "allow" @@ -32,7 +32,7 @@ Sources = [ ] }, { - Name = "monitoring-vector-aggregator" + Name = "vector-aggregator" Permissions = [ { Action = "allow" @@ -51,7 +51,7 @@ Sources = [ ] }, { - Name = "monitoring-vector-agent" + Name = "vector-agent" Permissions = [ { Action = "allow" diff --git a/example/consul/config/service-intentions/monitoring-prometheus.hcl b/example/consul/config/service-intentions/prometheus.hcl similarity index 89% rename from 
example/consul/config/service-intentions/monitoring-prometheus.hcl rename to example/consul/config/service-intentions/prometheus.hcl index 54c5e1f..a7e6f1d 100644 --- a/example/consul/config/service-intentions/monitoring-prometheus.hcl +++ b/example/consul/config/service-intentions/prometheus.hcl @@ -1,5 +1,5 @@ Kind = "service-intentions" -Name = "monitoring-prometheus" +Name = "prometheus" Sources = [ { Name = "traefik" @@ -13,7 +13,7 @@ Sources = [ ] }, { - Name = "monitoring-grafana" + Name = "grafana" Permissions = [ { # Deny access to the admin API from Grafana diff --git a/example/consul/policies/monitoring-prometheus.hcl b/example/consul/policies/monitoring.hcl similarity index 100% rename from example/consul/policies/monitoring-prometheus.hcl rename to example/consul/policies/monitoring.hcl diff --git a/example/monitoring-exporters.nomad.hcl b/example/exporters.nomad.hcl similarity index 93% rename from example/monitoring-exporters.nomad.hcl rename to example/exporters.nomad.hcl index d7752f6..79fe485 100644 --- a/example/monitoring-exporters.nomad.hcl +++ b/example/exporters.nomad.hcl @@ -18,7 +18,7 @@ job "monitoring-exporters" { } service { - name = "monitoring-ping-exporter" + name = "ping-exporter" port = "ping" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -27,7 +27,7 @@ job "monitoring-exporters" { } service { - name = "monitoring-blackbox-exporter" + name = "blackbox-exporter" port = "blackbox" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -35,7 +35,7 @@ job "monitoring-exporters" { } service { - name = "monitoring-consul-exporter" + name = "consul-exporter" port = "ping" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -44,7 +44,7 @@ job "monitoring-exporters" { } service { - name = "monitoring-cluster-exporter" + name = "cluster-exporter" port = "cluster" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -77,7 +77,7 @@ _EOT vault { - policies = ["monitoring-consul-exporter"] + policies = ["consul-exporter"] env = false disable_file = true change_mode = "noop" @@ -102,7 
+102,7 @@ _EOT template { data = <<_EOT -CONSUL_HTTP_TOKEN={{ with secret "consul/creds/monitoring-consul-exporter" }}{{ .Data.token }}{{ end }} +CONSUL_HTTP_TOKEN={{ with secret "consul/creds/consul-exporter" }}{{ .Data.token }}{{ end }} _EOT destination = "secrets/.consul.env" uid = 100000 @@ -151,7 +151,7 @@ _EOT vault { - policies = ["monitoring-cluster-exporter", "metrics"] + policies = ["cluster-exporter", "metrics"] env = false disable_file = true change_mode = "noop" @@ -187,7 +187,7 @@ server { return 405; } - set $consul_token "{{ with secret "consul/creds/monitoring-cluster-exporter" }}{{ .Data.token }}{{ end }}"; + set $consul_token "{{ with secret "consul/creds/cluster-exporter" }}{{ .Data.token }}{{ end }}"; {{- range service "nomad-client" }} location /nomad-client/{{ .Node }} { @@ -365,7 +365,7 @@ _EOT # Get a Nomad client certificate template { data = <<_EOT -{{- with pkiCert "pki/nomad/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.nomad.consul" "ttl=24h" }} +{{- with pkiCert "pki/nomad/issue/cluster-exporter" "common_name=metrics-proxy.nomad.consul" "ttl=24h" }} {{ .Data.Cert }} {{ .Data.Key }} {{- end }} @@ -389,7 +389,7 @@ _EOT # Same for Consul template { data = <<_EOT -{{- with pkiCert "pki/consul/issue/monitoring-cluster-exporter" "common_name=metrics-proxy.consul.consul" "ttl=24h" }} +{{- with pkiCert "pki/consul/issue/cluster-exporter" "common_name=metrics-proxy.consul.consul" "ttl=24h" }} {{ .Data.Cert }} {{ .Data.Key }} {{- end }} diff --git a/example/images/grafana/Dockerfile b/example/images/grafana/Dockerfile new file mode 100644 index 0000000..1522e27 --- /dev/null +++ b/example/images/grafana/Dockerfile @@ -0,0 +1,59 @@ +FROM danielberteaud/alpine:24.3-1 AS builder + +ARG GRAFANA_VERSION=10.4.1 \ + GRAFANA_PLUGINS=grafana-clock-panel,grafana-piechart-panel + +ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz /tmp +ADD 
https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256 /tmp +RUN set -eux &&\ + apk --no-cache add \ + tar \ + curl \ + ca-certificates \ + bash \ + gcompat \ + libc6-compat \ + &&\ + ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\ + cd /tmp &&\ + echo "$(cat grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256) grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz" | sha256sum -c &&\ + tar xzf grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz &&\ + mv grafana-v${GRAFANA_VERSION} /opt/grafana &&\ + mkdir /opt/grafana/plugins &&\ + IFS=',' &&\ + for PLUGIN in ${GRAFANA_PLUGINS}; do /opt/grafana/bin/grafana cli --pluginsDir /opt/grafana/plugins plugins install ${PLUGIN}; done + +FROM danielberteaud/alpine:24.3-1 +MAINTAINER Daniel Berteaud + +ENV PATH=/opt/grafana/bin/:${PATH} \ + GF_PATHS_DATA=/data \ + GF_PATHS_PLUGINS=/opt/grafana/plugins \ + GF_LOG_MODE=console + +COPY --from=builder /opt/grafana /opt/grafana +RUN set -eux &&\ + apk --no-cache add \ + gcompat \ + libc6-compat \ + &&\ + ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\ + addgroup -g 3000 grafana &&\ + adduser --system \ + --ingroup grafana \ + --disabled-password \ + --uid 3000 \ + --home /opt/grafana \ + --no-create-home \ + --shell /sbin/nologin \ + grafana &&\ + mkdir /data &&\ + chown -R grafana:grafana /data /opt/grafana/plugins &&\ + chmod 700 /data + +WORKDIR /opt/grafana +USER grafana +CMD ["grafana", \ + "server", \ + "--homepath=/opt/grafana", \ + "--packaging=docker"] diff --git a/example/images/loki/Dockerfile b/example/images/loki/Dockerfile index 9f25ff0..6169a94 100644 --- a/example/images/loki/Dockerfile +++ b/example/images/loki/Dockerfile @@ -1,6 +1,6 @@ FROM danielberteaud/alpine:24.3-1 AS builder -ARG LOKI_VERSION=2.9.5 +ARG LOKI_VERSION=2.9.6 ADD https://github.com/grafana/loki/releases/download/v${LOKI_VERSION}/loki-linux-amd64.zip /tmp ADD https://github.com/grafana/loki/releases/download/v${LOKI_VERSION}/SHA256SUMS /tmp diff --git 
a/example/images/prometheus/Dockerfile b/example/images/prometheus/Dockerfile index 2b7ca51..4d08c1c 100644 --- a/example/images/prometheus/Dockerfile +++ b/example/images/prometheus/Dockerfile @@ -1,6 +1,6 @@ FROM danielberteaud/alpine:24.3-1 AS builder -ARG PROM_VERSION=2.50.1 +ARG PROM_VERSION=2.51.0 ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.linux-amd64.tar.gz /tmp ADD https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/sha256sums.txt /tmp diff --git a/example/init/consul b/example/init/consul index bc2250b..b85d77a 100755 --- a/example/init/consul +++ b/example/init/consul @@ -1,17 +1,17 @@ #!/bin/sh # vim: syntax=sh -vault write consul/roles/monitoring-prometheus \ +vault write consul/roles/prometheus \ ttl=720h \ max_ttl=720h \ - consul_policies="monitoring-prometheus" + consul_policies="monitoring" -vault write consul/roles/monitoring-consul-exporter \ +vault write consul/roles/consul-exporter \ ttl=720h \ max_ttl=720h \ - consul_policies="monitoring-prometheus" + consul_policies="monitoring" -vault write consul/roles/monitoring-cluster-exporter \ +vault write consul/roles/cluster-exporter \ ttl=720h \ max_ttl=720h \ - consul_policies="monitoring-prometheus" + consul_policies="monitoring" diff --git a/example/init/grafana-vault-database b/example/init/grafana-vault-database new file mode 100755 index 0000000..41ebf36 --- /dev/null +++ b/example/init/grafana-vault-database @@ -0,0 +1,12 @@ +#!/bin/sh + +set -euo pipefail + +vault write database/roles/grafana \ + db_name="postgres" \ + creation_statements="CREATE ROLE \"{{name}}\" WITH LOGIN PASSWORD '{{password}}' VALID UNTIL '{{expiration}}'; \ + GRANT \"grafana\" TO \"{{name}}\"; \ + ALTER ROLE \"{{name}}\" SET role = \"grafana\"" \ + default_ttl="12h" \ + max_ttl="720h" + diff --git a/example/init/pki b/example/init/pki index 30555d2..6333bc5 100755 --- a/example/init/pki +++ b/example/init/pki @@ -93,8 +93,8 @@ rm -rf 
${TMP} # Create a role for alertmanager -vault write pki/monitoring/roles/monitoring-alertmanager \ - allowed_domains="monitoring" \ +vault write pki/monitoring/roles/alertmanager \ + allowed_domains="monitoring.consul" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -106,8 +106,8 @@ vault write pki/monitoring/roles/monitoring-alertmanager \ ou="Monitoring" # Create a role for prometheus (which will only be a client, for AlertManager) -vault write pki/monitoring/roles/monitoring-prometheus \ - allowed_domains="monitoring" \ +vault write pki/monitoring/roles/prometheus \ + allowed_domains="monitoring.consul" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -119,8 +119,8 @@ vault write pki/monitoring/roles/monitoring-prometheus \ ou="Monitoring" # Create a role for loki (which will only be a client, for AlertManager) -vault write pki/monitoring/roles/monitoring-loki \ - allowed_domains="monitoring" \ +vault write pki/monitoring/roles/loki \ + allowed_domains="monitoring.consul" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -133,7 +133,7 @@ vault write pki/monitoring/roles/monitoring-loki \ # Create a role for metrics exporters (server only) vault write pki/monitoring/roles/metrics \ - allowed_domains="monitoring" \ + allowed_domains="monitoring.consul" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -147,7 +147,7 @@ vault write pki/monitoring/roles/metrics \ ou="Monitoring" # Create a role on the Nomad PKI for the cluster exporter -vault write pki/nomad/roles/monitoring-cluster-exporter \ +vault write pki/nomad/roles/cluster-exporter \ allowed_domains='nomad.consul' \ allow_subdomains=true \ allow_wildcard_certificates=false \ @@ -158,7 +158,7 @@ vault write pki/nomad/roles/monitoring-cluster-exporter \ ou="Cluster metrics exporter" # Create a role on the Consul PKI for the cluster exporter -vault write 
pki/consul/roles/monitoring-cluster-exporter \ +vault write pki/consul/roles/cluster-exporter \ allowed_domains="consul.consul" \ allow_bare_domains=false \ allow_subdomains=true \ diff --git a/example/prep.d/10-montoring-rand-secrets b/example/prep.d/10-montoring-rand-secrets new file mode 100755 index 0000000..c8e3b01 --- /dev/null +++ b/example/prep.d/10-montoring-rand-secrets @@ -0,0 +1,22 @@ +#!/bin/sh + +set -euo pipefail + +# vim: syntax=sh + +export LC_ALL=C +VAULT_KV_PATH=kv/service/monitoring/grafana +RAND_CMD="tr -dc A-Za-z0-9\-_\/=~\.+ < /dev/urandom | head -c 50" +if ! vault kv list $(dirname ${VAULT_KV_PATH}) 2>/dev/null | grep -q -E "^$(basename ${VAULT_KV_PATH})\$"; then + vault kv put ${VAULT_KV_PATH} \ + secret_key="$(sh -c "${RAND_CMD}")" \ + +fi +for SECRET in secret_key; do + if ! vault kv get -field ${SECRET} ${VAULT_KV_PATH} >/dev/null 2>&1; then + vault kv patch ${VAULT_KV_PATH} \ + ${SECRET}=$(sh -c "${RAND_CMD}") + fi +done + + diff --git a/example/monitoring-services.nomad.hcl b/example/services.nomad.hcl similarity index 86% rename from example/monitoring-services.nomad.hcl rename to example/services.nomad.hcl index cfadd6b..bea656b 100644 --- a/example/monitoring-services.nomad.hcl +++ b/example/services.nomad.hcl @@ -18,7 +18,7 @@ job "monitoring-services" { volume "data" { - source = "monitoring-prometheus-data" + source = "prometheus-data" type = "csi" access_mode = "single-node-writer" attachment_mode = "file-system" @@ -27,7 +27,7 @@ job "monitoring-services" { service { - name = "monitoring-prometheus" + name = "prometheus" port = 9090 meta { @@ -173,7 +173,7 @@ _EOT leader = true config { - image = "danielberteaud/prometheus:2.50.1-1" + image = "danielberteaud/prometheus:2.51.0-1" readonly_rootfs = true pids_limit = 200 command = "prometheus" @@ -192,7 +192,7 @@ _EOT vault { - policies = ["monitoring-prometheus"] + policies = ["prometheus"] env = false disable_file = true change_mode = "noop" @@ -225,13 +225,13 @@ alerting: 
consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }} datacenter: dc1 relabel_configs: # Only keep alertmanagers - source_labels: [__meta_consul_service] action: keep - regex: monitoring-alertmanager-tls + regex: alertmanager-tls scrape_configs: @@ -245,7 +245,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }} datacenter: dc1 relabel_configs: @@ -262,7 +262,7 @@ scrape_configs: - source_labels: [__meta_consul_service] regex: (.+) - replacement: {{ range $idx, $instance := service "monitoring-cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }} + replacement: {{ range $idx, $instance := service "cluster-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }} target_label: __address__ # Rewrite the job labels to the name of the service @@ -288,7 +288,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }} datacenter: dc1 relabel_configs: @@ -348,7 +348,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "consul/creds/monitoring-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus" }}{{ .Data.token }}{{ end }} datacenter: dc1 relabel_configs: @@ -884,8 +884,8 @@ _EOT # A client cert, to connect to the AlertManager API template { data = <<_EOT -{{- with pkiCert 
"pki/monitoring/issue/monitoring-prometheus" - (printf "common_name=prometheus-%s.monitoring" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "pki/monitoring/issue/prometheus" + (printf "common_name=prometheus-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}} {{ .Cert }} {{ .Key }} @@ -940,7 +940,7 @@ _EOT volume "data" { - source = "monitoring-alertmanager-data" + source = "alertmanager-data" type = "csi" access_mode = "single-node-writer" attachment_mode = "file-system" @@ -950,7 +950,7 @@ _EOT # This service is used for the different instances of alertmanager to communicate service { - name = "monitoring-alertmanager-gossip" + name = "alertmanager-gossip" port = "cluster" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -960,7 +960,7 @@ _EOT # This service is used by prometheus. As it needs to be able to reach every instances, it cannot use # the service mesh. The exposed port uses mTLS, so it's safe to expose it outside of the mesh service { - name = "monitoring-alertmanager-tls" + name = "alertmanager-tls" port = "web-tls" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -970,7 +970,7 @@ _EOT # This service is exposed through the service mesh # and can be used to reach the web interface through Traefik service { - name = "monitoring-alertmanager" + name = "alertmanager" port = 9093 meta { metrics-port = "${NOMAD_HOST_PORT_metrics}" @@ -1111,7 +1111,7 @@ _EOT # This task will handle mTLS to the AlertManager API # And expose it as plain http on 127.0.0.1 for Traefik (through the service mesh) and for the metrics proxy - task "tls-proxy" { + task "untls-proxy" { driver = "docker" user = 9093 @@ -1135,7 +1135,7 @@ _EOT vault { - policies = ["metrics", "monitoring-alertmanager"] + policies = ["metrics", "alertmanager"] env = false disable_file = true change_mode = "noop" @@ -1156,7 +1156,7 @@ server { proxy_ssl_certificate /secrets/alertmanager.bundle.pem; proxy_ssl_certificate_key 
/secrets/alertmanager.bundle.pem; proxy_ssl_verify on; - proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring; + proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring.consul; proxy_ssl_trusted_certificate /local/monitoring.ca.pem; allow 127.0.0.1; deny all; @@ -1170,8 +1170,8 @@ _EOT # Certifiate used by AlertManager template { data = <<_EOT -{{- with pkiCert "pki/monitoring/issue/monitoring-alertmanager" - (printf "common_name=alertmanager-%s.monitoring" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "pki/monitoring/issue/alertmanager" + (printf "common_name=alertmanager-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX")) (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }} {{ .Cert }} @@ -1214,7 +1214,7 @@ _EOT vault { - policies = ["metrics", "monitoring-alertmanager"] + policies = ["metrics", "alertmanager"] env = false disable_file = true change_mode = "noop" @@ -1288,7 +1288,7 @@ exec alertmanager \ --web.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_web-tls" }} \ --cluster.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_cluster" }} \ --cluster.advertise-address={{ env "NOMAD_HOST_ADDR_cluster" }} \ -{{- range service "monitoring-am-gossip" -}} +{{- range service "alertmanager-gossip" -}} {{- if not (eq (env "NOMAD_ALLOC_INDEX") (index .ServiceMeta "alloc")) }} --cluster.peer={{ .Address }}:{{ .Port }} \ {{ end -}} @@ -1306,8 +1306,8 @@ _EOT # Certifiate used by AlertManager template { data = <<_EOT -{{- with pkiCert "pki/monitoring/issue/monitoring-alertmanager" - (printf "common_name=alertmanager-%s.monitoring" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "pki/monitoring/issue/alertmanager" + (printf "common_name=alertmanager-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX")) (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }} {{ .Cert }} @@ -1356,7 +1356,7 @@ _EOT volume 
"data" { - source = "monitoring-loki-data" + source = "loki-data" type = "csi" access_mode = "single-node-writer" attachment_mode = "file-system" @@ -1364,7 +1364,7 @@ _EOT service { - name = "monitoring-loki" + name = "loki" port = 3100 meta { metrics-port = "${NOMAD_HOST_PORT_metrics}" @@ -1507,14 +1507,14 @@ _EOT driver = "docker" config { - image = "danielberteaud/loki:2.9.5-1" + image = "danielberteaud/loki:2.9.6-1" command = "loki" args = ["--config.file=/local/loki.yml"] } vault { - policies = ["monitoring-loki"] + policies = ["loki"] env = false disable_file = true change_mode = "noop" @@ -1570,7 +1570,7 @@ ruler: tls_cert_path: /secrets/loki.bundle.pem tls_key_path: /secrets/loki.bundle.pem tls_server_name: alertmanager.monitoring - alertmanager_url: monitoring-alertmanager-tls + alertmanager_url: alertmanager-tls enable_alertmanager_discovery: true enable_alertmanager_v2: true enable_api: true @@ -1609,8 +1609,8 @@ _EOT # A client cert, to connect to the AlertManager API template { data = <<_EOT -{{- with pkiCert "pki/monitoring/issue/monitoring-loki" - (printf "common_name=loki-%s.monitoring" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "pki/monitoring/issue/loki" + (printf "common_name=loki-%s.monitoring.consul" (env "NOMAD_ALLOC_INDEX")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}} {{ .Cert }} {{ .Key }} @@ -1666,7 +1666,7 @@ _EOT # The main service is the vector source # It will provide access to other services through the mesh (like loki) service { - name = "monitoring-vector-aggregator" + name = "vector-aggregator" port = 9000 meta { metrics-port = "${NOMAD_HOST_PORT_metrics}" @@ -1678,7 +1678,7 @@ _EOT sidecar_service { proxy { upstreams { - destination_name = "monitoring-loki" + destination_name = "loki" local_bind_port = 3100 # Work arround, see https://github.com/hashicorp/nomad/issues/18538 destination_type = "service" @@ -1909,4 +1909,300 @@ _EOT } } + + group "interface" { + + shutdown_delay = "6s" + + 
network { + mode = "bridge" + port "metrics" {} + } + + + volume "data" { + source = "grafana-data" + type = "csi" + access_mode = "single-node-writer" + attachment_mode = "file-system" + } + + + service { + name = "grafana" + port = 3000 + + + meta { + metrics-port = "${NOMAD_HOST_PORT_metrics}" + alloc = "${NOMAD_ALLOC_INDEX}" + } + + connect { + sidecar_service { + proxy { + upstreams { + destination_name = "postgres" + local_bind_port = 5432 + # Work arround, see https://github.com/hashicorp/nomad/issues/18538 + destination_type = "service" + } + } + } + sidecar_task { + config { + args = [ + "-c", + "${NOMAD_SECRETS_DIR}/envoy_bootstrap.json", + "-l", + "${meta.connect.log_level}", + "--concurrency", + "${meta.connect.proxy_concurrency}", + "--disable-hot-restart" + ] + } + + resources { + cpu = 50 + memory = 64 + } + + } + } + + + check { + name = "health" + type = "http" + path = "/api/health" + expose = true + interval = "30s" + timeout = "8s" + } + + tags = [ + + "traefik.enable=true", + "traefik.http.routers.monitoring-grafana.entrypoints=https", + "traefik.http.routers.monitoring-grafana.rule=Host(`grafana.example.org`)", + "traefik.http.middlewares.csp-monitoring-grafana.headers.contentsecuritypolicy=default-src 'self';font-src 'self' data:;img-src 'self' data:;script-src 'self' 'unsafe-inline' 'unsafe-eval';style-src 'self' 'unsafe-inline';", + "traefik.http.routers.monitoring-grafana.middlewares=security-headers@file,rate-limit-std@file,forward-proto@file,inflight-std@file,hsts@file,compression@file,csp-monitoring-grafana", + + ] + } + + + # The prometheus metrics proxy, adding mTLS to the metrics endpoint + task "metrics-proxy" { + driver = "docker" + user = 8995 + + config { + image = "nginxinc/nginx-unprivileged:alpine" + force_pull = true + volumes = [ + "local/default.conf:/etc/nginx/conf.d/default.conf:ro" + ] + pids_limit = 100 + } + + lifecycle { + hook = "poststart" + sidecar = true + } + + vault { + policies = ["metrics"] + } + + template { 
+ data = <<_EOT +{{- with pkiCert "pki/monitoring/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_metrics")) }} +{{ .Cert }} +{{ .Key }}{{ end -}} +_EOT + destination = "secrets/metrics.bundle.pem" + } + + template { + data = <<_EOT +{{ with secret "pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }} +_EOT + destination = "local/monitoring.ca.pem" + } + + template { + data = <<_EOT +server { + listen {{ env "NOMAD_ALLOC_PORT_metrics" }} ssl; + http2 on; + + ssl_certificate /secrets/metrics.bundle.pem; + ssl_certificate_key /secrets/metrics.bundle.pem; + ssl_client_certificate /local/monitoring.ca.pem; + ssl_verify_client on; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_session_cache shared:SSL:10m; + ssl_session_timeout 1h; + ssl_session_tickets off; + gzip on; + gzip_types + text/plain; + gzip_vary on; + + server_tokens off; + + if ($request_method !~ ^(GET|HEAD)$ ) { + return 405; + } + location /metrics { + proxy_pass http://localhost:3000/metrics; + } +} +_EOT + destination = "local/default.conf" + } + + resources { + cpu = 10 + memory = 10 + memory_max = 20 + } + } + + + + + # Local memcached instance + task "memcached" { + driver = "docker" + user = 11211 + + lifecycle { + hook = "prestart" + sidecar = true + } + + config { + image = "memcached:alpine" + readonly_rootfs = true + force_pull = true + entrypoint = ["/local/memcached"] + } + + template { + data = <<_EOT +#!/bin/sh + +set -eu +exec memcached -l 127.0.0.1 -p 11211 -m {{ env "NOMAD_MEMORY_LIMIT" | parseInt | subtract 5 }} +_EOT + destination = "local/memcached" + perms = 755 + } + + resources { + cpu = 10 + memory = 20 + } + } + + + + task "grafana" { + + driver = "docker" + leader = true + + config { + image = "danielberteaud/grafana:10.4.1-1" + 
readonly_rootfs = true + pids_limit = 100 + command = "grafana" + args = [ + "server", + "--homepath=/opt/grafana", + "--config=/secrets/grafana.ini", + "--packaging=docker" + ] + } + + + vault { + policies = ["grafana"] + env = false + disable_file = true + change_mode = "noop" + } + + + + # Use a template block instead of env {} so we can fetch values from vault + template { + data = <<_EOT +LANG=fr_FR.utf8 +TZ=Europe/Paris +_EOT + destination = "secrets/.env" + perms = 400 + env = true + } + + + # Basic grafana configuration file + template { + data = <<_EOT +[server] +http_addr = 127.0.0.1 +http_port = 3000 +root_url = https://grafana.example.org +serve_from_sub_path = false + +[database] +type = postgres +name = grafana +host = 127.0.0.1:5432 +user = {{ with secret "database/creds/grafana" }}{{ .Data.username }}{{ end }} +password = {{ with secret "database/creds/grafana" }}{{ .Data.password }}{{ end }} + + +[remote_cache] +type = memcached +connstr = 127.0.0.1:11211 + +[analytics] +reporting_enabled = false +check_for_updates = false +check_for_plugin_updates = false + +[security] +cookie_secure = true +cookie_samesite = strict +x_xss_protection = true +secret_key = {{ with secret "kv/service/monitoring/grafana" }}{{ .Data.data.secret_key }}{{ end }} + +[dataproxy] +timeout = 120 + +_EOT + destination = "secrets/grafana.ini" + uid = 103000 + perms = 400 + } + + # Mount volume in /data for persistence + volume_mount { + volume = "data" + destination = "/data" + } + + + resources { + cpu = 100 + memory = 256 + } + + } + } } diff --git a/example/vault/policies/monitoring-alertmanager.hcl b/example/vault/policies/alertmanager.hcl similarity index 65% rename from example/vault/policies/monitoring-alertmanager.hcl rename to example/vault/policies/alertmanager.hcl index 9d1e050..2f37367 100644 --- a/example/vault/policies/monitoring-alertmanager.hcl +++ b/example/vault/policies/alertmanager.hcl @@ -1,5 +1,5 @@ -path "pki/monitoring/issue/monitoring-alertmanager" { 
+path "pki/monitoring/issue/alertmanager" { capabilities = ["update"] } diff --git a/example/vault/policies/monitoring-cluster-exporter.hcl b/example/vault/policies/cluster-exporter.hcl similarity index 60% rename from example/vault/policies/monitoring-cluster-exporter.hcl rename to example/vault/policies/cluster-exporter.hcl index 9b98d67..052d66e 100644 --- a/example/vault/policies/monitoring-cluster-exporter.hcl +++ b/example/vault/policies/cluster-exporter.hcl @@ -1,20 +1,19 @@ - # Read vault metrics path "sys/metrics" { capabilities = ["read", "list"] } # Get a cert for Nomad -path "pki/nomad/issue/monitoring-cluster-exporter" { +path "pki/nomad/issue/cluster-exporter" { capabilities = ["update"] } # Get a cert for Consul -path "pki/consul/issue/monitoring-cluster-exporter" { +path "pki/consul/issue/cluster-exporter" { capabilities = ["update"] } # Get a consul token -path "consul/creds/monitoring-cluster-exporter" { +path "consul/creds/cluster-exporter" { capabilities = ["read"] } diff --git a/example/vault/policies/consul-exporter.hcl b/example/vault/policies/consul-exporter.hcl new file mode 100644 index 0000000..f01d53b --- /dev/null +++ b/example/vault/policies/consul-exporter.hcl @@ -0,0 +1,3 @@ +path "consul/creds/consul-exporter" { + capabilities = ["read"] +} diff --git a/example/vault/policies/grafana.hcl b/example/vault/policies/grafana.hcl new file mode 100644 index 0000000..b2dbe86 --- /dev/null +++ b/example/vault/policies/grafana.hcl @@ -0,0 +1,7 @@ +path "database/creds/grafana" { + capabilities = ["read"] +} + +path "kv/data/service/monitoring/grafana" { + capabilities = ["read"] +} diff --git a/example/vault/policies/monitoring-loki.hcl b/example/vault/policies/loki.hcl similarity index 67% rename from example/vault/policies/monitoring-loki.hcl rename to example/vault/policies/loki.hcl index 9c42dca..c0ff099 100644 --- a/example/vault/policies/monitoring-loki.hcl +++ b/example/vault/policies/loki.hcl @@ -1,5 +1,5 @@ -path 
"pki/monitoring/issue/monitoring-loki" { +path "pki/monitoring/issue/loki" { capabilities = ["update"] } diff --git a/example/vault/policies/monitoring-consul-exporter.hcl b/example/vault/policies/monitoring-consul-exporter.hcl deleted file mode 100644 index aeadd60..0000000 --- a/example/vault/policies/monitoring-consul-exporter.hcl +++ /dev/null @@ -1,4 +0,0 @@ - -path "consul/creds/monitoring-consul-exporter" { - capabilities = ["read"] -} diff --git a/example/vault/policies/monitoring-prometheus.hcl b/example/vault/policies/prometheus.hcl similarity index 57% rename from example/vault/policies/monitoring-prometheus.hcl rename to example/vault/policies/prometheus.hcl index 30e594f..b9ad796 100644 --- a/example/vault/policies/monitoring-prometheus.hcl +++ b/example/vault/policies/prometheus.hcl @@ -1,5 +1,5 @@ -path "pki/monitoring/issue/monitoring-prometheus" { +path "pki/monitoring/issue/prometheus" { capabilities = ["update"] } @@ -7,6 +7,6 @@ path "kv/service/monitoring/prometheus" { capabilities = ["read"] } -path "consul/creds/monitoring-prometheus" { +path "consul/creds/prometheus" { capabilities = ["read"] } diff --git a/monitoring-exporters.nomad.hcl b/exporters.nomad.hcl similarity index 90% rename from monitoring-exporters.nomad.hcl rename to exporters.nomad.hcl index 02564ca..38c2e7c 100644 --- a/monitoring-exporters.nomad.hcl +++ b/exporters.nomad.hcl @@ -17,7 +17,7 @@ job "[[ .instance ]]-exporters" { } service { - name = "[[ .instance ]]-ping-exporter[[ .consul.suffix ]]" + name = "ping-exporter[[ .consul.suffix ]]" port = "ping" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -26,7 +26,7 @@ job "[[ .instance ]]-exporters" { } service { - name = "[[ .instance ]]-blackbox-exporter[[ .consul.suffix ]]" + name = "blackbox-exporter[[ .consul.suffix ]]" port = "blackbox" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -34,7 +34,7 @@ job "[[ .instance ]]-exporters" { } service { - name = "[[ .instance ]]-consul-exporter[[ .consul.suffix ]]" + name = "consul-exporter[[ 
.consul.suffix ]]" port = "ping" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -43,7 +43,7 @@ job "[[ .instance ]]-exporters" { } service { - name = "[[ .instance ]]-cluster-exporter[[ .consul.suffix ]]" + name = "cluster-exporter[[ .consul.suffix ]]" port = "cluster" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -129,7 +129,7 @@ _EOT template { data = <<_EOT -CONSUL_HTTP_TOKEN={{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" }}{{ .Data.token }}{{ end }} +CONSUL_HTTP_TOKEN={{ with secret "consul/creds/consul-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }} _EOT destination = "secrets/.consul.env" uid = 100000 @@ -204,7 +204,7 @@ _EOT # Get a Nomad client certificate template { data = <<_EOT -{{- with pkiCert "pki/nomad/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }} +{{- with pkiCert "pki/nomad/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }} {{ .Data.Cert }} {{ .Data.Key }} {{- end }} @@ -228,7 +228,7 @@ _EOT # Same for Consul template { data = <<_EOT -{{- with pkiCert "pki/consul/issue/[[ .instance ]]-cluster-exporter" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }} +{{- with pkiCert "pki/consul/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }} {{ .Data.Cert }} {{ .Data.Key }} {{- end }} diff --git a/images/grafana/Dockerfile b/images/grafana/Dockerfile new file mode 100644 index 0000000..db96243 --- /dev/null +++ b/images/grafana/Dockerfile @@ -0,0 +1,59 @@ +FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] AS builder + +ARG GRAFANA_VERSION=[[ .monitoring.grafana.version ]] \ + GRAFANA_PLUGINS=[[ join .monitoring.grafana.plugins "," ]] + +ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz /tmp +ADD https://dl.grafana.com/oss/release/grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256 
/tmp +RUN set -eux &&\ + apk --no-cache add \ + tar \ + curl \ + ca-certificates \ + bash \ + gcompat \ + libc6-compat \ + &&\ + ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\ + cd /tmp &&\ + echo "$(cat grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz.sha256) grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz" | sha256sum -c &&\ + tar xzf grafana-${GRAFANA_VERSION}.linux-amd64.tar.gz &&\ + mv grafana-v${GRAFANA_VERSION} /opt/grafana &&\ + mkdir /opt/grafana/plugins &&\ + IFS=',' &&\ + for PLUGIN in ${GRAFANA_PLUGINS}; do /opt/grafana/bin/grafana cli --pluginsDir /opt/grafana/plugins plugins install ${PLUGIN}; done + +FROM [[ .docker.repo ]][[ .docker.base_images.alpine.image ]] +MAINTAINER [[ .docker.maintainer ]] + +ENV PATH=/opt/grafana/bin/:${PATH} \ + GF_PATHS_DATA=/data \ + GF_PATHS_PLUGINS=/opt/grafana/plugins \ + GF_LOG_MODE=console + +COPY --from=builder /opt/grafana /opt/grafana +RUN set -eux &&\ + apk --no-cache add \ + gcompat \ + libc6-compat \ + &&\ + ln -s /lib/libc.so.6 /usr/lib/libresolv.so.2 &&\ + addgroup -g 3000 grafana &&\ + adduser --system \ + --ingroup grafana \ + --disabled-password \ + --uid 3000 \ + --home /opt/grafana \ + --no-create-home \ + --shell /sbin/nologin \ + grafana &&\ + mkdir /data &&\ + chown -R grafana:grafana /data /opt/grafana/plugins &&\ + chmod 700 /data + +WORKDIR /opt/grafana +USER grafana +CMD ["grafana", \ + "server", \ + "--homepath=/opt/grafana", \ + "--packaging=docker"] diff --git a/init/consul b/init/consul index f7ecc70..7acc0b7 100755 --- a/init/consul +++ b/init/consul @@ -1,17 +1,17 @@ #!/bin/sh # vim: syntax=sh -vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-prometheus \ +vault write consul/roles/prometheus[[ .consul.suffix ]] \ ttl=720h \ max_ttl=720h \ - consul_policies="[[ .instance ]]-prometheus" + consul_policies="[[ .instance ]]" -vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-consul-exporter \ +vault write consul/roles/consul-exporter[[ .consul.suffix ]] \ ttl=720h \ max_ttl=720h \ - 
consul_policies="[[ .instance ]]-prometheus" + consul_policies="[[ .instance ]]" -vault write [[ .vault.root ]]consul/roles/[[ .instance ]]-cluster-exporter \ +vault write consul/roles/cluster-exporter[[ .consul.suffix ]] \ ttl=720h \ max_ttl=720h \ - consul_policies="[[ .instance ]]-prometheus" + consul_policies="[[ .instance ]]" diff --git a/init/grafana-vault-database b/init/grafana-vault-database new file mode 100755 index 0000000..89ca986 --- /dev/null +++ b/init/grafana-vault-database @@ -0,0 +1,5 @@ +#!/bin/sh + +set -euo pipefail + +[[ template "common/vault.mkpgrole.sh" merge .monitoring.grafana . ]] diff --git a/init/pki b/init/pki index 6b8ac44..7a89073 100755 --- a/init/pki +++ b/init/pki @@ -6,8 +6,8 @@ set -euo pipefail [[ template "common/vault.mkpki.sh" $c ]] # Create a role for alertmanager -vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-alertmanager \ - allowed_domains="[[ .instance ]]" \ +vault write [[ $c.vault.pki.path ]]/roles/alertmanager[[ .consul.suffix ]] \ + allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -19,8 +19,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-alertmanager \ ou="[[ $c.vault.pki.ou ]]" # Create a role for prometheus (which will only be a client, for AlertManager) -vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-prometheus \ - allowed_domains="[[ .instance ]]" \ +vault write [[ $c.vault.pki.path ]]/roles/prometheus[[ .consul.suffix ]] \ + allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -32,8 +32,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-prometheus \ ou="[[ $c.vault.pki.ou ]]" # Create a role for loki (which will only be a client, for AlertManager) -vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-loki \ - allowed_domains="[[ .instance ]]" \ +vault write [[ $c.vault.pki.path ]]/roles/loki[[ .consul.suffix
]] \ + allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -45,8 +45,8 @@ vault write [[ $c.vault.pki.path ]]/roles/[[ .instance ]]-loki \ ou="[[ $c.vault.pki.ou ]]" # Create a role for metrics exporters (server only) -vault write [[ $c.vault.pki.path ]]/roles/metrics \ - allowed_domains="[[ .instance ]]" \ +vault write [[ $c.vault.pki.path ]]/roles/metrics[[ .consul.suffix ]] \ + allowed_domains="[[ .instance ]].[[ .consul.domain ]]" \ allow_bare_domains=false \ allow_subdomains=true \ allow_localhost=false \ @@ -60,7 +60,7 @@ vault write [[ $c.vault.pki.path ]]/roles/metrics \ ou="[[ $c.vault.pki.ou ]]" # Create a role on the Nomad PKI for the cluster exporter -vault write pki/nomad/roles/[[ .instance ]]-cluster-exporter \ +vault write pki/nomad/roles/cluster-exporter[[ .consul.suffix ]] \ allowed_domains='nomad.[[ .consul.domain ]]' \ allow_subdomains=true \ allow_wildcard_certificates=false \ @@ -71,7 +71,7 @@ vault write pki/nomad/roles/[[ .instance ]]-cluster-exporter \ ou="Cluster metrics exporter" # Create a role on the Consul PKI for the cluster exporter -vault write pki/consul/roles/[[ .instance ]]-cluster-exporter \ +vault write pki/consul/roles/cluster-exporter[[ .consul.suffix ]] \ allowed_domains="consul.[[ .consul.domain ]]" \ allow_bare_domains=false \ allow_subdomains=true \ diff --git a/prep.d/10-montoring-rand-secrets b/prep.d/10-montoring-rand-secrets new file mode 100755 index 0000000..1490edb --- /dev/null +++ b/prep.d/10-montoring-rand-secrets @@ -0,0 +1,5 @@ +#!/bin/sh + +set -euo pipefail + +[[ template "common/vault.rand_secrets" merge .monitoring . 
]] diff --git a/monitoring-services.nomad.hcl b/services.nomad.hcl similarity index 83% rename from monitoring-services.nomad.hcl rename to services.nomad.hcl index cfd0100..a17ba3a 100644 --- a/monitoring-services.nomad.hcl +++ b/services.nomad.hcl @@ -17,7 +17,7 @@ job "[[ .instance ]]-services" { [[ template "common/volumes" $c ]] service { - name = "[[ .instance ]]-prometheus[[ .consul.suffix ]]" + name = "prometheus[[ .consul.suffix ]]" port = 9090 [[ template "common/service_meta" $c ]] @@ -122,8 +122,8 @@ _EOT # A client cert, to connect to the AlertManager API template { data = <<_EOT -{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus" - (printf "common_name=prometheus-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/prometheus" + (printf "common_name=prometheus-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}} {{ .Cert }} {{ .Key }} @@ -177,7 +177,7 @@ _EOT # This service is used for the different instances of alertmanager to communicate service { - name = "[[ .instance ]]-alertmanager-gossip[[ .consul.suffix ]]" + name = "alertmanager-gossip[[ .consul.suffix ]]" port = "cluster" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -187,7 +187,7 @@ _EOT # This service is used by prometheus. As it needs to be able to reach every instances, it cannot use # the service mesh. 
The exposed port uses mTLS, so it's safe to expose it outside of the mesh service { - name = "[[ .instance ]]-alertmanager-tls[[ .consul.suffix ]]" + name = "alertmanager-tls[[ .consul.suffix ]]" port = "web-tls" meta { alloc = "${NOMAD_ALLOC_INDEX}" @@ -197,7 +197,7 @@ _EOT # This service is exposed through the service mesh # and can be used to reach the web interface through Traefik service { - name = "[[ .instance ]]-alertmanager[[ .consul.suffix ]]" + name = "alertmanager[[ .consul.suffix ]]" port = 9093 [[ template "common/service_meta" $c ]] [[ template "common/connect" $c ]] @@ -224,7 +224,7 @@ _EOT # This task will handle mTLS to the AlertManager API # And expose it as plain http on 127.0.0.1 for Traefik (through the service mesh) and for the metrics proxy - task "tls-proxy" { + task "untls-proxy" { driver = "[[ $c.nomad.driver ]]" user = 9093 @@ -256,8 +256,8 @@ _EOT # Certifiate used by AlertManager template { data = <<_EOT -{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager" - (printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/alertmanager" + (printf "common_name=alertmanager-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX")) (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }} {{ .Cert }} @@ -342,8 +342,8 @@ _EOT # Certifiate used by AlertManager template { data = <<_EOT -{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager" - (printf "common_name=alertmanager-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/alertmanager" + (printf "common_name=alertmanager-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX")) (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) }} {{ .Cert }} @@ -389,7 +389,7 
@@ _EOT [[ template "common/volumes" $c ]] service { - name = "[[ .instance ]]-loki[[ .consul.suffix ]]" + name = "loki[[ .consul.suffix ]]" port = 3100 [[ template "common/service_meta" $c ]] [[ template "common/connect" $c ]] @@ -443,8 +443,8 @@ _EOT # A client cert, to connect to the AlertManager API template { data = <<_EOT -{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-loki" - (printf "common_name=loki-%s.[[ .instance ]]" (env "NOMAD_ALLOC_INDEX")) +{{- with pkiCert "[[ $c.vault.pki.path ]]/issue/loki" + (printf "common_name=loki-%s.[[ .instance ]].[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX")) (printf "ttl=%dh" (env "NOMAD_ALLOC_INDEX" | parseInt | multiply 24 | add 72)) -}} {{ .Cert }} {{ .Key }} @@ -496,7 +496,7 @@ _EOT # The main service is the vector source # It will provide access to other services through the mesh (like loki) service { - name = "[[ .instance ]]-vector-aggregator[[ .consul.suffix ]]" + name = "vector-aggregator[[ .consul.suffix ]]" port = 9000 [[ template "common/service_meta" $c ]] [[ template "common/connect" $c ]] @@ -509,7 +509,7 @@ _EOT # The syslog UDP service can be used to ingest standard syslog logs from other # devices, and can be exposed by Traefik for this service { - name = "[[ .instance ]]-syslog-udp[[ .consul.suffix ]]" + name = "syslog-udp[[ .consul.suffix ]]" port = "syslog-udp" tags = [ [[ template "common/traefik_tags" merge $c.syslog_udp $c ]] @@ -522,7 +522,7 @@ _EOT [[- if $c.fluentd.enabled ]] # The fluentd service can be used to ingest fluentd logs service { - name = "[[ .instance ]]-syslog-udp[[ .consul.suffix ]]" + name = "syslog-udp[[ .consul.suffix ]]" port = 24224 tags = [ [[ template "common/traefik_tags" merge $c.fluentd $c ]] @@ -555,6 +555,84 @@ _EOT change_signal = "SIGHUP" } +[[ template "common/resources" $c ]] + } + } + + group "interface" { +[[- $c := merge .monitoring.grafana .monitoring . 
]] + + shutdown_delay = "6s" + + network { + mode = "bridge" + port "metrics" {} + } + +[[ template "common/volumes" $c ]] + + service { + name = "grafana[[ .consul.suffix ]]" + port = 3000 + +[[ template "common/metrics_meta" $c ]] +[[ template "common/connect" $c ]] + + check { + name = "health" + type = "http" + path = "/api/health" + expose = true + interval = "30s" + timeout = "8s" + } + + tags = [ +[[ template "common/traefik_tags" $c ]] + ] + } + +[[ template "common/task.metrics_proxy" $c ]] +[[ template "common/task.pgpooler" $c ]] +[[ template "common/task.memcached" ]] + + task "grafana" { + + driver = "[[ $c.nomad.driver ]]" + leader = true + + config { + image = "[[ $c.image ]]" + readonly_rootfs = true + pids_limit = 100 + command = "grafana" + args = [ + "server", + "--homepath=/opt/grafana", + "--config=/secrets/grafana.ini", + "--packaging=docker" + ] + } + +[[ template "common/vault.policies" $c ]] +[[ template "common/file_env" $c ]] + + # Basic grafana configuration file + template { + data = <<_EOT +[[ template "monitoring/grafana/grafana.ini" $c ]] +_EOT + destination = "secrets/grafana.ini" + uid = 103000 + perms = 400 + } + + # Mount volume in /data for persistence + volume_mount { + volume = "data" + destination = "/data" + } + [[ template "common/resources" $c ]] } } diff --git a/templates/alertmanager/nginx.conf b/templates/alertmanager/nginx.conf index 493e743..a27d027 100644 --- a/templates/alertmanager/nginx.conf +++ b/templates/alertmanager/nginx.conf @@ -5,7 +5,7 @@ server { proxy_ssl_certificate /secrets/alertmanager.bundle.pem; proxy_ssl_certificate_key /secrets/alertmanager.bundle.pem; proxy_ssl_verify on; - proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.monitoring; + proxy_ssl_name alertmanager-{{ env "NOMAD_ALLOC_INDEX" }}.[[ .instance ]].[[ .consul.domain ]]; proxy_ssl_trusted_certificate /local/monitoring.ca.pem; allow 127.0.0.1; deny all; diff --git a/templates/alertmanager/start.sh 
b/templates/alertmanager/start.sh index 88345bc..057641d 100644 --- a/templates/alertmanager/start.sh +++ b/templates/alertmanager/start.sh @@ -10,7 +10,7 @@ exec alertmanager \ --web.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_web-tls" }} \ --cluster.listen-address=0.0.0.0:{{ env "NOMAD_ALLOC_PORT_cluster" }} \ --cluster.advertise-address={{ env "NOMAD_HOST_ADDR_cluster" }} \ -{{- range service "[[ .instance ]]-am-gossip[[ .consul.suffix ]]" -}} +{{- range service "alertmanager-gossip[[ .consul.suffix ]]" -}} {{- if not (eq (env "NOMAD_ALLOC_INDEX") (index .ServiceMeta "alloc")) }} --cluster.peer={{ .Address }}:{{ .Port }} \ {{ end -}} diff --git a/templates/cluster-exporter/nginx.conf b/templates/cluster-exporter/nginx.conf index ceef2a1..740f4c3 100644 --- a/templates/cluster-exporter/nginx.conf +++ b/templates/cluster-exporter/nginx.conf @@ -24,7 +24,7 @@ server { return 405; } - set $consul_token "{{ with secret "consul/creds/[[ .instance ]]-cluster-exporter" }}{{ .Data.token }}{{ end }}"; + set $consul_token "{{ with secret "consul/creds/cluster-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}"; {{- range service "nomad-client" }} location /nomad-client/{{ .Node }} { diff --git a/templates/grafana/grafana.ini b/templates/grafana/grafana.ini new file mode 100644 index 0000000..88941dc --- /dev/null +++ b/templates/grafana/grafana.ini @@ -0,0 +1,37 @@ +[server] +http_addr = 127.0.0.1 +http_port = 3000 +root_url = [[ .monitoring.grafana.public_url ]] +serve_from_sub_path = [[ if eq (urlParse .monitoring.grafana.public_url).Path "" ]]false[[ else ]]true[[ end ]] + +[database] +type = postgres +name = [[ .postgres.database ]] +[[- if ne .postgres.pooler.engine "none" ]] +host = 127.0.0.1:[[ .postgres.pooler.port ]] +user = [[ .instance ]] +password = {{ env "NOMAD_ALLOC_ID" }} +ssl_mode = disable +[[- else ]] +host = [[ .postgres.host ]]:[[ .postgres.port ]] +user = [[ .postgres.user ]] +password = [[ .postgres.password ]] +[[ end ]] + 
+[remote_cache] +type = memcached +connstr = 127.0.0.1:11211 + +[analytics] +reporting_enabled = false +check_for_updates = false +check_for_plugin_updates = false + +[security] +cookie_secure = true +cookie_samesite = strict +x_xss_protection = true +secret_key = {{ with secret "[[ .vault.root ]]kv/service/[[ .instance ]]/grafana" }}{{ .Data.data.secret_key }}{{ end }} + +[dataproxy] +timeout = 120 diff --git a/templates/loki/loki.yml b/templates/loki/loki.yml index e554349..913aeb3 100644 --- a/templates/loki/loki.yml +++ b/templates/loki/loki.yml @@ -53,7 +53,7 @@ limits_config: max_query_parallelism: 128 ruler: - alertmanager_url: [[ .instance ]]-alertmanager-tls[[ .consul.suffix ]] + alertmanager_url: alertmanager-tls[[ .consul.suffix ]] enable_alertmanager_discovery: true alertmanager_client: tls_cert_path: /secrets/loki.bundle.pem diff --git a/templates/prometheus/prometheus.yml b/templates/prometheus/prometheus.yml index 974858b..7b69591 100644 --- a/templates/prometheus/prometheus.yml +++ b/templates/prometheus/prometheus.yml @@ -19,13 +19,13 @@ alerting: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }} datacenter: [[ .consul.datacenter ]] relabel_configs: # Only keep alertmanagers - source_labels: [__meta_consul_service] action: keep - regex: [[ .instance ]]-alertmanager-tls[[ .consul.suffix ]] + regex: alertmanager-tls[[ .consul.suffix ]] scrape_configs: @@ -40,7 +40,7 @@ scrape_configs: [[- end ]] [[- if gt (len .exporters.blackbox.http_probes) 0 ]] - +{{- if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }} # Blackbox Exporter HTTP targets - job_name: http_probe metrics_path: /probe @@ -52,6 +52,7 @@ scrape_configs: params: module: ["http_2xx"] static_configs: + {{ range $idx, $instance := 
service "blackbox-exporter[[ .consul.suffix ]]" }} - targets: [[- range $http_probe := .exporters.blackbox.http_probes ]] - [[ $http_probe ]] @@ -62,11 +63,13 @@ scrape_configs: - source_labels: [__param_target] target_label: instance - target_label: __address__ - replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }} + replacement: {{ .Address }}:{{ .Port }} + {{ end }} +{{- end }} [[- end ]] [[- if gt (len .exporters.blackbox.tcp_probes) 0 ]] - +{{ if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }} # Blackbox Exporter TCP targets - job_name: tcp_probe metrics_path: /probe @@ -78,6 +81,7 @@ scrape_configs: params: module: ["tcp_connect"] static_configs: + {{ range $idx, $instance := service "blackbox-exporter[[ .consul.suffix ]]" }} [[- range $target := .exporters.blackbox.tcp_probes ]] - [[ $target ]] [[- end ]] @@ -87,7 +91,9 @@ scrape_configs: - source_labels: [__param_target] target_label: instance - target_label: __address__ - replacement: {{ range $idx, $instance := service "[[ .instance ]]-blackbox-exporter" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }} + replacement: {{ .Address }}:{{ .Port }} + {{ end }} +{{- end }} [[- end ]] # Cluster services @@ -100,7 +106,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }} datacenter: [[ .consul.datacenter ]] relabel_configs: @@ -117,7 +123,7 @@ scrape_configs: - source_labels: [__meta_consul_service] regex: (.+) - replacement: {{ range $idx, $instance := service
"cluster-exporter[[ .consul.suffix ]]" }}{{ if eq $idx 0 }}{{ .Address }}:{{ .Port }}{{ end }}{{ end }} target_label: __address__ # Rewrite the job labels to the name of the service @@ -143,7 +149,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }} datacenter: [[ .consul.datacenter ]] relabel_configs: @@ -203,7 +209,7 @@ scrape_configs: consul_sd_configs: - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500 scheme: http - token: {{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]-prometheus" }}{{ .Data.token }}{{ end }} + token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }} datacenter: [[ .consul.datacenter ]] relabel_configs: diff --git a/variables.yml b/variables.yml index 816bbe7..9075974 100644 --- a/variables.yml +++ b/variables.yml @@ -6,6 +6,10 @@ vault: pki: path: '[[ .prometheus.vault_pki ]]' ou: Monitoring + rand_secrets: + - path: grafana + fields: + - secret_key monitoring: @@ -40,7 +44,7 @@ monitoring: memory: 32 vault: policies: - - '[[ .instance ]]-consul-exporter[[ .consul.suffix ]]' + - 'consul-exporter[[ .consul.suffix ]]' cluster: image: nginxinc/nginx-unprivileged:alpine @@ -50,12 +54,12 @@ monitoring: memory: 15 vault: policies: - - '[[ .instance ]]-cluster-exporter[[ .consul.suffix ]]' + - 'cluster-exporter[[ .consul.suffix ]]' - metrics prometheus: - version: 2.50.1 + version: 2.51.0 count: 1 @@ -70,12 +74,12 @@ monitoring: volumes: data: type: csi - source: '[[ .instance ]]-prometheus-data[[ .consul.suffix ]]' + source: 'prometheus-data' per_alloc: true vault: policies: - - '[[ .instance ]]-prometheus[[ .consul.suffix ]]' + - 'prometheus[[ .consul.suffix ]]' jobs: {} alert_rules: {} @@ -110,7 +114,7 @@ monitoring: 
strip_prefix: false volumes: data: - source: '[[ .instance ]]-alertmanager-data[[ .consul.suffix ]]' + source: 'alertmanager-data' type: csi per_alloc: true prometheus: @@ -118,13 +122,13 @@ monitoring: vault: policies: - metrics - - '[[ .instance ]]-alertmanager[[ .consul.suffix ]]' + - 'alertmanager[[ .consul.suffix ]]' email: from: alertmanager@[[ .consul.domain ]] custom_config: {} loki: - version: 2.9.5 + version: 2.9.6 image: '[[ .docker.repo ]]loki:[[ .monitoring.loki.version ]]-1' env: {} resources: @@ -132,7 +136,7 @@ monitoring: memory: 512 vault: policies: - - '[[ .instance ]]-loki[[ .consul.suffix ]]' + - 'loki[[ .consul.suffix ]]' public_url: https://loki.example.org traefik: router: loki @@ -143,7 +147,7 @@ monitoring: volumes: data: type: csi - source: '[[ .instance ]]-loki-data[[ .consul.suffix ]]' + source: 'loki-data' vector: version: 0.36.1 @@ -159,7 +163,7 @@ monitoring: consul: connect: upstreams: - - destination_name: '[[ .instance ]]-loki[[ .consul.suffix ]]' + - destination_name: 'loki[[ .consul.suffix ]]' local_bind_port: 3100 fluentd: enabled: false @@ -181,6 +185,46 @@ monitoring: prometheus: metrics_url: http://127.0.0.1:9001/metrics + grafana: + version: 10.4.1 + image: '[[ .docker.repo ]]grafana:[[ .monitoring.grafana.version ]]-1' + env: {} + resources: + cpu: 100 + memory: 256 + public_url: https://grafana.example.org + plugins: + #- alexanderzobnin-zabbix-app + #- ddurieux-glpi-app + - grafana-clock-panel + - grafana-piechart-panel + traefik: + enabled: true + router: grafana + strip_prefix: false + consul: + connect: + upstreams: + - destination_name: postgres[[ .consul.suffix ]] + local_bind_port: 5432 + volumes: + data: + type: csi + source: 'grafana-data' + vault: + policies: + - 'grafana[[ .consul.suffix ]]' + database: + role: grafana + pgrole: grafana + postgres: + database: grafana + user: '{{ with secret "[[ .vault.root ]]database/creds/grafana" }}{{ .Data.username }}{{ end }}' + password: '{{ with secret "[[ .vault.root 
]]database/creds/grafana" }}{{ .Data.password }}{{ end }}' + pooler: + mode: session + prometheus: + metrics_url: http://localhost:3000[[ (urlParse .monitoring.grafana.public_url).Path ]]/metrics prometheus: enabled: true diff --git a/vault/policies/monitoring-alertmanager.hcl b/vault/policies/alertmanager.hcl similarity index 72% rename from vault/policies/monitoring-alertmanager.hcl rename to vault/policies/alertmanager.hcl index 82970a3..443de06 100644 --- a/vault/policies/monitoring-alertmanager.hcl +++ b/vault/policies/alertmanager.hcl @@ -1,5 +1,5 @@ [[- $c := merge .monitoring.alertmanager .monitoring . ]] -path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-alertmanager" { +path "[[ $c.vault.pki.path ]]/issue/alertmanager" { capabilities = ["update"] } diff --git a/vault/policies/cluster-exporter.hcl b/vault/policies/cluster-exporter.hcl new file mode 100644 index 0000000..052d66e --- /dev/null +++ b/vault/policies/cluster-exporter.hcl @@ -0,0 +1,19 @@ +# Read vault metrics +path "sys/metrics" { + capabilities = ["read", "list"] +} + +# Get a cert for Nomad +path "pki/nomad/issue/cluster-exporter" { + capabilities = ["update"] +} + +# Get a cert for Consul +path "pki/consul/issue/cluster-exporter" { + capabilities = ["update"] +} + +# Get a consul token +path "consul/creds/cluster-exporter" { + capabilities = ["read"] +} diff --git a/vault/policies/consul-exporter.hcl b/vault/policies/consul-exporter.hcl new file mode 100644 index 0000000..f01d53b --- /dev/null +++ b/vault/policies/consul-exporter.hcl @@ -0,0 +1,3 @@ +path "consul/creds/consul-exporter" { + capabilities = ["read"] +} diff --git a/vault/policies/grafana.hcl b/vault/policies/grafana.hcl new file mode 100644 index 0000000..39c9e73 --- /dev/null +++ b/vault/policies/grafana.hcl @@ -0,0 +1,7 @@ +path "[[ .vault.root ]]database/creds/[[ .monitoring.grafana.vault.database.role ]]" { + capabilities = ["read"] +} + +path "[[ .vault.root ]]kv/data/service/[[ .instance ]]/grafana" { + capabilities = 
["read"] +} diff --git a/vault/policies/monitoring-loki.hcl b/vault/policies/loki.hcl similarity index 73% rename from vault/policies/monitoring-loki.hcl rename to vault/policies/loki.hcl index bee4e15..4521638 100644 --- a/vault/policies/monitoring-loki.hcl +++ b/vault/policies/loki.hcl @@ -1,5 +1,5 @@ [[- $c := merge .monitoring.loki .monitoring . ]] -path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-loki" { +path "[[ $c.vault.pki.path ]]/issue/loki" { capabilities = ["update"] } diff --git a/vault/policies/monitoring-cluster-exporter.hcl b/vault/policies/monitoring-cluster-exporter.hcl deleted file mode 100644 index 7a4eb51..0000000 --- a/vault/policies/monitoring-cluster-exporter.hcl +++ /dev/null @@ -1,20 +0,0 @@ -[[- $c := merge .monitoring.exporters.cluster .monitoring.exporters .monitoring . ]] -# Read vault metrics -path "sys/metrics" { - capabilities = ["read", "list"] -} - -# Get a cert for Nomad -path "pki/nomad/issue/[[ .instance ]]-cluster-exporter" { - capabilities = ["update"] -} - -# Get a cert for Consul -path "pki/consul/issue/[[ .instance ]]-cluster-exporter" { - capabilities = ["update"] -} - -# Get a consul token -path "consul/creds/[[ .instance ]]-cluster-exporter" { - capabilities = ["read"] -} diff --git a/vault/policies/monitoring-consul-exporter.hcl b/vault/policies/monitoring-consul-exporter.hcl deleted file mode 100644 index dfd6ce6..0000000 --- a/vault/policies/monitoring-consul-exporter.hcl +++ /dev/null @@ -1,4 +0,0 @@ -[[- $c := merge .monitoring.exporters.consul .monitoring.exporters .monitoring . 
]] -path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-consul-exporter" { - capabilities = ["read"] -} diff --git a/vault/policies/monitoring-prometheus.hcl b/vault/policies/prometheus.hcl similarity index 61% rename from vault/policies/monitoring-prometheus.hcl rename to vault/policies/prometheus.hcl index 0f8e595..7f93109 100644 --- a/vault/policies/monitoring-prometheus.hcl +++ b/vault/policies/prometheus.hcl @@ -1,5 +1,5 @@ [[- $c := merge .monitoring.prometheus .monitoring . ]] -path "[[ $c.vault.pki.path ]]/issue/[[ .instance ]]-prometheus" { +path "[[ $c.vault.pki.path ]]/issue/prometheus" { capabilities = ["update"] } @@ -7,6 +7,6 @@ path "[[ $c.vault.root ]]kv/service/[[ .instance ]]/prometheus" { capabilities = ["read"] } -path "[[ $c.vault.root ]]consul/creds/[[ .instance ]]-prometheus" { +path "[[ $c.vault.root ]]consul/creds/prometheus" { capabilities = ["read"] }