diff --git a/example/monitoring-agent.nomad.hcl b/example/monitoring-agent.nomad.hcl
index 616ecea..c296da0 100644
--- a/example/monitoring-agent.nomad.hcl
+++ b/example/monitoring-agent.nomad.hcl
@@ -535,4 +535,149 @@ _EOT
 
     }
   }
+
+  group "consul-agent-exporter" {
+
+
+    shutdown_delay = "6s"
+
+
+
+    ephemeral_disk {
+      # Use minimal ephemeral disk
+      size = 101
+    }
+
+
+    network {
+      mode = "bridge"
+      port "metrics" {}
+    }
+
+    service {
+      name = "consul-agent"
+      meta {
+        metrics-port = "${NOMAD_HOST_PORT_metrics}"
+        alloc = "${node.unique.name}"
+        datacenter = "${NOMAD_DC}"
+        group = "${NOMAD_GROUP_NAME}"
+        job = "${NOMAD_JOB_NAME}"
+        namespace = "${NOMAD_NAMESPACE}"
+        node = "${node.unique.name}"
+        region = "${NOMAD_REGION}"
+      }
+
+    }
+
+    task "consul-agent-metrics-proxy" {
+      driver = "docker"
+
+      config {
+        image = "nginxinc/nginx-unprivileged:alpine"
+        readonly_rootfs = true
+
+        mount {
+          type = "tmpfs"
+          target = "/tmp"
+          tmpfs_options {
+            size = 3000000
+          }
+        }
+
+        volumes = [
+          "secrets/nginx.conf:/etc/nginx/conf.d/default.conf:ro"
+        ]
+      }
+
+
+      vault {
+        policies = ["metrics", "cluster-exporter"]
+        env = false
+        disable_file = true
+        change_mode = "noop"
+      }
+
+
+
+      # Use a template block instead of env {} so we can fetch values from vault
+      template {
+        data = <<_EOT
+LANG=fr_FR.utf8
+TZ=Europe/Paris
+_EOT
+        destination = "secrets/.env"
+        perms = 400
+        env = true
+      }
+
+      # Get a certificate from vault to protect the metrics endpoint
+      template {
+        data = <<_EOT
+{{- with pkiCert "pki/monitoring/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_metrics")) }}
+{{ .Cert }}
+{{ .Key }}
+{{- end }}
+_EOT
+        destination = "secrets/metrics.bundle.pem"
+      }
+
+      # Get the root CA
+      template {
+        data = <<_EOT
+{{ with secret "pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
+_EOT
+        destination = "local/monitoring.ca.pem"
+      }
+
+      template {
+        data = <<_EOT
+# mTLS reverse proxy exposing the Consul agent's /v1/agent/metrics endpoint (Prometheus format).
+# Clients must present a certificate that verifies against /local/monitoring.ca.pem.
+server {
+  listen {{ env "NOMAD_ALLOC_PORT_metrics" }} ssl;
+  http2 on;
+
+  ssl_certificate /secrets/metrics.bundle.pem;
+  ssl_certificate_key /secrets/metrics.bundle.pem;
+  ssl_client_certificate /local/monitoring.ca.pem;
+  ssl_verify_client on;
+  ssl_protocols TLSv1.2 TLSv1.3;
+  ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
+  ssl_session_cache shared:SSL:10m;
+  ssl_session_timeout 1h;
+  ssl_session_tickets off;
+  gzip on;
+  gzip_types
+    text/plain;
+  gzip_vary on;
+
+  server_tokens off;
+
+  if ($request_method !~ ^(GET|HEAD)$ ) {
+    return 405;
+  }
+
+  set $consul_token "{{ with secret "consul/creds/cluster-exporter" }}{{ .Data.token }}{{ end }}";
+
+  # Exact match only: the proxy_pass URI ends with a query string, so a prefix match
+  # would forward malformed URIs for any longer path (e.g. /metrics/foo).
+  location = /metrics {
+    proxy_pass http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/agent/metrics?format=prometheus;
+    proxy_set_header X-Consul-Token $consul_token;
+  }
+}
+
+_EOT
+        destination = "secrets/nginx.conf"
+      }
+
+
+      resources {
+        cpu = 10
+        memory = 15
+        memory_max = 24
+      }
+
+    }
+  }
 }
diff --git a/monitoring-agent.nomad.hcl b/monitoring-agent.nomad.hcl
index 85bc5a0..e24f756 100644
--- a/monitoring-agent.nomad.hcl
+++ b/monitoring-agent.nomad.hcl
@@ -282,6 +282,49 @@ _EOT
         propagation_mode = "host-to-task"
       }
 
+[[ template "common/resources" $c ]]
+    }
+  }
+[[- end ]]
+
+[[- if .monitoring.agent.consul_agent_exporter.enabled ]]
+
+  group "consul-agent-exporter" {
+[[- $c := merge .monitoring.agent.consul_agent_exporter .monitoring.agent .monitoring . ]]
+
+[[ template "common/group_start" $c ]]
+
+    network {
+      mode = "bridge"
+      port "metrics" {}
+    }
+
+    service {
+      name = "consul-agent"
+[[ template "common/service_meta" $c ]]
+    }
+
+    task "consul-agent-metrics-proxy" {
+      driver = "[[ $c.nomad.driver ]]"
+
+      config {
+[[ template "common/image" $c ]]
+[[ template "common/tmpfs" "/tmp" ]]
+        volumes = [
+          "secrets/nginx.conf:/etc/nginx/conf.d/default.conf:ro"
+        ]
+      }
+
+[[ template "common/vault.policies" $c ]]
+[[ template "common/file_env" $c ]]
+[[ template "common/metrics_cert" $c ]]
+      template {
+        data = <<_EOT
+[[ template "monitoring/agent/consul_agent_nginx.conf" $c ]]
+_EOT
+        destination = "secrets/nginx.conf"
+      }
+
 [[ template "common/resources" $c ]]
     }
   }
diff --git a/templates/agent/consul_agent_nginx.conf b/templates/agent/consul_agent_nginx.conf
new file mode 100644
index 0000000..d3da4ba
--- /dev/null
+++ b/templates/agent/consul_agent_nginx.conf
@@ -0,0 +1,35 @@
+# mTLS reverse proxy exposing the Consul agent's /v1/agent/metrics endpoint (Prometheus format).
+# Clients must present a certificate that verifies against /local/monitoring.ca.pem.
+server {
+  listen {{ env "NOMAD_ALLOC_PORT_metrics" }} ssl;
+  http2 on;
+
+  ssl_certificate /secrets/metrics.bundle.pem;
+  ssl_certificate_key /secrets/metrics.bundle.pem;
+  ssl_client_certificate /local/monitoring.ca.pem;
+  ssl_verify_client on;
+  ssl_protocols TLSv1.2 TLSv1.3;
+  ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384;
+  ssl_session_cache shared:SSL:10m;
+  ssl_session_timeout 1h;
+  ssl_session_tickets off;
+  gzip on;
+  gzip_types
+    text/plain;
+  gzip_vary on;
+
+  server_tokens off;
+
+  if ($request_method !~ ^(GET|HEAD)$ ) {
+    return 405;
+  }
+
+  set $consul_token "{{ with secret "consul/creds/cluster-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}";
+
+  # Exact match only: the proxy_pass URI ends with a query string, so a prefix match
+  # would forward malformed URIs for any longer path (e.g. /metrics/foo).
+  location = /metrics {
+    proxy_pass http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/agent/metrics?format=prometheus;
+    proxy_set_header X-Consul-Token $consul_token;
+  }
+}
diff --git a/variables.yml b/variables.yml
index 431bd2f..a1a7659 100644
--- a/variables.yml
+++ b/variables.yml
@@ -465,7 +465,7 @@ monitoring:
         # there's no need to add a metrics_proxy task. Instead, we grant the metrics policy to vector so it can get
         # a certificate from vault
         policies:
-          - metrics[[ .consul.suffix ]]
+          - metrics
       consul:
         connect:
           upstreams:
@@ -522,7 +522,7 @@ monitoring:
         # This exporter can handle mTLS itself, so no need to create a metrics_proxy task, instead, grant the metrics policy
         # So it can get a certificate from vault
         policies:
-          - metrics[[ .consul.suffix ]]
+          - metrics
       # Args to add to the exporter on start
       args:
         - '--collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/(docker|containers)/.+|opt/nomad/data/(alloc|client))($|/)'
@@ -542,6 +542,22 @@ monitoring:
           source: host_root
           read_only: true
 
+    # Consul agents are not registered as services in the catalog,
+    # so they cannot be discovered. This adds a small nginx proxy which exposes the metrics of the local consul agent
+    # on every node (it runs as a system job)
+    consul_agent_exporter:
+      enabled: true
+      image: nginxinc/nginx-unprivileged:alpine
+      env: {}
+      resources:
+        cpu: 10
+        memory: 15
+        memory_max: 24
+      vault:
+        policies:
+          - metrics
+          - cluster-exporter[[ .consul.suffix ]]
+
   # Enable globaly prometheus for this bundle :-)
   prometheus:
     enabled: true