# monitoring/monitoring-exporters.nomad.hcl (254 lines, 6.7 KiB, HCL)

job "[[ .instance ]]-exporters" {
[[- $c := merge .monitoring.exporters . ]]
[[ template "common/job_start" $c ]]
# Run the exporters. A separate job is used so the exporters can run in a distinct node_pool
group "exporters" {
count = [[ $c.count ]]
network {
  # Bridge mode networking for the whole group
  mode = "bridge"

  # One port label per exporter; no static value is set for any of them
  port "ping" {}
  port "blackbox" {}
  port "consul" {}
  port "cluster" {}
}
service {
  # Service entry for the ping exporter, bound to the "ping" port label
  name = "ping-exporter[[ .consul.suffix ]]"
  port = "ping"

  meta {
    # Allocation index and host port of the ping exporter, published as service metadata
    alloc        = "${NOMAD_ALLOC_INDEX}"
    metrics-port = "${NOMAD_HOST_PORT_ping}"
  }
}
service {
  # Service entry for the blackbox exporter, bound to the "blackbox" port label
  name = "blackbox-exporter[[ .consul.suffix ]]"
  port = "blackbox"

  meta {
    # Allocation index, published as service metadata
    alloc = "${NOMAD_ALLOC_INDEX}"
  }
}
service {
  # Service entry for the consul exporter.
  # It must be bound to the "consul" port label (not "ping"), so the registered
  # port agrees with the metrics-port metadata below.
  name = "consul-exporter[[ .consul.suffix ]]"
  port = "consul"

  meta {
    # Allocation index and host port of the consul exporter, published as service metadata
    alloc        = "${NOMAD_ALLOC_INDEX}"
    metrics-port = "${NOMAD_HOST_PORT_consul}"
  }
}
service {
  # Service entry for the cluster metrics proxy, bound to the "cluster" port label
  name = "cluster-exporter[[ .consul.suffix ]]"
  port = "cluster"

  meta {
    # Allocation index, published as service metadata
    alloc = "${NOMAD_ALLOC_INDEX}"
  }
}
[[- if gt (len $c.ping.probes) 0 ]]
[[- $e := merge $c.ping $c ]]
# The ping exporter collects ICMP ping stats and exposes them.
# It is only rendered when at least one ping probe is configured.
# Note: blackbox could do this too, but as pings require privileges, it is better to grant
# them to a smaller, more focused container. This one only handles ICMP ping checks, and
# only for what is listed in its configuration file.
task "ping-exporter" {
  driver = "[[ $e.nomad.driver ]]"

  config {
    image   = "[[ $e.image ]]"
    command = "ping_exporter"
    args = [
      "--web.listen-address=127.0.0.1:9427",
      "--config.path=/local/config.yml"
    ]

    # Pings require privileges
    privileged  = true
    userns_mode = "host"

    pids_limit      = 30
    readonly_rootfs = true
  }

  [[ template "common/file_env" $e ]]

  # Configuration file passed to the exporter through --config.path
  template {
    destination = "local/config.yml"
    data        = <<_EOT
[[ template "monitoring/ping_exporter/config.yml" $e ]]
_EOT
  }

  [[ template "common/resources" $e ]]
}
[[- end ]]
[[- if or (gt (len $c.blackbox.tcp_probes) 0) (gt (len $c.blackbox.http_probes) 0) ]]
[[- $e := merge $c.blackbox $c ]]
# The blackbox exporter probes http/tcp targets and exposes the results for prometheus.
# It is only rendered when at least one TCP or HTTP probe is configured.
task "blackbox-exporter" {
  driver = "[[ $e.nomad.driver ]]"

  config {
    image           = "[[ $e.image ]]"
    pids_limit      = 30
    readonly_rootfs = true
  }

  [[ template "common/file_env" $e ]]
  [[ template "common/resources" $e ]]
}
[[- end ]]
# Export consul services status to prometheus
task "consul-exporter" {
[[- $e := merge $c.consul $c ]]
  driver = "[[ $e.nomad.driver ]]"

  config {
    image           = "[[ $e.image ]]"
    readonly_rootfs = true
    pids_limit      = 30
    # The container command is the start script rendered by the first template below
    command = "/local/consul-exporter"
  }

  [[ template "common/file_env" $e ]]
  [[ template "common/vault.policies" $e ]]

  # Start script used as the container command
  template {
    data = <<_EOT
[[ template "monitoring/consul-exporter/start.sh" $e ]]
_EOT
    destination = "local/consul-exporter"
    # perms is a file mode string; quoted like the other templates of this job
    perms = "755"
  }

  # Consul token fetched from Vault, loaded into the task environment (env = true)
  template {
    data = <<_EOT
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/consul-exporter[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
_EOT
    destination = "secrets/.consul.env"
    uid         = 100000
    gid         = 100000
    # perms is a file mode string; quoted like the other templates of this job
    perms = "400"
    env   = true
  }

  [[ template "common/resources" $e ]]
}
# The cluster metrics proxy exposes prometheus metrics from the various nodes of the
# cluster (Nomad, Consul and Vault).
# It also exposes the metrics of the other exporters, with mTLS.
task "cluster-metrics-proxy" {
[[- $e := merge $c.cluster $c ]]
  driver = "[[ $e.nomad.driver ]]"
  # NOTE(review): the templates below are owned by uid 108685, presumably this user
  # shifted by a 100000 user namespace offset - confirm
  user = 8685

  # Runs as a poststart sidecar
  lifecycle {
    sidecar = true
    hook    = "poststart"
  }

  config {
    image           = "[[ $e.image ]]"
    pids_limit      = 30
    readonly_rootfs = true
    # Mount the rendered config in the nginx conf dir
    volumes = [
      "secrets/metrics.conf:/etc/nginx/conf.d/default.conf"
    ]
    [[ template "common/tmpfs" "/tmp" ]]
  }

  [[ template "common/vault.policies" $e ]]

  # Main nginx configuration, which will proxypass requests to the real metrics endpoints.
  # A SIGHUP is sent to the task when the rendered content changes.
  template {
    destination   = "secrets/metrics.conf"
    perms         = "0440"
    uid           = 108685
    gid           = 100000
    change_mode   = "signal"
    change_signal = "SIGHUP"
    data          = <<_EOT
[[ template "monitoring/cluster-exporter/nginx.conf" $e ]]
_EOT
  }

  # Certificate and key used to add mTLS to the metrics endpoints, issued for the
  # host IP of the "cluster" port.
  # NOTE(review): this template reads .Cert / .Key while the Nomad and Consul client
  # bundles below read .Data.Cert / .Data.Key - confirm both forms are valid.
  # NOTE(review): unlike those bundles, no perms/uid/gid are set here although the
  # file holds a private key - confirm this is intended.
  template {
    destination   = "secrets/metrics.bundle.pem"
    change_mode   = "signal"
    change_signal = "SIGHUP"
    data          = <<_EOT
{{- with pkiCert "[[ .prometheus.vault_pki ]]/issue/metrics" (printf "ip_sans=%s" (env "NOMAD_HOST_IP_cluster")) }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
  }

  # CA chain of the monitoring PKI
  template {
    destination = "local/monitoring.ca.pem"
    data        = <<_EOT
{{ with secret "[[ .vault.root ]]pki/monitoring/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
  }

  # Nomad client certificate and key (24h TTL)
  template {
    destination   = "secrets/nomad_client_bundle.pem"
    perms         = "0400"
    uid           = 108685
    gid           = 100000
    change_mode   = "signal"
    change_signal = "SIGHUP"
    data          = <<_EOT
{{- with pkiCert "pki/nomad/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.nomad.[[ .consul.domain ]]" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
  }

  # CA chain for Nomad
  template {
    destination = "local/nomad_ca.crt"
    data        = <<_EOT
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
  }

  # Consul client certificate and key (24h TTL)
  template {
    destination   = "secrets/consul_client_bundle.pem"
    perms         = "0400"
    uid           = 108685
    gid           = 100000
    change_mode   = "signal"
    change_signal = "SIGHUP"
    data          = <<_EOT
{{- with pkiCert "pki/consul/issue/cluster-exporter[[ .consul.suffix ]]" "common_name=metrics-proxy.consul.[[ .consul.domain ]]" "ttl=24h" }}
{{ .Data.Cert }}
{{ .Data.Key }}
{{- end }}
_EOT
  }

  # CA chain for Consul
  template {
    destination = "local/consul_ca.crt"
    data        = <<_EOT
{{ with secret "pki/consul/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
  }

  [[ template "common/resources" $e ]]
}
}
}