# Monitoring agent job: a per-node log collector plus optional node-exporter
# and consul-agent-exporter groups (each guarded by its own "enabled" flag).
job "[[ .instance ]]-agent" {
[[- $c := merge .monitoring.agent .monitoring . ]]

[[ template "common/job_start" $c ]]

  # System job: scheduled on every eligible client node
  type = "system"
# This group collects logs from the allocations running on the node.
# It uses nomad-vector-logger to query the Nomad API and discover running allocations,
# and then vector to read logs from all the discovered allocations.
# Logs are forwarded to loki through the service mesh.
group "logs-collector" {
[[ $c := merge $c.vector $c ]]

  network {
    mode = "bridge"
    port "metrics" {}
  }

  # Try harder to restart tasks if they fail
  restart {
    attempts = 20
    interval = "5m"
    mode = "delay"
  }

[[ template "common/volumes" $c ]]

  service {
    name = "vector-agent[[ .consul.suffix ]]"
[[ template "common/service_meta" $c ]]
[[ template "common/connect" $c ]]
  }

  # Discovers running allocations through the Nomad API and renders the
  # vector configuration consumed by the "vector" task below.
  task "nomad-vector-logger" {
[[- $n := merge $c.nomad_vector_logger $c ]]
    driver = "[[ $n.nomad.driver ]]"

    # Use a random user instead of root
    # (the same uid owns the client certificate bundle rendered below)
    user = 3987

    config {
      image = "[[ $n.image ]]"
      readonly_rootfs = true
      pids_limit = 50
      # Nomad Vector Logger needs to run on the host's network namespace
      # so it can reach the Nomad Agent API on localhost:4646
      network_mode = "host"
      # Host network namespace requires disabling user namespace
      userns_mode = "host"
      command = "nomad-vector-logger"
      args = [
        "--config",
        "/local/nomad-vector-logger.toml"
      ]
    }

    # We want to run Nomad Vector Logger before the vector agent,
    # and keep it running alongside it (sidecar)
    lifecycle {
      hook = "prestart"
      sidecar = true
    }

[[ template "common/vault.policies" $n ]]
[[ template "common/file_env" $n ]]

    # Env to access the Nomad API (token from Vault, mTLS material rendered below)
    template {
      data = <<_EOT
NOMAD_TOKEN={{ with secret "nomad/creds/nomad-vector-logger[[ .consul.suffix ]]" }}{{ .Data.secret_id }}{{ end }}
NOMAD_ADDR=https://localhost:4646
NOMAD_CLIENT_CERT=/secrets/nomad.bundle.pem
NOMAD_CLIENT_KEY=/secrets/nomad.bundle.pem
NOMAD_CACERT=/local/nomad.ca.pem
_EOT
      destination = "secrets/.nomad-vector-logger.env"
      # Quoted octal string, consistent with the certificate template below
      perms = "0400"
      env = true
    }

    # The main configuration file for nomad-vector-logger
    template {
      data = <<_EOT
[[ template "monitoring/agent/nomad-vector-logger.toml" $n ]]
_EOT
      destination = "local/nomad-vector-logger.toml"
    }

    # Disable the default nomad.toml template, as we provide our own nomad.yml template
    template {
      data = "# Disable the default toml template"
      destination = "local/template/nomad.toml"
    }

    # The vector configuration template used to generate the vector conf
    template {
      data = <<_EOT
[[ template "monitoring/agent/vector-template.yml" $n ]]
_EOT
      destination = "local/template/nomad.yml"
      # {{ }} is used by the template itself, so switch delimiters to prevent
      # consul-template from interpreting it
      left_delimiter = "{{{"
      right_delimiter = "}}}"
    }

    # Get a client cert for the Nomad API (certificate and key in a single bundle)
    template {
      data = <<_EOT
{{- with pkiCert "pki/nomad/issue/nomad-vector-logger[[ .consul.suffix ]]"
  "common_name=nomad-vector-logger[[ .consul.suffix ]].nomad.[[ .consul.domain ]]"
  "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
      destination = "secrets/nomad.bundle.pem"
      uid = 3987
      perms = "0400"
    }

    # The CA chain to validate Nomad certificates
    template {
      data = <<_EOT
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
      destination = "local/nomad.ca.pem"
    }

    # Read-only mount of the "nomad" volume
    # (presumably the Nomad data directory; confirm against the volumes definition)
    volume_mount {
      volume = "nomad"
      destination = "/nomad"
      read_only = true
    }

[[ template "common/resources" $n ]]
  }

  # Nomad Vector Logger can take a few seconds to generate the initial configuration file.
  # This task ensures the file exists before vector is started (to prevent an error, as the
  # transform_nomad_alloc_* sources won't have anything before the file exists)
  task "wait-for-vector-conf" {
    driver = "[[ $c.nomad.driver ]]"

    config {
      # NOTE(review): unlike the other tasks, the image is hard-coded and uses
      # the mutable "latest" tag; consider taking it from the template context
      image = "busybox:latest"
      readonly_rootfs = true
      pids_limit = 20
      command = "sh"
      args = [
        "-c",
        "echo 'Waiting for config file /alloc/data/vector_conf/nomad.yml to be generated'; until ls /alloc/data/vector_conf/nomad.yml >/dev/null 2>&1; do echo '.'; sleep 1; done"
      ]
    }

    # Plain prestart hook (no sidecar): the task exits once the file is present
    lifecycle {
      hook = "prestart"
    }

    # The task will shut down once the config is available, so just
    # allocate very few resources
    resources {
      cpu = 10
      memory = 10
    }
  }

  # The main vector task, which will read logs using the config file generated by Nomad Vector Logger
  task "vector" {
    driver = "[[ $c.nomad.driver ]]"
    leader = true

    config {
      image = "[[ $c.image ]]"
      userns_mode = "host"
      readonly_rootfs = true
      pids_limit = 1000
      # Static config from /local/vector.yml, plus the generated per-allocation
      # config from the shared alloc directory; reload when either changes
      args = [
        "--watch-config",
        "--config", "/local/vector.yml",
        "--config-dir", "/alloc/data/vector_conf"
      ]
    }

[[ template "common/vault.policies" $c ]]

    env {
      NODE_UNIQUE_NAME = "${node.unique.name}"
    }

[[ template "common/metrics_cert" $c ]]
[[ template "common/artifacts" $c ]]

    # Main vector configuration
    template {
      data = <<_EOT
[[ template "monitoring/agent/vector.yml" $c ]]
_EOT
      destination = "local/vector.yml"
      # Alternate delimiters, so that any {{ }} in the vector config is left untouched
      left_delimiter = "{{{"
      right_delimiter = "}}}"

      wait {
        min = "5s"
        max = "30s"
      }
    }

    volume_mount {
      volume = "nomad"
      destination = "/nomad"
      read_only = true
    }

    volume_mount {
      volume = "data"
      destination = "/data"
      read_only = false
    }

[[ template "common/resources" $c ]]
  }
}
2024-03-25 22:23:31 +01:00
[[- if .monitoring.agent.node_exporter.enabled ]]

# This group runs the prometheus node-exporter to expose prometheus metrics from the node.
# It is only rendered when monitoring.agent.node_exporter.enabled is set.
group "node-exporter" {
[[- $c := merge .monitoring.agent.node_exporter .monitoring.agent .monitoring . ]]

  network {
    mode = "bridge"
    port "metrics" {}
  }

[[ template "common/volumes" $c ]]

  service {
    name = "node-exporter[[.consul.suffix ]]"
[[ template "common/service_meta" $c ]]
  }

  task "node-exporter" {
    driver = "[[ $c.nomad.driver ]]"
    user = 100320

    config {
      image = "[[ $c.image ]]"
      # Share the host PID and user namespaces with the exporter
      pid_mode = "host"
      userns_mode = "host"
      readonly_rootfs = true
      pids_limit = 50
      command = "/usr/local/bin/node_exporter"
      # Fixed flags first (host root mounted at /host, TLS config rendered below,
      # listen on the dynamic "metrics" port), then any extra args from the context
      args = [
        "--path.rootfs=/host",
        "--web.config.file=/local/tls.yml",
        "--web.listen-address=:${NOMAD_ALLOC_PORT_metrics}",
[[- range $arg := $c.args ]]
        "[[ $arg ]]",
[[- end ]]
      ]
    }

[[ template "common/vault.policies" $c ]]
[[ template "common/metrics_cert" $c ]]
[[ template "common/artifacts" $c ]]

    # Web (TLS) configuration passed to node_exporter through --web.config.file
    template {
      data = <<_EOT
[[ template "monitoring/agent/node-exporter.yml" $c ]]
_EOT
      destination = "local/tls.yml"
    }

    # Read-only view of the "host" volume, with mounts propagated from host to task
    volume_mount {
      volume = "host"
      destination = "/host"
      read_only = true
      propagation_mode = "host-to-task"
    }

[[ template "common/resources" $c ]]
  }
}
[[- end ]]
[[- if .monitoring.agent.consul_agent_exporter.enabled ]]

# This group runs the "consul-agent-metrics-proxy" task, configured with an nginx
# config file, and registers it as a service with a "metrics" port.
# It is only rendered when monitoring.agent.consul_agent_exporter.enabled is set.
group "consul-agent-exporter" {
[[- $c := merge .monitoring.agent.consul_agent_exporter .monitoring.agent .monitoring . ]]
[[ template "common/group_start" $c ]]

  network {
    mode = "bridge"
    port "metrics" {}
  }

  service {
    # NOTE(review): unlike the other services of this job, no consul suffix is
    # appended to this name; confirm this is intentional
    name = "consul-agent"
[[ template "common/service_meta" $c ]]
  }

  task "consul-agent-metrics-proxy" {
    driver = "[[ $c.nomad.driver ]]"

    config {
[[ template "common/image" $c ]]
[[ template "common/tmpfs" "/tmp" ]]
      # Use the nginx configuration rendered below as the default server config
      volumes = [
        "secrets/nginx.conf:/etc/nginx/conf.d/default.conf:ro"
      ]
    }

[[ template "common/vault.policies" $c ]]
[[ template "common/file_env" $c ]]
[[ template "common/metrics_cert" $c ]]

    # nginx configuration, bind-mounted into the container by the volumes entry above
    template {
      data = <<_EOT
[[ template "monitoring/agent/consul_agent_nginx.conf" $c ]]
_EOT
      destination = "secrets/nginx.conf"
    }

[[ template "common/resources" $c ]]
  }
}
[[- end ]]
2024-03-25 12:27:46 +01:00
}