# Monitoring agent job, scheduled as a Nomad "system" job.
# It is made of up to three groups:
#  - logs-collector: vector (optionally fed by nomad-vector-logger) plus an
#    optional logrotate sidecar
#  - node-exporter: prometheus node-exporter (optional)
#  - consul-agent-exporter: metrics proxy task using an nginx configuration (optional)
job "[[ .instance ]]-agent" {

[[- $c := merge .monitoring.agent .monitoring . ]]
[[ template "common/job_start" $c ]]

  type = "system"

  # This group will collect logs from the allocation running on the node
  # It uses nomad-vector-logger to query the Nomad API and discover running allocations
  # and then vector to read logs from all the discovered allocations. Logs are forwarded to loki through the service mesh
  group "logs-collector" {

[[ $c := merge $c.vector $c ]]

    network {
      mode = "bridge"
      port "metrics" {}
[[- if .monitoring.agent.log_collection.fluentd.enabled ]]
      port "fluent" {
        static = [[ .monitoring.agent.log_collection.fluentd.port ]]
      }
[[- end ]]
    }

    # Try harder to restart tasks if they fail
    restart {
      attempts = 20
      interval = "5m"
      mode     = "delay"
    }

[[ template "common/volumes" $c ]]

    service {
      name = "vector-agent[[ .consul.suffix ]]"
[[ template "common/service_meta" $c ]]
[[ template "common/connect" $c ]]
    }

[[- if .monitoring.agent.log_collection.files.enabled ]]

    task "nomad-vector-logger" {

[[- $n := merge $c.nomad_vector_logger $c ]]

      driver = "[[ $n.nomad.driver ]]"

      # Use a random user instead of root
      user = 3987

      config {
        image           = "[[ $n.image ]]"
        readonly_rootfs = true
        pids_limit      = 50
        # Nomad Vector Logger needs to run on the host's network namespace
        # so it can reach the Nomad Agent API on localhost:4646
        network_mode = "host"
        # Host network namespace requires disabling user namespace
        userns_mode = "host"
        command     = "nomad-vector-logger"
        args = [
          "--config",
          "/local/nomad-vector-logger.toml"
        ]
      }

      # We want to run Nomad Vector Logger before vector agent
      lifecycle {
        hook    = "prestart"
        sidecar = true
      }

[[ template "common/vault.policies" $n ]]
[[ template "common/file_env" $n ]]

      # Env to access Nomad API
      template {
        data = <<_EOT
NOMAD_TOKEN={{ with secret "nomad/creds/nomad-vector-logger[[ .consul.suffix ]]" }}{{ .Data.secret_id }}{{ end }}
NOMAD_ADDR=https://localhost:4646
NOMAD_CLIENT_CERT=/secrets/nomad.bundle.pem
NOMAD_CLIENT_KEY=/secrets/nomad.bundle.pem
NOMAD_CACERT=/local/nomad.ca.pem
_EOT
        destination = "secrets/.nomad-vector-logger.env"
        # Quoted octal mode, consistent with the other templates of this job
        perms = "0400"
        env   = true
      }

      # The main configuration file for nomad-vector-logger
      template {
        data = <<_EOT
[[ template "monitoring/agent/nomad-vector-logger.toml" $n ]]
_EOT
        destination = "local/nomad-vector-logger.toml"
      }

      # Disable the default nomad.toml template, as we provide our own nomad.yml template
      template {
        data        = "# Disable the default toml template"
        destination = "local/template/nomad.toml"
      }

      # The vector configuration template used to generate the vector conf
      template {
        data = <<_EOT
[[ template "monitoring/agent/vector-template.yml" $n ]]
_EOT
        destination = "local/template/nomad.yml"
        # {{ }} is used by the template, so prevent consul-template from interpreting it
        left_delimiter  = "{{{"
        right_delimiter = "}}}"
      }

      # Get a client cert for the Nomad API
      template {
        data = <<_EOT
{{- with pkiCert "pki/nomad/issue/nomad-vector-logger[[ .consul.suffix ]]" "common_name=nomad-vector-logger[[ .consul.suffix ]].nomad.[[ .consul.domain ]]" "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{- end }}
_EOT
        destination = "secrets/nomad.bundle.pem"
        # Owned by the task user (see user = 3987 above) so it can read its own key
        uid   = 3987
        perms = "0400"
      }

      # The CA chain to validate Nomad certificates
      template {
        data = <<_EOT
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
        destination = "local/nomad.ca.pem"
      }

      # Nomad vector logger needs read only access to the log dir
      volume_mount {
        volume      = "nomad"
        destination = "/nomad"
        read_only   = true
      }

[[ template "common/resources" $n ]]
    }

    # Nomad Vector Logger can take a few seconds to generate the initial configuration file
    # This task ensures the file exists before vector is started (to prevent an error as the
    # transform_nomad_alloc_* sources won't have anything before the file exists)
    task "wait-for-vector-conf" {
      driver = "[[ $c.nomad.driver ]]"

      config {
        image           = "busybox:latest"
        readonly_rootfs = true
        pids_limit      = 20
        command         = "sh"
        args = [
          "-c",
          "echo 'Waiting for config file /alloc/data/vector_conf/nomad.yml to be generated'; until ls /alloc/data/vector_conf/nomad.yml >/dev/null 2>&1; do echo '.'; sleep 1; done"
        ]
      }

      # Non-sidecar prestart hook: the task only has to run before the main task starts
      lifecycle {
        hook = "prestart"
      }

      # The task will shutdown once the config is available, so just
      # allocate very few resources
      resources {
        cpu    = 10
        memory = 10
      }
    }
[[- end ]]

    # The main vector task, which will read logs using the config file generated by Nomad Vector Logger
    task "vector" {
      driver = "[[ $c.nomad.driver ]]"
      leader = true

      config {
        image           = "[[ $c.image ]]"
        userns_mode     = "host"
        readonly_rootfs = true
        pids_limit      = 1000
        args = [
          "--config",
          "/local/vector.yml",
[[- if .monitoring.agent.log_collection.files.enabled ]]
          "--config-dir",
          "/alloc/data/vector_conf",
          "--watch-config"
[[- end ]]
        ]
      }

[[ template "common/vault.policies" $c ]]

      env {
        NODE_UNIQUE_NAME = "${node.unique.name}"
      }

[[ template "common/metrics_cert" $c ]]
[[ template "common/artifacts" $c ]]

      # Main vector configuration
      template {
        data = <<_EOT
[[ template "monitoring/agent/vector.yml" $c ]]
_EOT
        destination     = "local/vector.yml"
        left_delimiter  = "{{{"
        right_delimiter = "}}}"

        wait {
          min = "5s"
          max = "30s"
        }
      }

[[- if or .monitoring.agent.log_collection.files.enabled .monitoring.agent.log_collection.create_nomad_logs.enabled ]]

      volume_mount {
        volume      = "nomad"
        destination = "/nomad"
[[- /* Reading logs only needs read only access, unlike creating Nomad's logs
       which requires read/write. So the mount is read only solely when log
       creation is disabled, even if file collection is enabled as well. */]]
[[- if not .monitoring.agent.log_collection.create_nomad_logs.enabled ]]
        read_only = true
[[- end ]]
      }
[[- end ]]

      volume_mount {
        volume      = "data"
        destination = "/data"
        read_only   = false
      }

[[ template "common/resources" $c ]]
    }

[[- /* when creating Nomad log files, we need to handle rotation */]]
[[- if .monitoring.agent.log_collection.create_nomad_logs.enabled ]]

    task "logrotate" {
      driver = "[[ $c.nomad.driver ]]"

      # Runs as a sidecar started after the main task
      lifecycle {
        hook    = "poststart"
        sidecar = true
      }

      config {
        image           = "[[.docker.repo ]][[ .docker.base_images.alpine.image ]]"
        readonly_rootfs = true
        userns_mode     = "host"
        pids_limit      = 100
        command         = "/local/rotate.sh"
      }

      # The rotation script. "${" is escaped as "$${" in the rendered script text
      template {
        data = <<_EOT
[[ tmpl.Exec "monitoring/agent/rotate.sh" | replaceAll "${" "$${" ]]
_EOT
        destination = "local/rotate.sh"
        uid         = 100000
        gid         = 100000
        # Quoted octal mode, consistent with the other templates of this job
        perms = "0755"
      }

      # No read_only flag here: rotating the log files needs write access
      volume_mount {
        volume      = "nomad"
        destination = "/nomad"
      }

      resources {
        cpu        = 10
        memory     = 10
        memory_max = 30
      }
    }
[[- end ]]
  }

[[- if .monitoring.agent.node_exporter.enabled ]]

  # This group runs the prometheus node-exporter to expose prometheus metrics from the node
  group "node-exporter" {

[[- $c := merge .monitoring.agent.node_exporter .monitoring.agent .monitoring . ]]

    network {
      mode = "bridge"
      port "metrics" {}
    }

[[ template "common/volumes" $c ]]

    service {
      name = "node-exporter[[.consul.suffix ]]"
[[ template "common/service_meta" $c ]]
    }

    task "node-exporter" {
      driver = "[[ $c.nomad.driver ]]"
      user   = 100320

      config {
        image           = "[[ $c.image ]]"
        pid_mode        = "host"
        userns_mode     = "host"
        readonly_rootfs = true
        pids_limit      = 50
        command         = "/usr/local/bin/node_exporter"
        # Fixed arguments first, then any extra argument from the configuration
        args = [
          "--path.rootfs=/host",
          "--web.config.file=/local/tls.yml",
          "--web.listen-address=:${NOMAD_ALLOC_PORT_metrics}",
[[- range $arg := $c.args ]]
          "[[ $arg ]]",
[[- end ]]
        ]
      }

[[ template "common/vault.policies" $c ]]
[[ template "common/metrics_cert" $c ]]
[[ template "common/artifacts" $c ]]

      # Web configuration file passed to node_exporter with --web.config.file
      template {
        data = <<_EOT
[[ template "monitoring/agent/node-exporter.yml" $c ]]
_EOT
        destination = "local/tls.yml"
      }

      # The host volume is exposed read only under /host (see --path.rootfs above)
      volume_mount {
        volume           = "host"
        destination      = "/host"
        read_only        = true
        propagation_mode = "host-to-task"
      }

[[ template "common/resources" $c ]]
    }
  }
[[- end ]]

[[- if .monitoring.agent.consul_agent_exporter.enabled ]]

  group "consul-agent-exporter" {

[[- $c := merge .monitoring.agent.consul_agent_exporter .monitoring.agent .monitoring . ]]
[[ template "common/group_start" $c ]]

    network {
      mode = "bridge"
      port "metrics" {}
    }

    service {
      name = "consul-agent"
[[ template "common/service_meta" $c ]]
    }

    task "consul-agent-metrics-proxy" {
      driver = "[[ $c.nomad.driver ]]"

      config {
[[ template "common/image" $c ]]
[[ template "common/tmpfs" "/tmp" ]]
        # The rendered nginx configuration is mounted read only as the default vhost
        volumes = [
          "secrets/nginx.conf:/etc/nginx/conf.d/default.conf:ro"
        ]
      }

[[ template "common/vault.policies" $c ]]
[[ template "common/file_env" $c ]]
[[ template "common/metrics_cert" $c ]]

      template {
        data = <<_EOT
[[ template "monitoring/agent/consul_agent_nginx.conf" $c ]]
_EOT
        destination = "secrets/nginx.conf"
      }

[[ template "common/resources" $c ]]
    }
  }
[[- end ]]
}