334 lines
8.5 KiB
HCL
334 lines
8.5 KiB
HCL
job "[[ .instance ]]-agent" {
|
|
|
|
[[- $c := merge .monitoring.agent .monitoring . ]]
|
|
|
|
[[ template "common/job_start" $c ]]
|
|
|
|
type = "system"
|
|
|
|
# This group will collect logs from the allocation running on the node
|
|
# It uses nomad-vector-logger to query the Nomad API and discover running allocations
|
|
# and then vector to read logs from all the discovered allocations. Logs are forwarded to loki through the service mesh
|
|
group "logs-collector" {
|
|
|
|
[[ $c := merge $c.vector $c ]]
|
|
|
|
network {
|
|
mode = "bridge"
|
|
port "metrics" {}
|
|
}
|
|
|
|
# Try harder to restart tasks if they fail
|
|
restart {
|
|
attempts = 20
|
|
interval = "5m"
|
|
mode = "delay"
|
|
}
|
|
|
|
[[ template "common/volumes" $c ]]
|
|
|
|
service {
|
|
name = "vector-agent[[ .consul.suffix ]]"
|
|
[[ template "common/service_meta" $c ]]
|
|
[[ template "common/connect" $c ]]
|
|
}
|
|
|
|
task "nomad-vector-logger" {
|
|
[[- $n := merge $c.nomad_vector_logger $c ]]
|
|
|
|
driver = "[[ $n.nomad.driver ]]"
|
|
# Use a random user instead of root
|
|
user = 3987
|
|
|
|
config {
|
|
image = "[[ $n.image ]]"
|
|
readonly_rootfs = true
|
|
pids_limit = 50
|
|
# Nomad Vector Logger needs to run on the host's network namespace
|
|
# so it can reach the Nomad Agent API on localhost:4646
|
|
network_mode = "host"
|
|
# Host network namespace requires disabling user namespace
|
|
userns_mode = "host"
|
|
command = "nomad-vector-logger"
|
|
args = [
|
|
"--config",
|
|
"/local/nomad-vector-logger.toml"
|
|
]
|
|
}
|
|
|
|
# We want to run Nomad Vector Logger before vector agent
|
|
lifecycle {
|
|
hook = "prestart"
|
|
sidecar = true
|
|
}
|
|
|
|
[[ template "common/vault.policies" $n ]]
|
|
[[ template "common/file_env" $n ]]
|
|
|
|
# Env to access Nomad API
|
|
template {
|
|
# Rendered into the task environment (env = true). Provides the token, address
# and TLS material paths used to reach the Nomad API on localhost:4646; the
# client certificate and key both point at the same bundle file.
data = <<_EOT
|
|
NOMAD_TOKEN={{ with secret "nomad/creds/nomad-vector-logger[[ .consul.suffix ]]" }}{{ .Data.secret_id }}{{ end }}
|
|
NOMAD_ADDR=https://localhost:4646
|
|
NOMAD_CLIENT_CERT=/secrets/nomad.bundle.pem
|
|
NOMAD_CLIENT_KEY=/secrets/nomad.bundle.pem
|
|
NOMAD_CACERT=/local/nomad.ca.pem
|
|
_EOT
|
|
destination = "secrets/.nomad-vector-logger.env"
|
|
# Octal mode given as a quoted string, consistent with the client certificate
# template of this task ("0400"): readable by the file owner only.
perms = "0400"
|
|
env = true
|
|
}
|
|
|
|
# The main configuration file for nomad-vector-logger
|
|
template {
|
|
data = <<_EOT
|
|
[[ template "monitoring/agent/nomad-vector-logger.toml" $n ]]
|
|
_EOT
|
|
destination = "local/nomad-vector-logger.toml"
|
|
}
|
|
|
|
# Disable the default nomad.toml template, as we provide our own nomad.yml template
|
|
template {
|
|
data = "# Disable the default toml template"
|
|
destination = "local/template/nomad.toml"
|
|
}
|
|
|
|
# The vector configuration template used to generate the vector conf
|
|
template {
|
|
data = <<_EOT
|
|
[[ template "monitoring/agent/vector-template.yml" $n ]]
|
|
_EOT
|
|
destination = "local/template/nomad.yml"
|
|
# {{ }} is used by the template, so prevent consul-template from interpreting it
|
|
left_delimiter = "{{{"
|
|
right_delimiter = "}}}"
|
|
}
|
|
|
|
# Get a client cert for the Nomad API
|
|
template {
|
|
data = <<_EOT
|
|
{{- with pkiCert "pki/nomad/issue/nomad-vector-logger[[ .consul.suffix ]]"
|
|
"common_name=nomad-vector-logger[[ .consul.suffix ]].nomad.[[ .consul.domain ]]"
|
|
"ttl=72h" }}
|
|
{{ .Cert }}
|
|
{{ .Key }}
|
|
{{- end }}
|
|
_EOT
|
|
destination = "secrets/nomad.bundle.pem"
|
|
uid = 3987
|
|
perms = "0400"
|
|
}
|
|
|
|
# The CA chain to validate Nomad certificates
|
|
template {
|
|
data = <<_EOT
|
|
{{ with secret "pki/nomad/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
|
|
_EOT
|
|
destination = "local/nomad.ca.pem"
|
|
}
|
|
|
|
# Read-only mount of the "nomad" volume at /nomad
|
|
volume_mount {
|
|
volume = "nomad"
|
|
destination = "/nomad"
|
|
read_only = true
|
|
}
|
|
|
|
[[ template "common/resources" $n ]]
|
|
}
|
|
|
|
# Nomad Vector Logger can take a few seconds to generate the initial configuration file
|
|
# This task ensures the file exists before vector is started (to prevent an error as the
|
|
# transform_nomad_alloc_* sources won't have anything before the file exists)
|
|
task "wait-for-vector-conf" {
|
|
|
|
# Short-lived helper: polls once per second until
# /alloc/data/vector_conf/nomad.yml exists, then exits.
driver = "[[ $c.nomad.driver ]]"
|
|
|
|
config {
|
|
image = "busybox:latest"
|
|
readonly_rootfs = true
|
|
pids_limit = 20
|
|
command = "sh"
|
|
args = [
|
|
"-c",
|
|
"echo 'Waiting for config file /alloc/data/vector_conf/nomad.yml to be generated'; until ls /alloc/data/vector_conf/nomad.yml >/dev/null 2>&1; do echo '.'; sleep 1; done"
|
|
]
|
|
}
|
|
|
|
# Prestart hook without the sidecar flag: per the Nomad lifecycle documentation
# the task has to finish before the main tasks of the group are started.
lifecycle {
|
|
hook = "prestart"
|
|
}
|
|
|
|
# The task will shut down once the config is available, so just
|
|
# allocate very few resources
|
|
resources {
|
|
cpu = 10
|
|
memory = 10
|
|
}
|
|
}
|
|
|
|
# The main vector task, which will read logs using the config file generated by Nomad Vector Logger
|
|
task "vector" {
|
|
|
|
driver = "[[ $c.nomad.driver ]]"
|
|
leader = true
|
|
|
|
config {
|
|
image = "[[ $c.image ]]"
|
|
userns_mode = "host"
|
|
readonly_rootfs = true
|
|
pids_limit = 1000
|
|
args = [
|
|
"--watch-config",
|
|
"--config", "/local/vector.yml",
|
|
"--config-dir", "/alloc/data/vector_conf"
|
|
]
|
|
}
|
|
|
|
[[ template "common/vault.policies" $c ]]
|
|
|
|
env {
|
|
NODE_UNIQUE_NAME = "${node.unique.name}"
|
|
}
|
|
|
|
[[ template "common/metrics_cert" $c ]]
|
|
[[ template "common/artifacts" $c ]]
|
|
|
|
# Main vector configuration
|
|
template {
|
|
# Body comes from the "monitoring/agent/vector.yml" template and is written to
# local/vector.yml, the file handed to vector through its --config argument.
data =<<_EOT
|
|
[[ template "monitoring/agent/vector.yml" $c ]]
|
|
_EOT
|
|
destination = "local/vector.yml"
|
|
# Custom delimiters: presumably for the same reason as the nomad.yml template
# of the nomad-vector-logger task (the rendered content itself uses {{ }}).
# NOTE(review): confirm against monitoring/agent/vector.yml.
left_delimiter = "{{{"
|
|
right_delimiter = "}}}"
|
|
|
|
# Bounds (5s minimum, 30s maximum) on how long rendering waits before the
# file is written; see the Nomad template `wait` documentation.
wait {
|
|
min = "5s"
|
|
max = "30s"
|
|
}
|
|
}
|
|
|
|
volume_mount {
|
|
volume = "nomad"
|
|
destination = "/nomad"
|
|
read_only = true
|
|
}
|
|
|
|
volume_mount {
|
|
volume = "data"
|
|
destination = "/data"
|
|
read_only = false
|
|
}
|
|
|
|
[[ template "common/resources" $c ]]
|
|
}
|
|
}
|
|
|
|
[[- if .monitoring.agent.node_exporter.enabled ]]
|
|
|
|
# This group runs the prometheus node-exporter to expose prometheus metrics from the node
|
|
group "node-exporter" {
|
|
|
|
[[- $c := merge .monitoring.agent.node_exporter .monitoring.agent .monitoring . ]]
|
|
|
|
network {
|
|
mode = "bridge"
|
|
port "metrics" {}
|
|
}
|
|
|
|
[[ template "common/volumes" $c ]]
|
|
|
|
service {
|
|
name = "node-exporter[[.consul.suffix ]]"
|
|
[[ template "common/service_meta" $c ]]
|
|
}
|
|
|
|
task "node-exporter" {
|
|
driver = "[[ $c.nomad.driver ]]"
|
|
user = 100320
|
|
|
|
config {
|
|
image = "[[ $c.image ]]"
|
|
pid_mode = "host"
|
|
userns_mode = "host"
|
|
readonly_rootfs = true
|
|
pids_limit = 50
|
|
command = "/usr/local/bin/node_exporter"
|
|
args = [
|
|
"--path.rootfs=/host",
|
|
"--web.config.file=/local/tls.yml",
|
|
"--web.listen-address=:${NOMAD_ALLOC_PORT_metrics}",
|
|
[[- range $arg := $c.args ]]
|
|
"[[ $arg ]]",
|
|
[[- end ]]
|
|
]
|
|
}
|
|
|
|
[[ template "common/vault.policies" $c ]]
|
|
[[ template "common/metrics_cert" $c ]]
|
|
[[ template "common/artifacts" $c ]]
|
|
|
|
template {
|
|
data = <<_EOT
|
|
[[ template "monitoring/agent/node-exporter.yml" $c ]]
|
|
_EOT
|
|
destination = "local/tls.yml"
|
|
}
|
|
|
|
volume_mount {
|
|
volume = "host"
|
|
destination = "/host"
|
|
read_only = true
|
|
propagation_mode = "host-to-task"
|
|
}
|
|
|
|
[[ template "common/resources" $c ]]
|
|
}
|
|
}
|
|
[[- end ]]
|
|
|
|
[[- if .monitoring.agent.consul_agent_exporter.enabled ]]
|
|
|
|
group "consul-agent-exporter" {
|
|
[[- $c := merge .monitoring.agent.consul_agent_exporter .monitoring.agent .monitoring . ]]
|
|
|
|
[[ template "common/group_start" $c ]]
|
|
|
|
network {
|
|
mode = "bridge"
|
|
port "metrics" {}
|
|
}
|
|
|
|
service {
|
|
name = "consul-agent"
|
|
[[ template "common/service_meta" $c ]]
|
|
}
|
|
|
|
task "consul-agent-metrics-proxy" {
|
|
driver = "[[ $c.nomad.driver ]]"
|
|
|
|
config {
|
|
[[ template "common/image" $c ]]
|
|
[[ template "common/tmpfs" "/tmp" ]]
|
|
volumes = [
|
|
"secrets/nginx.conf:/etc/nginx/conf.d/default.conf:ro"
|
|
]
|
|
}
|
|
|
|
[[ template "common/vault.policies" $c ]]
|
|
[[ template "common/file_env" $c ]]
|
|
[[ template "common/metrics_cert" $c ]]
|
|
template {
|
|
data = <<_EOT
|
|
[[ template "monitoring/agent/consul_agent_nginx.conf" $c ]]
|
|
_EOT
|
|
destination = "secrets/nginx.conf"
|
|
}
|
|
|
|
[[ template "common/resources" $c ]]
|
|
}
|
|
}
|
|
[[- end ]]
|
|
}
|