# monitoring/templates/prometheus/prometheus.yml
# (268 lines, 8.8 KiB, YAML)

# Server-wide defaults: how often targets are scraped and rules evaluated,
# plus the labels identifying this Prometheus instance.
global:
  evaluation_interval: 15s
  scrape_interval: 15s
  # Uncomment to send the query log to stdout
  # query_log_file: /dev/stdout
  external_labels:
    # Filled in at template time from the Consul domain
    cluster: "[[ .consul.domain ]]"
    # Filled in at template time from the NOMAD_NAMESPACE environment variable
    env: "[[ getenv "NOMAD_NAMESPACE" ]]"
# Every YAML file under /local/rules is loaded as a rule file.
rule_files:
- '/local/rules/*.yml'
# Alertmanager instances are looked up in the Consul catalog and contacted
# over HTTPS. cert_file and key_file both point at the same PEM bundle.
alerting:
  alertmanagers:
  - scheme: https
    tls_config:
      ca_file: /local/monitoring.ca.pem
      cert_file: /secrets/prometheus.bundle.pem
      key_file: /secrets/prometheus.bundle.pem
    consul_sd_configs:
    # Consul API on port 8500 of the address of the "nomad" interface, plain
    # HTTP, authenticated with a token read from the consul/creds secret path
    - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
      scheme: http
      datacenter: [[ .consul.datacenter ]]
      token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
    relabel_configs:
    # Discard every catalog entry that is not the alertmanager TLS service
    - source_labels: [__meta_consul_service]
      regex: 'alertmanager-tls[[ .consul.suffix ]]'
      action: keep
# Scrape jobs. The first ones are static jobs generated from the .jobs map:
# one job per key, scraping the targets listed under that key.
scrape_configs:
[[- range $k, $v := .jobs ]]
# Name and targets are quoted so that values YAML would otherwise mis-parse
# (for example a bracketed IPv6 target such as [::1]:9100, which starts a
# flow sequence when left unquoted) stay plain strings.
- job_name: "[[ $k ]]"
  static_configs:
  - targets:
[[- range $target := $v.targets ]]
    - "[[ $target ]]"
[[- end ]]
[[- end ]]
[[- if gt (len .exporters.blackbox.http_probes) 0 ]]
{{- if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }}
# Blackbox Exporter HTTP targets
# Only rendered when at least one HTTP probe is configured and at least one
# blackbox-exporter instance is registered in Consul.
# The job is emitted exactly once, whatever the number of registered
# instances: looping over instances around the whole job body would repeat
# the relabel_configs key and produce an invalid configuration.
- job_name: http_probe
  metrics_path: /probe
  scheme: https
  tls_config:
    ca_file: /local/monitoring.ca.pem
    cert_file: /secrets/prometheus.bundle.pem
    key_file: /secrets/prometheus.bundle.pem
  params:
    module: ["http_2xx"]
  static_configs:
  - targets:
[[- range $http_probe := .exporters.blackbox.http_probes ]]
    - "[[ $http_probe ]]"
[[- end ]]
  relabel_configs:
  # Hand the probed URL to the exporter as the "target" query parameter
  - source_labels: [__address__]
    target_label: __param_target
  # Report the probed URL, not the exporter, as the instance label
  - source_labels: [__param_target]
    target_label: instance
  # Send the scrape itself to the first registered blackbox-exporter instance
  - target_label: __address__
    replacement: {{ range $idx, $instance := service "blackbox-exporter[[ .consul.suffix ]]" }}{{ if eq $idx 0 }}{{ $instance.Address }}:{{ $instance.Port }}{{ end }}{{ end }}
{{- end }}
[[- end ]]
[[- if gt (len .exporters.blackbox.tcp_probes) 0 ]]
{{- if gt (len (service "blackbox-exporter[[ .consul.suffix ]]")) 0 }}
# Blackbox Exporter TCP targets
# Only rendered when at least one TCP probe is configured and at least one
# blackbox-exporter instance is registered in Consul.
# The job is emitted exactly once, whatever the number of registered
# instances: looping over instances around the whole job body would repeat
# the relabel_configs key and produce an invalid configuration.
- job_name: tcp_probe
  metrics_path: /probe
  scheme: https
  tls_config:
    ca_file: /local/monitoring.ca.pem
    cert_file: /secrets/prometheus.bundle.pem
    key_file: /secrets/prometheus.bundle.pem
  params:
    module: ["tcp_connect"]
  static_configs:
  - targets:
[[- range $target := .exporters.blackbox.tcp_probes ]]
    - "[[ $target ]]"
[[- end ]]
  relabel_configs:
  # Hand the probed endpoint to the exporter as the "target" query parameter
  - source_labels: [__address__]
    target_label: __param_target
  # Report the probed endpoint, not the exporter, as the instance label
  - source_labels: [__param_target]
    target_label: instance
  # Send the scrape itself to the first registered blackbox-exporter instance
  - target_label: __address__
    replacement: {{ range $idx, $instance := service "blackbox-exporter[[ .consul.suffix ]]" }}{{ if eq $idx 0 }}{{ $instance.Address }}:{{ $instance.Port }}{{ end }}{{ end }}
{{- end }}
[[- end ]]
[[- if gt (len .exporters.ping.probes) 0 ]]
# Ping exporter
# Rendered only when ping probes are configured. Each ping-exporter instance
# registered in Consul becomes its own single-target group, scraped over HTTPS.
- job_name: ping
  scheme: https
  tls_config:
    ca_file: /local/monitoring.ca.pem
    cert_file: /secrets/prometheus.bundle.pem
    key_file: /secrets/prometheus.bundle.pem
  static_configs:
{{- range $i, $exporter := service "ping-exporter[[ .consul.suffix ]]" }}
  - targets:
    - "{{ $exporter.Address }}:{{ $exporter.Port }}"
{{- end }}
[[- end ]]
# Cluster services
# Nomad, Consul and Vault are discovered in the Consul catalog, but the scrape
# is sent to the cluster-exporter service, using <service>/<node> as the path.
- job_name: cluster-services
  scheme: https
  tls_config:
    ca_file: /local/monitoring.ca.pem
    cert_file: /secrets/prometheus.bundle.pem
    key_file: /secrets/prometheus.bundle.pem
  consul_sd_configs:
  - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
    scheme: http
    datacenter: [[ .consul.datacenter ]]
    token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
  relabel_configs:
  # Keep only Nomad (servers and clients), Consul and Vault.
  # Every other service is handled by a different job.
  - source_labels: [__meta_consul_service]
    regex: '(nomad(\-client)?|consul|vault)'
    action: keep
  # The metrics path is built from the service name and the node name
  - source_labels: [__meta_consul_service, __meta_consul_node]
    regex: '(.+);(.+)'
    target_label: __metrics_path__
    replacement: '${1}/${2}'
  # Point the scrape at the first registered cluster-exporter instance
  - source_labels: [__meta_consul_service]
    regex: '(.+)'
    target_label: __address__
    replacement: {{ range $i, $exporter := service "cluster-exporter[[ .consul.suffix ]]" }}{{ if eq $i 0 }}{{ $exporter.Address }}:{{ $exporter.Port }}{{ end }}{{ end }}
  # The job label takes the name of the discovered service
  - source_labels: [__meta_consul_service]
    regex: '(.+)'
    target_label: job
    replacement: '${1}'
  # The instance label takes the name of the Consul node
  - source_labels: [__meta_consul_node]
    regex: '(.+)'
    target_label: instance
    replacement: '${1}'
# Regular services discovered from the Consul catalog
# A service is scraped only when its metadata carries a numeric metrics-port;
# the metrics path and scheme can also be overridden through metadata.
- job_name: consul-services
  scheme: https
  tls_config:
    ca_file: /local/monitoring.ca.pem
    cert_file: /secrets/prometheus.bundle.pem
    key_file: /secrets/prometheus.bundle.pem
  consul_sd_configs:
  - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
    scheme: http
    datacenter: [[ .consul.datacenter ]]
    token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
  relabel_configs:
  # Drop sidecar proxy services to avoid duplicates; the sidecars themselves
  # are scraped by another job
  - source_labels: [__meta_consul_service]
    regex: '(.+)-sidecar-proxy'
    action: drop
  # Drop Nomad, Consul and Vault, which are already handled
  - source_labels: [__meta_consul_service]
    regex: '(nomad(\-client)?|consul|vault)'
    action: drop
[[- if not (has .namespaces "*") ]]
  # Only monitor services from the configured namespaces
  - source_labels: [__meta_consul_service_metadata_namespace]
    regex: ^[[ $namespaces := coll.Slice ]][[ range $ns := .namespaces ]][[ $ns = $ns | regexp.Replace "^\\*$" ".+" ]][[ $namespaces = append $ns $namespaces ]][[ end ]][[ join $namespaces "|" ]]$
    action: keep
[[- end ]]
  # Only keep services that declare a metrics port
  - source_labels: [__meta_consul_service_metadata_metrics_port]
    regex: '\d+'
    action: keep
  # Take the metrics path from the metadata when one is provided
  - source_labels: [__meta_consul_service_metadata_metrics_path]
    regex: '(.+)'
    target_label: __metrics_path__
  # Override the scheme when the metadata asks for http or https
  - source_labels: [__meta_consul_service_metadata_metrics_scheme]
    regex: '(https?)'
    target_label: __scheme__
    replacement: '${1}'
  # Rewrite the address so that it uses the metrics port
  - source_labels: [__address__, __meta_consul_service_metadata_metrics_port]
    regex: '([^:]+)(?::\d+)?;(\d+)'
    target_label: __address__
    replacement: '${1}:${2}'
  # The job label takes the name of the service
  - source_labels: [__meta_consul_service]
    regex: '(.+)'
    target_label: job
    replacement: '${1}'
  # Default the alloc metadata to 0 when it is not set
  - source_labels: [__meta_consul_service_metadata_alloc]
    regex: '^$'
    target_label: __meta_consul_service_metadata_alloc
    replacement: '0'
  # Rewrite the instance label to <service>-<alloc>
  - source_labels: [__meta_consul_service, __meta_consul_service_metadata_alloc]
    regex: '(.+);([a-zA-Z\d\-\.]+)'
    target_label: instance
    replacement: '${1}-${2}'
# Envoy sidecars discovered from the Consul catalog
# No scheme or tls_config is set on this job, so Prometheus defaults apply.
- job_name: consul-envoy-services
  consul_sd_configs:
  - server: {{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
    scheme: http
    datacenter: [[ .consul.datacenter ]]
    token: {{ with secret "consul/creds/prometheus[[ .consul.suffix ]]" }}{{ .Data.token }}{{ end }}
  relabel_configs:
  # Only keep sidecar proxy services that declare an envoy-metrics-port
  - source_labels: [__meta_consul_service, __meta_consul_service_metadata_envoy_metrics_port]
    regex: '(.+)-sidecar-proxy;\d+'
    action: keep
  # Rewrite the address so that it uses the envoy-metrics-port
  - source_labels: [__address__, __meta_consul_service_metadata_envoy_metrics_port]
    regex: '([^:]+)(?::\d+)?;(\d+)'
    target_label: __address__
    replacement: '${1}:${2}'
  # The job label takes the name of the service
  - source_labels: [__meta_consul_service]
    regex: '(.+)'
    target_label: job
    replacement: '${1}'
  # Default the alloc metadata to 0 when it is not set
  - source_labels: [__meta_consul_service_metadata_alloc]
    regex: '^$'
    target_label: __meta_consul_service_metadata_alloc
    replacement: '0'
  # Rewrite the instance label to <service>-<alloc>
  - source_labels: [__meta_consul_service, __meta_consul_service_metadata_alloc]
    regex: '(.+);([a-zA-Z\d\-\.]+)'
    target_label: instance
    replacement: '${1}-${2}'