# postgres/postgres.nomad.hcl (413 lines, 11 KiB, HCL)
# Render context: server-specific settings merged over the global ones
# (keys in .pg.server take precedence over top-level keys).
[[ $c := merge .pg.server . -]]
job [[ .pg.instance | toJSON ]] {
[[ template "common/job_start" $c ]]
group "server" {
ephemeral_disk {
# Use minimal ephemeral disk
size = 101
}
# Force different instances to run on distinct nodes
constraint {
operator = "distinct_hosts"
value = "true"
}
# Recovery mode runs a single allocation; normal mode runs the configured count
count = [[ .pg.server.recovery | ternary 1 .pg.server.count ]]
network {
mode = "bridge"
[[- if $c.prometheus.enabled ]]
# Extra ports for the metrics proxy and the patroni metrics endpoint
# (only rendered when prometheus monitoring is enabled)
port "metrics" {}
port "patroni-metrics" {}
[[- end ]]
# Patroni API for node to check each others
port "patroni" {
to = 8080
}
# When running with patroni, nodes must reach each others postgres service
port "postgres" {
to = 5432
}
}
update {
# Set super high deadlines as recovery can take lots of time
healthy_deadline = "48h"
progress_deadline = "72h"
}
[[- if not .pg.server.recovery ]]
# Main postgres service registered in Consul (skipped entirely in recovery mode)
service {
name = "[[ .pg.instance ]][[ $c.consul.suffix ]]"
port = 5432
[[- template "common/prometheus_meta" $c ]]
[[- template "common/connect" $c ]]
tags = [
"postgres-${NOMAD_ALLOC_INDEX}",
[[- if .pg.server.traefik.enabled ]]
# Note : we don't add traefik.enable=true
# This will be done dynamically only on the current master node using the update_tags.sh script
"[[ $c.traefik.instance ]].tcp.routers.[[ .pg.instance ]][[ $c.consul.suffix ]].rule=HostSNI(`[[ if has .pg.server "public_url" ]][[ (urlParse .pg.server.public_url).Hostname ]][[ else ]]*[[ end ]]`)",
"[[ $c.traefik.instance ]].tcp.routers.[[ .pg.instance ]][[ $c.consul.suffix ]].tls=true",
"[[ $c.traefik.instance ]].tcp.routers.[[ .pg.instance ]][[ $c.consul.suffix ]].entrypoints=[[ join $c.traefik.entrypoints "," ]]",
# FIX: the condition previously tested .pg.server.traefik.middlewares while the
# tag below joins the merged $c.traefik.middlewares. Use the merged value in both
# places so a middleware list inherited from the global config is not silently
# skipped, and so len cannot hit a missing server-level key.
[[- if gt (len $c.traefik.middlewares) 0 ]]
"[[ $c.traefik.instance ]].tcp.routers.[[ .pg.instance ]][[ $c.consul.suffix ]].middlewares=[[ join $c.traefik.middlewares "," ]]",
[[- end ]]
[[- end ]]
]
# Use patroni health endpoint to verify postgres status
check {
name = "healthy"
type = "http"
port = "patroni"
path = "/health"
protocol = "https"
interval = "20s"
timeout = "10s"
# Patroni REST API is using a cert from a private CA
tls_skip_verify = true
}
# This check will ensure the current role is published in Consul tags (if the callback during a role change failed for example)
check {
name = "tags"
type = "script"
command = "/bin/sh"
args = [
"-c",
"ROLE=$(curl -k https://localhost:${NOMAD_PORT_patroni}/health | jq -r .role) && /local/update_tags.sh ensure_tags ${ROLE}"
]
task = "postgres"
interval = "60s"
timeout = "10s"
}
# Basic readiness probe: runs pg_isready inside the postgres task
check {
name = "ready"
type = "script"
interval = "30s"
timeout = "10s"
task = "postgres"
command = "pg_isready"
}
# Patroni will run a script to update the tags (master / replica)
enable_tag_override = true
}
[[- if $c.prometheus.enabled ]]
# This service is just used to expose patroni metrics
service {
name = "[[ .pg.instance ]]-patroni[[ $c.consul.suffix ]]"
port = "patroni"
meta {
metrics-port = "${NOMAD_HOST_PORT_patroni_metrics}"
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
[[- end ]]
[[- end ]]
# Persistent data volume (PGDATA). per_alloc gives each allocation its own
# volume (the alloc index is appended to the source name), so every replica
# keeps its own copy of the database files.
volume "data" {
type = [[ .pg.volumes.data.type | toJSON ]]
source = [[ .pg.volumes.data.source | toJSON ]]
[[- if ne .pg.volumes.data.type "host" ]]
# CSI-only attributes: host volumes take no access/attachment modes
access_mode = "single-node-writer"
attachment_mode = "file-system"
[[- end ]]
per_alloc = true
}
# Shared backup volume, mounted by every allocation (multi-writer for CSI),
# used by the backup sidecar for pgbackrest / dumps.
volume "backup" {
type = [[ .pg.volumes.backup.type | toJSON ]]
source = [[ .pg.volumes.backup.source | toJSON ]]
[[- if ne .pg.volumes.backup.type "host" ]]
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
[[- end ]]
}
task "postgres" {
driver = [[ $c.nomad.driver | toJSON ]]
# Main task of the group: sidecar tasks are stopped when it exits
leader = true
[[- if not .pg.server.recovery ]]
# Give patroni/postgres plenty of time to shut down cleanly on stop
kill_timeout = "10m"
[[- end ]]
config {
image = [[ .pg.server.image | toJSON ]]
# Set shm_size to half of the total size
# (memory is in MB: memory * 1024 * 1024 / 2 = memory * 524288 bytes)
shm_size = [[ mul .pg.server.resources.memory 524288 ]]
volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
[[- if .pg.server.recovery ]]
# Running in recovery mode : just start the container and wait
# so we can enter it and manually recover what we need to
command = "sleep"
args = ["infinity"]
[[- else ]]
command = "patroni"
args = ["/secrets/patroni.yml"]
pids_limit = 700
[[- end ]]
}
vault {
policies = ["[[ .pg.instance ]][[ $c.consul.suffix ]]"]
# Secrets are pulled through the template blocks below, not injected directly
env = false
disable_file = true
}
[[- template "common/file_env" $c.env ]]
template {
data = <<_EOT
# Get a Consul token from vault, so we're able to update the tags in Consul from the containers
CONSUL_HTTP_TOKEN={{ with secret "[[ .vault.prefix ]]consul/creds/[[ .pg.instance ]]" }}{{ .Data.token }}{{ end }}
PATRONICTL_CONFIG_FILE=/secrets/patroni.yml
PGBACKREST_STANZA=[[ .pg.instance ]]
_EOT
destination = "secrets/pg.env"
uid = 100000
gid = 100026
# NOTE(review): numeric perms relies on HCL number->string coercion ("440"
# parsed as octal); other templates quote it ("0400") — consider "0440" here
perms = 440
change_mode = "noop"
env = true
}
# Scripts to update tags attached to the service in consul catalog
# with either master or replica
template {
data =<<_EOT
[[ template "postgres/serviceformat.jq.tpl" . ]]
_EOT
destination = "local/serviceformat.jq"
change_mode = "noop"
}
# update_tags.sh is invoked by the "tags" service check above and by
# patroni role-change callbacks
template {
data =<<_EOT
[[ template "postgres/update_tags.sh.tpl" . ]]
_EOT
destination = "local/update_tags.sh"
perms = 755
change_mode = "noop"
}
# A small entrypoint scriptlet to ensure /alloc/data/postgres dir exists
# (mounted into /entrypoint.d above; the dir holds the shared unix socket
# that the backup sidecar reaches via PGHOST)
template {
data =<<_EOT
#!/bin/sh
set -eu
mkdir -p /alloc/data/postgres
_EOT
destination = "local/mkdir-socket.sh"
perms = 755
}
# Patroni main configuration file
# (a re-rendered config is signalled to the task with SIGHUP)
template {
data =<<_EOT
[[ template "postgres/patroni.yml.tpl" . ]]
_EOT
destination = "secrets/patroni.yml"
perms = "0400"
uid = 100026
gid = 100026
change_mode = "signal"
change_signal = "SIGHUP"
}
# Post bootstrap script, to create users
# (executed through the /local wrapper below, as /secrets is noexec)
template {
data =<<_EOT
[[ template "postgres/create_users.sh.tpl" . ]]
_EOT
destination = "secrets/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Post bootstrap wrapper, as /secrets is mounted with noexec
# FIX: "set -euo pipefail" is not POSIX and aborts under dash-like /bin/sh
# implementations ("Illegal option -o pipefail"); the script contains no
# pipeline, so plain "set -eu" gives the same safety under any /bin/sh.
template {
data =<<_EOT
#!/bin/sh
set -eu
sh /secrets/create_users.sh
_EOT
destination = "local/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Obtain a certificate from Vault
# Short-lived (72h) server bundle (cert + key); SAN covers the patroni
# address so peers can verify the REST API. Renewal triggers SIGHUP.
template {
data = <<_EOT
{{ with pkiCert
"[[ .vault.prefix ]]pki/[[ .pg.instance ]]/issue/postgres-server"
"common_name=[[ .pg.instance ]].service.[[ .consul.domain ]]"
(printf "ip_sans=%s" (env "NOMAD_IP_patroni")) "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{ end }}
_EOT
destination = "secrets/postgres.bundle.pem"
perms = "0400"
uid = 100026
gid = 100026
change_mode = "signal"
change_signal = "SIGHUP"
}
# CA certificate chains
template {
data = <<_EOT
{{ with secret "[[ .vault.prefix ]]pki/[[ .pg.instance ]]/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/postgres.ca.pem"
change_mode = "signal"
change_signal = "SIGHUP"
}
# Mount the persistent volume in /data
volume_mount {
volume = "data"
destination = "/data"
}
# Mount the backup volume (which can be used for PITR with pgbackrest)
volume_mount {
volume = "backup"
destination = "/backup"
}
[[ template "common/resources" .pg.server.resources ]]
}
[[- if and (not .pg.server.recovery) (or .pg.backup.pgbackrest.enabled .pg.backup.dumps.enabled) ]]
# Re-merge the context so backup settings take precedence
[[ $c = merge .pg.backup .pg.server . -]]
# Sidecar running supercronic to schedule backups (pgbackrest and/or dumps)
task "backup" {
driver = [[ $c.nomad.driver | toJSON ]]
config {
image = [[ .pg.server.image | toJSON ]]
command = "supercronic"
args = [
"/secrets/backup.cron"
]
readonly_rootfs = true
pids_limit = 100
}
# Start after the leader (postgres) task and keep running alongside it
lifecycle {
hook = "poststart"
sidecar = true
}
env {
PGBACKREST_STANZA = [[ .pg.instance | toJSON ]]
# Use the socket from the shared dir
PGHOST = "/alloc/data/postgres"
}
[[- template "common/file_env" $c.env ]]
# Crontab consumed by supercronic
template {
data =<<_EOT
[[ template "postgres/backup.cron.tpl" . ]]
_EOT
destination = "secrets/backup.cron"
}
template {
data =<<_EOT
[[ template "postgres/dump.sh.tpl" $c ]]
_EOT
destination = "local/dump.sh"
uid = 100000
gid = 100000
perms = 755
}
volume_mount {
volume = "backup"
destination = "/backup"
}
volume_mount {
volume = "data"
destination = "/data"
}
[[ template "common/resources" .pg.backup.resources ]]
}
[[- end ]]
[[- if and $c.prometheus.enabled (not .pg.server.recovery) ]]
# Proxy task exposing the exporter + patroni metrics to prometheus
# (nginx side-config rendered from postgres/nginx_patroni.conf.tpl)
[[ template "common/task.metrics_proxy" merge (dict "prometheus" (dict "additional_proxy_conf" (tmpl.Exec "postgres/nginx_patroni.conf.tpl" $c))) $c ]]
# postgres_exporter sidecar, bound to loopback only (scraped via the proxy)
task "exporter" {
driver = [[ $c.nomad.driver | toJSON ]]
# NOTE(review): presumably the uid the exporter image expects — confirm
user = 9187
lifecycle {
hook = "poststart"
sidecar = true
}
config {
image = [[ .pg.exporter.image | toJSON ]]
args = [
"--web.listen-address=127.0.0.1:9187"
]
readonly_rootfs = true
pids_limit = 20
}
vault {
policies = ["[[ .pg.instance ]][[ $c.consul.suffix ]]"]
env = false
disable_file = true
}
env {
[[ template "common/env" .pg.exporter.env ]]
}
# Connection settings; the monitor user password comes from Vault KV
template {
data = <<_EOT
PG_EXPORTER_AUTO_DISCOVER_DATABASES=true
DATA_SOURCE_URI=127.0.0.1:5432/postgres
DATA_SOURCE_USER=monitor
DATA_SOURCE_PASS={{ with secret "[[ .vault.prefix ]]kv/service/postgres" }}{{ .Data.data.monitor_pwd | sprig_squote }}{{ end }}
_EOT
destination = "secrets/env"
perms = "0400"
uid = 100000
gid = 100000
env = true
}
resources {
cpu = 10
memory = 64
}
}
[[- end ]]
}
}