# postgres/postgres-server.nomad.hcl (364 lines, 10 KiB, HCL)
# Nomad job template for a Patroni-managed PostgreSQL cluster.
# The [[ ... ]] delimiters are gomplate syntax, rendered before Nomad parses the HCL.
job "[[ .instance ]]-server" {
[[- /* Job-level context : postgres settings (.pg) merged over the global context */]]
[[- $c := merge .pg . ]]
[[ template "common/job_start" $c ]]
group "server" {
[[- /* Re-scope $c : server-specific settings take precedence over job-level ones */]]
[[- $c := merge $c.server $c ]]
[[- /* Ensure count is 1 when in recovery mode */]]
[[ template "common/group_start" merge (dict "count" (ternary 1 $c.count $c.recovery)) $c ]]
network {
mode = "bridge"
[[- if conv.ToBool $c.prometheus.enabled ]]
# Dynamic host ports used to expose postgres_exporter and patroni metrics
port "metrics" {}
port "patroni-metrics" {}
[[- end ]]
# Patroni API for nodes to check each others
port "patroni" {
to = 8080
}
# When running with patroni, nodes must reach each others postgres service, so we expose a port
port "postgres" {
to = 5432
}
}
[[- if not $c.recovery ]]
# Main postgres service registered in Consul (skipped entirely in recovery mode)
service {
name = "[[ .instance ]][[ $c.consul.suffix ]]"
port = 5432
[[ template "common/service_meta" $c ]]
[[ template "common/connect" $c ]]
tags = [
# Per-allocation tag, so a specific node can be addressed through Consul
"postgres-${NOMAD_ALLOC_INDEX}",
[[- if $c.traefik.enabled ]]
# Note : we don't add traefik.enable=true
# This will be done dynamically only on the current master node using the update_tags.sh script
# TCP router : SNI match on the public URL hostname when set, otherwise a catch-all
"[[ $c.traefik.instance ]].tcp.routers.[[ .instance ]][[ .consul.suffix ]].rule=HostSNI(`[[ if has .pg.server "public_url" ]][[ (urlParse .pg.server.public_url).Hostname ]][[ else ]]*[[ end ]]`)",
"[[ $c.traefik.instance ]].tcp.routers.[[ .instance ]][[ .consul.suffix ]].tls=true",
"[[ $c.traefik.instance ]].tcp.routers.[[ .instance ]][[ .consul.suffix ]].entrypoints=[[ join $c.traefik.entrypoints "," ]]",
[[- if gt (len $c.traefik.tcp_middlewares) 0 ]]
# Traefik expects a comma-separated middleware list. The previous range-loop
# concatenated the names with no separator, which produced an invalid value
# as soon as more than one middleware was configured. Use join, like the
# entrypoints tag above.
"[[ $c.traefik.instance ]].tcp.routers.[[ .instance ]][[ .consul.suffix ]].middlewares=[[ join $c.traefik.tcp_middlewares "," ]]",
[[- end ]]
[[- end ]]
]
# Use patroni health endpoint to verify postgres status
check {
name = "healthy"
type = "http"
port = "patroni"
path = "/health"
protocol = "https"
interval = "[[ $c.consul.check.interval ]]"
timeout = "[[ $c.consul.check.timeout ]]"
# Patroni REST API is using a cert from a private CA
tls_skip_verify = true
}
# This check will ensure the current role is published in Consul tags (if the callback during a role change failed for example)
check {
name = "tags"
type = "script"
command = "/local/update_tags.sh"
task = "postgres"
interval = "[[ $c.consul.check.interval ]]"
timeout = "[[ $c.consul.check.timeout ]]"
}
# Basic readiness probe, executed inside the postgres task
check {
name = "ready"
type = "script"
interval = "[[ $c.consul.check.interval ]]"
timeout = "[[ $c.consul.check.timeout ]]"
task = "postgres"
command = "pg_isready"
}
# Patroni will run a script to update the tags (master / replica)
enable_tag_override = true
}
[[- if conv.ToBool $c.prometheus.enabled ]]
# This service is just used to expose patroni metrics
service {
name = "[[ .instance ]]-patroni[[ $c.consul.suffix ]]"
port = "patroni"
meta {
# Nomad translates the "patroni-metrics" port label to an underscore env var
metrics-port = "${NOMAD_HOST_PORT_patroni_metrics}"
alloc = "${NOMAD_ALLOC_INDEX}"
}
}
[[- end ]]
[[- end ]]
[[ template "common/volumes" $c ]]
# Main task : runs PostgreSQL under Patroni supervision
task "postgres" {
driver = "[[ $c.nomad.driver ]]"
# Leader task : when it exits, sidecar tasks (backup, exporter) are stopped too
leader = true
[[- if not $c.recovery ]]
# Leave Patroni enough time to hand over the master role / shut down cleanly
kill_timeout = "10m"
[[- end ]]
config {
image = "[[ $c.image ]]"
# Set shm_size to half of the total size
# (memory is in MB, so MB * 1024 * 1024 / 2 = memory * 524288 bytes)
shm_size = [[ mul $c.resources.memory 524288 ]]
volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
[[- if $c.recovery ]]
# Running in recovery mode : just start the container and wait
# so we can enter it and manually recover what we need to
command = "sleep"
args = ["infinity"]
[[- else ]]
command = "patroni"
args = ["/secrets/patroni.yml"]
pids_limit = 700
[[- end ]]
}
[[ template "common/vault.policies" $c ]]
[[ template "common/file_env" $c ]]
# Environment for the postgres task (change_mode=noop : a token rotation
# must not restart the database)
template {
data = <<_EOT
# Get a Consul token from vault, so we're able to update the tags in Consul from the containers
CONSUL_HTTP_TOKEN={{ with secret "[[ .vault.root ]]consul/creds/[[ .instance ]]" }}{{ .Data.token }}{{ end }}
PATRONICTL_CONFIG_FILE=/secrets/patroni.yml
_EOT
destination = "secrets/pg.env"
uid = 100000
gid = 100026
perms = 440
change_mode = "noop"
env = true
}
# Scripts to update tags attached to the service in consul catalog
# with either master or replica
template {
data =<<_EOT
[[ template "postgres/serviceformat.jq.tpl" $c ]]
_EOT
destination = "local/serviceformat.jq"
change_mode = "noop"
}
template {
data =<<_EOT
[[ template "postgres/update_tags.sh.tpl" $c ]]
_EOT
destination = "local/update_tags.sh"
perms = 755
change_mode = "noop"
}
# A small entrypoint scriptlet to ensure /alloc/data/postgres dir exists
# (mounted into /entrypoint.d via the config.volumes stanza above)
template {
data =<<_EOT
#!/bin/sh
set -eu
mkdir -p /alloc/data/postgres
_EOT
destination = "local/mkdir-socket.sh"
perms = 755
}
# Patroni main configuration file. On change, SIGHUP makes Patroni reload
# its configuration without a restart
template {
data =<<_EOT
[[ template "postgres/patroni.yml.tpl" $c ]]
_EOT
destination = "secrets/patroni.yml"
perms = "0400"
uid = 100026
gid = 100026
change_mode = "signal"
change_signal = "SIGHUP"
}
# Post bootstrap script, to create users
template {
data =<<_EOT
[[ template "postgres/create_users.sh.tpl" $c ]]
_EOT
destination = "secrets/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Post bootstrap wrapper, as /secrets is mounted with noexec.
# Note : `set -o pipefail` is not POSIX and makes dash (and some busybox sh
# builds) abort with "Illegal option" under a #!/bin/sh shebang. There is no
# pipeline in this script, so `set -eu` is sufficient and matches the
# mkdir-socket.sh scriptlet above.
template {
data =<<_EOT
#!/bin/sh
set -eu
sh /secrets/create_users.sh
_EOT
destination = "local/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Obtain a certificate from Vault
# Bundle (cert + key) used by both postgres and the Patroni REST API.
# SIGHUP on renewal so Patroni reloads it without restarting the database.
template {
data = <<_EOT
{{ with pkiCert
"[[ $c.vault.pki.path ]]/issue/postgres-server"
"common_name=[[ .instance ]][[ .consul.suffix ]].service.[[ .consul.domain ]]"
(printf "alt_name=%s.[[ .instance ]][[ .consul.suffix ]].service.[[ .consul.domain ]]" (env "NOMAD_ALLOC_INDEX"))
(printf "ip_sans=%s" (env "NOMAD_IP_patroni")) "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{ end }}
_EOT
destination = "secrets/postgres.bundle.pem"
perms = "0400"
uid = 100026
gid = 100026
change_mode = "signal"
change_signal = "SIGHUP"
}
# CA certificate chains
template {
data = <<_EOT
{{ with secret "[[ $c.vault.pki.path ]]/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/postgres.ca.pem"
change_mode = "signal"
change_signal = "SIGHUP"
}
# Mount the persistent volume in /data
volume_mount {
volume = "data"
destination = "/data"
}
# Mount the backup volume (which can be used for PITR with pgbackrest)
volume_mount {
volume = "backup"
destination = "/backup"
}
[[ template "common/resources" $c ]]
}
[[- /* Backup sidecar : only when not in recovery and at least one backup method is enabled */]]
[[- if and (not .pg.server.recovery) (or .pg.backup.pgbackrest.enabled .pg.backup.dumps.enabled) ]]
[[ $b := merge .pg.backup .pg.server .pg . -]]
task "backup" {
driver = "[[ $c.nomad.driver ]]"
config {
image = "[[ $c.image ]]"
# minit is the process supervisor driving the scheduled backup jobs
command = "minit"
readonly_rootfs = true
pids_limit = 100
}
# Runs alongside the postgres task for the life of the allocation
lifecycle {
hook = "poststart"
sidecar = true
}
env {
# Use the socket from the shared dir
PGHOST = "/alloc/data/postgres"
MINIT_UNIT_DIR = "/local/minit.d"
}
[[- /* NOTE(review): this uses $c while resources below use $b — confirm which context file_env should receive */]]
[[- template "common/file_env" $c ]]
template {
data =<<_EOT
[[- /* NOTE(review): rendered with the root context "." rather than $b — verify this is intentional */]]
[[ template "postgres/backup.minit.yml.tpl" . ]]
_EOT
destination = "local/minit.d/backup.yml"
}
template {
data =<<_EOT
[[ template "postgres/dump.sh.tpl" $c ]]
_EOT
destination = "local/dump.sh"
uid = 100000
gid = 100000
perms = 755
}
volume_mount {
volume = "backup"
destination = "/backup"
}
volume_mount {
volume = "data"
destination = "/data"
}
[[ template "common/resources" $b ]]
}
[[- end ]]
[[- /* Prometheus exporter sidecar : only when metrics are enabled and not in recovery */]]
[[- if and (conv.ToBool $c.prometheus.enabled) (not .pg.server.recovery) ]]
[[ $e := merge .pg.exporter .pg . ]]
[[- /* The metrics proxy also exposes the Patroni metrics endpoint via an extra nginx conf */]]
[[ template "common/task.metrics_proxy" merge (dict "prometheus" (dict "additional_proxy_conf" (tmpl.Exec "postgres/nginx_patroni.conf.tpl" $e))) $c ]]
task "exporter" {
driver = "[[ $e.nomad.driver ]]"
user = 9187
lifecycle {
hook = "poststart"
sidecar = true
}
config {
image = "[[ $e.image ]]"
command = "postgres_exporter"
# Listen on localhost only : metrics are exposed through the proxy task
args = [
"--web.listen-address=127.0.0.1:9187"
]
readonly_rootfs = true
pids_limit = 20
}
[[- /* NOTE(review): vault.policies uses $c while file_env uses $e — confirm which context is intended */]]
[[ template "common/vault.policies" $c ]]
[[ template "common/file_env" $e ]]
template {
data = <<_EOT
PG_EXPORTER_AUTO_DISCOVER_DATABASES=true
DATA_SOURCE_URI=127.0.0.1:5432/postgres
DATA_SOURCE_USER=monitor
DATA_SOURCE_PASS={{ with secret "[[ .vault.root ]]kv/service/[[ .instance ]]" }}{{ .Data.data.monitor_pwd | sprig_squote }}{{ end }}
_EOT
destination = "secrets/env"
perms = "0400"
uid = 100000
gid = 100000
env = true
}
resources {
cpu = 10
memory = 64
}
}
[[- end ]]
}
}