postgres/example/postgres.nomad.hcl

job "postgres" {
datacenters = ["dc1"]
priority = 80
group "server" {
ephemeral_disk {
# Use minimal ephemeral disk
size = 101
}
# Force different instances to run on distinct nodes
constraint {
operator = "distinct_hosts"
value = "true"
}
count = "1"
network {
mode = "bridge"
# Patroni API for node to check each others
port "patroni" {
to = 8080
}
# When running with patroni, nodes must reach each others postgres service
port "postgres" {
to = 5432
}
}
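
    # In bridge mode, "to" is the port inside the allocation's network namespace,
    # while the host-side port is dynamic. This is why the templates below use
    # NOMAD_ALLOC_PORT_* for listen addresses and NOMAD_HOST_ADDR_* for the
    # addresses advertised to other nodes.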

    update {
      # Use very generous deadlines, as recovery can take a long time
      healthy_deadline  = "48h"
      progress_deadline = "72h"
    }

    service {
      name = "postgres"
      port = 5432

      connect {
        sidecar_service {
        }

        sidecar_task {
          config {
            args = [
              "-c",
              "${NOMAD_SECRETS_DIR}/envoy_bootstrap.json",
              "-l",
              "${meta.connect.log_level}",
              "--concurrency",
              "${meta.connect.proxy_concurrency}",
              "--disable-hot-restart"
            ]
          }

          resources {
            cpu    = 50
            memory = 64
          }
        }
      }

      tags = [
        "postgres-${NOMAD_ALLOC_INDEX}",
        # Note: we don't add traefik.enable=true here. It is set dynamically,
        # and only on the current master node, by the update_tags.sh script
        "traefik.tcp.routers.postgres.rule=HostSNI(`*`)",
        "traefik.tcp.routers.postgres.tls=true",
        "traefik.tcp.routers.postgres.entrypoints=postgres",
      ]
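
      # With these router tags, Traefik handles TLS on its "postgres" TCP
      # entrypoint and forwards connections to this service. Since only the
      # master carries traefik.enable=true, clients always reach the primary.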

      # Use the Patroni health endpoint to verify postgres status
      check {
        name     = "healthy"
        type     = "http"
        port     = "patroni"
        path     = "/health"
        protocol = "https"
        interval = "20s"
        timeout  = "10s"
        # The Patroni REST API uses a cert from a private CA
        tls_skip_verify = true
      }

      # This check ensures the current role is published in the Consul tags
      # (in case the callback during a role change failed, for example)
      check {
        name    = "tags"
        type    = "script"
        command = "/bin/sh"
        args = [
          "-c",
          "ROLE=$(curl -k https://localhost:${NOMAD_PORT_patroni}/health | jq -r .role) && /local/update_tags.sh ensure_tags ${ROLE}"
        ]
        task     = "postgres"
        interval = "60s"
        timeout  = "10s"
      }

      check {
        name     = "ready"
        type     = "script"
        interval = "30s"
        timeout  = "10s"
        task     = "postgres"
        command  = "pg_isready"
      }

      # Patroni will run a script to update the tags (master / replica)
      enable_tag_override = true
    }
volume "backup" {
source = "postgres-backup"
type = "csi"
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
}
volume "data" {
source = "postgres-data"
type = "csi"
access_mode = "single-node-writer"
attachment_mode = "file-system"
per_alloc = true
}
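
    # With per_alloc = true, Nomad appends the allocation index to the source,
    # so volumes named postgres-data[0], postgres-data[1], ... must be
    # registered beforehand. A minimal, hypothetical volume spec for reference
    # (the plugin_id is an assumption, adjust for your CSI plugin):
    #
    #   id        = "postgres-data[0]"
    #   name      = "postgres-data[0]"
    #   type      = "csi"
    #   plugin_id = "your-csi-plugin"
    #   capability {
    #     access_mode     = "single-node-writer"
    #     attachment_mode = "file-system"
    #   }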
task "postgres" {
driver = "docker"
leader = true
kill_timeout = "10m"
config {
image = "danielberteaud/patroni:15.24.1-2"
# Set shm_size to half of the total size
shm_size = 536870912
volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
command = "patroni"
args = ["/secrets/patroni.yml"]
pids_limit = 700
}
vault {
policies = ["postgres"]
env = false
disable_file = true
}

      # Use a template block instead of env {} so we can fetch values from Vault
      template {
        data        = <<_EOT
LANG=fr_FR.utf8
PGBACKREST_STANZA=postgres
TZ=Europe/Paris
_EOT
        destination = "secrets/.env"
        perms       = 400
        env         = true
      }

      template {
        data        = <<_EOT
# Get a Consul token from Vault, so we're able to update the tags in Consul from the containers
CONSUL_HTTP_TOKEN={{ with secret "/consul/creds/postgres" }}{{ .Data.token }}{{ end }}
PATRONICTL_CONFIG_FILE=/secrets/patroni.yml
_EOT
        destination = "secrets/pg.env"
        uid         = 100000
        gid         = 100026
        perms       = 440
        change_mode = "noop"
        env         = true
      }

      # jq script to update the tags attached to the services in the Consul
      # catalog with either master or replica
      template {
        data        = <<_EOT
def nest_service_params:
  # Select all entries with keys that start with the name 'Service'
  # and remove the 'Service' prefix from key names
  with_entries(select(.key | startswith("Service")) | .key = (.key | sub("^Service"; "")))
;
def create_svc_txn:
  # Select our own instance
  select(.ServiceTags[] | contains($mytag))
  # Add the needed tags
  | .ServiceTags -= ["master", "replica", "uninitialized"]
  | .ServiceTags += [$role]
  | if $role == "master" then .ServiceTags += ["traefik.enable=true"] else .ServiceTags -= ["traefik.enable=true"] end
  # Rewrite keys to remove the Service prefix
  | nest_service_params as $nested_params
  | $nested_params
;
. | map(create_svc_txn) | .[]
_EOT
        destination = "local/serviceformat.jq"
        change_mode = "noop"
      }
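
      # To illustrate the filter above: a catalog entry such as
      #   {"ServiceID": "...", "ServiceName": "postgres", "ServiceTags": ["postgres-0", "replica"], "ServicePort": 25432, ...}
      # run with --arg role master --arg mytag postgres-0 becomes
      #   {"ID": "...", "Name": "postgres", "Tags": ["postgres-0", "master", "traefik.enable=true"], "Port": 25432, ...}
      # which matches the payload shape expected by /v1/agent/service/register.
      # (Field values here are illustrative only.)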

      template {
        data        = <<_EOT
#!/bin/sh
# vim: syntax=sh
set -eo pipefail
EVENT=$1
NEW_ROLE=$2
source /secrets/pg.env
# Translate promoted -> master and demoted -> replica
if [ "${NEW_ROLE}" = "promoted" ]; then
  NEW_ROLE="master"
elif [ "${NEW_ROLE}" = "demoted" ]; then
  NEW_ROLE="replica"
fi
CURL_OPTS="--connect-timeout 5 --max-time 10 --retry 5 --retry-delay 1 --retry-max-time 40 --retry-connrefused"
# Update tags on the main service
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres |\
  jq --from-file /local/serviceformat.jq --arg role ${NEW_ROLE} --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
  curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/agent/service/register
# Update tags on the sidecar service (connect-proxy)
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres-sidecar-proxy |\
  jq --from-file /local/serviceformat.jq --arg role ${NEW_ROLE} --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
  curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/agent/service/register
_EOT
        destination = "local/update_tags.sh"
        perms       = 755
        change_mode = "noop"
      }
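
      # Patroni invokes callback scripts as "<script> <action> <role> <scope>";
      # only the first two arguments are used here. The "tags" service check
      # above also calls this script directly, with "ensure_tags <role>".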

      # A small entrypoint scriptlet to ensure the /alloc/data/postgres dir exists
      template {
        data        = <<_EOT
#!/bin/sh
set -eu
mkdir -p /alloc/data/postgres
_EOT
        destination = "local/mkdir-socket.sh"
        perms       = 755
      }

      # Patroni main configuration file
      template {
        data          = <<_EOT
name: postgres-{{ env "NOMAD_ALLOC_INDEX" }}
scope: postgres

consul:
  url: http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
  token: {{ with secret "consul/creds/postgres" }}{{ .Data.token }}{{ end }}

bootstrap:
  dcs:
    synchronous_mode: False
  initdb:
    - data-checksums
    - encoding: UTF-8
    #- locale-provider: icu
    #- icu-locale: fr_FR.utf8
  post_bootstrap: /local/create_users.sh

postgresql:
  create_replica_methods:
    - basebackup
  callbacks:
    on_role_change: /local/update_tags.sh
    on_start: /local/update_tags.sh
  connect_address: {{ env "NOMAD_HOST_ADDR_postgres" }}
  bin_dir: /usr/pgsql-15/bin
  data_dir: /data/db/15
  listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_postgres" }}
  use_pg_rewind: True
  #remove_data_directory_on_rewind_failure: True
  pg_hba:
    - local all postgres peer
    - local replication postgres peer
    - local all postgres scram-sha-256
    - host all all 127.0.0.0/8 scram-sha-256
    - host replication backup 127.0.0.0/8 scram-sha-256
    - hostssl replication replicator 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
    - hostssl postgres rewind 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
    - hostssl all all 0.0.0.0/0 cert clientcert=verify-full
  pg_ident:
    - patroni-map postgres.service.consul postgres
    - patroni-map postgres.service.consul replicator
    - patroni-map postgres.service.consul rewind
  parameters:
    ssl: on
    ssl_cert_file: /secrets/postgres.bundle.pem
    ssl_key_file: /secrets/postgres.bundle.pem
    ssl_ca_file: /local/postgres.ca.pem
    #ssl_crl_file: /local/postgres.crl.pem
    # Add a socket in /alloc/data/postgres
    # so other tasks in the same group can reach it
    unix_socket_directories: /run/postgresql, /alloc/data/postgres
    autovacuum_analyze_scale_factor: 0.05
    autovacuum_analyze_threshold: 500
    autovacuum_vacuum_scale_factor: 0.1
    autovacuum_vacuum_threshold: 500
    datestyle: ISO, DMY
    log_connections: on
    log_destination: stderr
    # Write logs to /proc/1/fd/1, ie stdout of PID 1,
    # so they end up in the container output
    log_directory: /proc/1/fd
    log_disconnections: on
    log_filename: 1
    log_line_prefix: '[%m] u=%u,d=%d,a=%a,c=%h,xid=%x '
    log_min_duration_statement: 2000
    log_statement: ddl
    log_timezone: {{ env "TZ" }}
    maintenance_work_mem: 51MB
    shared_buffers: 512MB
    timezone: {{ env "TZ" }}
    wal_compression: zstd
    wal_keep_size: 512
    work_mem: 10MB
  recovery_conf:
  authentication:
    superuser:
      username: postgres
      password: '{{ with secret "/kv/service/postgres" }}{{ .Data.data.pg_pwd }}{{ end }}'
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
    replication:
      username: replicator
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
      sslcert: /secrets/postgres.bundle.pem
      sslkey: /secrets/postgres.bundle.pem
    rewind:
      username: rewind
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
      sslcert: /secrets/postgres.bundle.pem
      sslkey: /secrets/postgres.bundle.pem

restapi:
  connect_address: {{ env "NOMAD_HOST_ADDR_patroni" }}
  listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_patroni" }}
  keyfile: /secrets/postgres.bundle.pem
  certfile: /secrets/postgres.bundle.pem
  cafile: /local/postgres.ca.pem
  verify_client: optional
  authentication:
    username: patroni
    password: '{{ with secret "/kv/service/postgres" }}{{ .Data.data.api_pwd }}{{ end }}'

ctl:
  insecure: False
  keyfile: /secrets/postgres.bundle.pem
  certfile: /secrets/postgres.bundle.pem
  cafile: /local/postgres.ca.pem

watchdog:
  mode: off
_EOT
        destination   = "secrets/patroni.yml"
        perms         = "0400"
        uid           = 100026
        gid           = 100026
        change_mode   = "signal"
        change_signal = "SIGHUP"
      }
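
      # Since PostgreSQL also listens on the shared /alloc/data/postgres socket,
      # another task in this group could connect without going through the
      # network, e.g. (illustrative, and subject to the pg_hba rules above):
      #   psql "host=/alloc/data/postgres user=postgres dbname=postgres"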

      # Post-bootstrap script to create users
      template {
        data        = <<_EOT
#!/bin/sh
set -euo pipefail
# Create the roles needed for Patroni
{{ with secret "/kv/service/postgres" }}
psql <<'_EOSQL'
ALTER ROLE postgres WITH SUPERUSER LOGIN PASSWORD '{{ .Data.data.pg_pwd }}';
CREATE ROLE replicator WITH LOGIN REPLICATION PASSWORD '{{ .Data.data.replicator_pwd }}';
CREATE ROLE rewind WITH LOGIN PASSWORD '{{ .Data.data.rewind_pwd }}';
CREATE ROLE vault WITH LOGIN SUPERUSER PASSWORD '{{ .Data.data.vault_initial_pwd }}';
CREATE ROLE monitor WITH LOGIN PASSWORD '{{ .Data.data.monitor_pwd }}';
GRANT "pg_monitor" TO "monitor";
_EOSQL
{{ end }}
_EOT
        destination = "secrets/create_users.sh"
        perms       = "0750"
        uid         = 100026
        gid         = 100026
        change_mode = "noop"
      }

      # Post-bootstrap wrapper, as /secrets is mounted with noexec
      template {
        data        = <<_EOT
#!/bin/sh
set -euo pipefail
sh /secrets/create_users.sh
_EOT
        destination = "local/create_users.sh"
        perms       = "0750"
        uid         = 100026
        gid         = 100026
        change_mode = "noop"
      }

      # Obtain a certificate from Vault
      template {
        data          = <<_EOT
{{ with pkiCert
  "/pki/postgres/issue/postgres-server"
  "common_name=postgres.service.consul"
  (printf "alt_name=%s.postgres.service.consul" (env "NOMAD_ALLOC_INDEX"))
  (printf "ip_sans=%s" (env "NOMAD_IP_patroni")) "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{ end }}
_EOT
        destination   = "secrets/postgres.bundle.pem"
        perms         = "0400"
        uid           = 100026
        gid           = 100026
        change_mode   = "signal"
        change_signal = "SIGHUP"
      }
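
      # This assumes a Vault PKI mount at pki/postgres with a "postgres-server"
      # role allowing these names, e.g. (hypothetical, adjust to your setup):
      #   vault write pki/postgres/roles/postgres-server \
      #     allowed_domains=postgres.service.consul allow_subdomains=true \
      #     allow_bare_domains=true allow_ip_sans=true max_ttl=72h
      # The same bundle serves as both the server cert and the client cert for
      # replication/rewind, so the role must permit both usages.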

      # CA certificate chain
      template {
        data          = <<_EOT
{{ with secret "/pki/postgres/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
        destination   = "local/postgres.ca.pem"
        change_mode   = "signal"
        change_signal = "SIGHUP"
      }

      # Mount the persistent volume in /data
      volume_mount {
        volume      = "data"
        destination = "/data"
      }

      # Mount the backup volume (which can be used for PITR with pgbackrest)
      volume_mount {
        volume      = "backup"
        destination = "/backup"
      }

      resources {
        cpu    = 1000
        memory = 1024
      }
    }
  }
}