postgres/example/postgres-server.nomad.hcl

549 lines
16 KiB
HCL

job "postgres-server" {
datacenters = ["dc1"]
region = "global"
# High priority: other workloads likely depend on this database being up
priority = 80
group "server" {
count = 1
shutdown_delay = "6s"
# Never place two allocations of this group on the same client node
constraint {
operator = "distinct_hosts"
value = "true"
}
update {
# Generous deadlines: a replica may need a long time to (re)sync before becoming healthy
healthy_deadline = "2h"
progress_deadline = "3h"
}
ephemeral_disk {
# Use minimal ephemeral disk
size = 101
}
network {
mode = "bridge"
# Patroni REST API port, used by the nodes to check each other
port "patroni" {
to = 8080
}
# When running with patroni, nodes must reach each other's postgres service, so we expose a port
port "postgres" {
to = 5432
}
}
service {
name = "postgres"
port = 5432
# Publish allocation/placement info as Consul service metadata
meta {
alloc = "${NOMAD_ALLOC_INDEX}"
datacenter = "${NOMAD_DC}"
group = "${NOMAD_GROUP_NAME}"
job = "${NOMAD_JOB_NAME}"
namespace = "${NOMAD_NAMESPACE}"
node = "${node.unique.name}"
region = "${NOMAD_REGION}"
}
connect {
sidecar_service {
}
sidecar_task {
logs {
disabled = false
}
# Explicit Envoy args (log level and concurrency come from node meta)
config {
args = [
"-c",
"${NOMAD_SECRETS_DIR}/envoy_bootstrap.json",
"-l",
"${meta.connect.log_level}",
"--concurrency",
"${meta.connect.proxy_concurrency}",
"--disable-hot-restart"
]
}
# Keep the sidecar footprint small
resources {
cpu = 50
memory = 64
}
}
}
tags = [
"postgres-${NOMAD_ALLOC_INDEX}",
# Note : we don't add traefik.enable=true
# This will be done dynamically only on the current master node using the update_tags.sh script
"traefik.tcp.routers.postgres.rule=HostSNI(`*`)",
"traefik.tcp.routers.postgres.tls=true",
"traefik.tcp.routers.postgres.entrypoints=postgres",
]
# Use patroni health endpoint to verify postgres status
check {
name = "healthy"
type = "http"
port = "patroni"
path = "/health"
protocol = "https"
# Patroni REST API is using a cert from a private CA
tls_skip_verify = true
interval = "30s"
timeout = "5s"
}
# This check will ensure the current role is published in Consul tags (if the callback during a role change failed for example)
check {
name = "tags"
type = "script"
command = "/local/update_tags.sh"
task = "postgres"
interval = "30s"
timeout = "5s"
}
# Verify postgres itself is accepting connections
check {
name = "ready"
type = "script"
interval = "30s"
timeout = "5s"
task = "postgres"
command = "pg_isready"
}
# Patroni will run a script to update the tags (master / replica)
enable_tag_override = true
}
# Shared backup volume: all nodes can write to it concurrently
volume "backup" {
source = "postgres-backup"
type = "csi"
access_mode = "multi-node-multi-writer"
attachment_mode = "file-system"
}
# Data volume: one dedicated volume per allocation (postgres-data[0], [1], ...)
volume "data" {
source = "postgres-data"
type = "csi"
access_mode = "single-node-writer"
attachment_mode = "file-system"
per_alloc = true
}
task "postgres" {
driver = "docker"
# Main task of the group: if it fails, the whole allocation restarts
leader = true
# Give postgres plenty of time to shut down cleanly
kill_timeout = "10m"
config {
image = "danielberteaud/patroni:15-3.3.0-2"
# Set shm_size to half of the total size
shm_size = 536870912
volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
command = "patroni"
args = ["/secrets/patroni.yml"]
pids_limit = 700
}
vault {
policies = ["postgres"]
# Secrets are consumed through template blocks, not injected as env / files
env = false
disable_file = true
change_mode = "noop"
}
# Use a template block instead of env {} so we can fetch values from vault
template {
data = <<_EOT
LANG=fr_FR.utf8
PGBACKREST_STANZA=postgres
TZ=Europe/Paris
_EOT
destination = "secrets/.env"
# Quoted octal perms, consistent with the other template stanzas in this job
perms = "0400"
env = true
}
template {
data = <<_EOT
# Get a Consul token from vault, so we're able to update the tags in Consul from the containers
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/postgres" }}{{ .Data.token }}{{ end }}
PATRONICTL_CONFIG_FILE=/secrets/patroni.yml
_EOT
destination = "secrets/pg.env"
uid = 100000
gid = 100026
# Group-readable so update_tags.sh (running as the postgres user) can source it
perms = "0440"
change_mode = "noop"
env = true
}
# jq filter used by update_tags.sh to rewrite the tags attached to the
# service in the consul catalog, with either master or replica
template {
data = <<_EOT
def nest_service_params:
# Select all objects with keys that start with the name 'Service'
# Remove 'Service' prefix from key names
with_entries(select(.key | startswith("Service")) | .key = (.key | sub("^Service"; "")))
;
def create_svc_txn:
# Select our own instance
select(.ServiceTags[] | contains($mytag))
# Add needed tags, remove unwanted ones
| .ServiceTags -= ["master"]
| .ServiceTags -= ["replica"]
| .ServiceTags -= ["uninitialized"]
| .ServiceTags -= ["traefik.enable=true"]
| .ServiceTags += [$role]
| if $role == "master" then .ServiceTags += ["traefik.enable=true"] else .ServiceTags -= ["traefik.enable=true"] end
# Rename 'ServiceName' to 'ServiceService'
# Will be transformed to 'Service.Service' later
| . + {ServiceService: .ServiceName}
| del(.ServiceName)
| nest_service_params as $nested_params
# Create transaction to update services using check-and-set option.
# If service has been modified since our last read, or no longer exists, the
# transaction will fail.
| {
Service: {
Verb: "cas",
Node: .Node,
# Nest service-related parameters under 'Service' key
# Add ModifyIndex key
Service: ($nested_params + {ModifyIndex: .ModifyIndex})
}
}
;
. | map(create_svc_txn)
_EOT
destination = "local/serviceformat.jq"
change_mode = "noop"
}
# update_tags.sh: invoked by Patroni callbacks (and the "tags" check) to push
# the current role (master / replica) into the Consul tags of both the main
# service and its connect sidecar, using check-and-set transactions.
template {
data = <<_EOT
#!/bin/sh
# vim: syntax=sh
set -eo pipefail
EVENT=$1
NEW_ROLE=$2
if [ -z "${NEW_ROLE}" ]; then
# If new role is not given as argument, fetch it from patroni
NEW_ROLE=$(curl --silent --insecure https://localhost:${NOMAD_PORT_patroni}/health | jq -r .role)
fi
# Source env file to get our consul token (POSIX '.', as 'source' is a bashism)
. /secrets/pg.env
# translate promoted = master and demoted = replica
if [ "${NEW_ROLE}" = "promoted" ]; then
NEW_ROLE="master"
elif [ "${NEW_ROLE}" = "demoted" ]; then
NEW_ROLE="replica"
fi
CURL_OPTS="--connect-timeout 5 --max-time 10 --retry 5 --retry-delay 1 --retry-max-time 40 --retry-connrefused --silent"
# Update tags on the main service
SERVICE_HAS_TAG=$(curl ${CURL_OPTS} \
-H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" \
http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres |\
jq ".[] | select( .ServiceTags as \$tags | \"postgres-{{ env "NOMAD_ALLOC_INDEX" }}\" | IN(\$tags[]) ) | .ServiceTags | any(.==\"${NEW_ROLE}\")")
if [ "${SERVICE_HAS_TAG}" = "false" ]; then
echo "Updating tags for the main service"
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres |\
jq --from-file /local/serviceformat.jq --arg role "${NEW_ROLE}" --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/txn > /dev/null
else
echo "Main service already has the expected ${NEW_ROLE} tag"
fi
# Update tags on the sidecar service (connect-proxy)
SIDECAR_HAS_TAG=$(curl ${CURL_OPTS} \
-H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" \
http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres-sidecar-proxy |\
jq ".[] | select( .ServiceTags as \$tags | \"postgres-{{ env "NOMAD_ALLOC_INDEX" }}\" | IN(\$tags[]) ) | .ServiceTags | any(.==\"${NEW_ROLE}\")")
if [ "${SIDECAR_HAS_TAG}" = "false" ]; then
echo "Updating tags for the sidecar"
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres-sidecar-proxy |\
jq --from-file /local/serviceformat.jq --arg role "${NEW_ROLE}" --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/txn > /dev/null
else
echo "Sidecar service already has the expected ${NEW_ROLE} tag"
fi
_EOT
destination = "local/update_tags.sh"
perms = "0755"
change_mode = "noop"
}
# A small entrypoint scriptlet to ensure /alloc/data/postgres dir exists
# (mounted into the container's /entrypoint.d through the docker config volumes)
template {
data = <<_EOT
#!/bin/sh
set -eu
mkdir -p /alloc/data/postgres
_EOT
destination = "local/mkdir-socket.sh"
# Must be executable to be run from /entrypoint.d
perms = 755
}
# Patroni main configuration file.
# NOTE: YAML is indentation-sensitive; the nesting below follows the Patroni
# configuration schema (consul / bootstrap / postgresql / restapi / ctl / watchdog).
template {
data = <<_EOT
name: postgres-{{ env "NOMAD_ALLOC_INDEX" }}
scope: postgres
consul:
  url: http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
  token: {{ with secret "consul/creds/postgres" }}{{ .Data.token }}{{ end }}
bootstrap:
  dcs:
    synchronous_mode: False
  initdb:
    # initdb flag is --data-checksums (plural)
    - data-checksums
    - encoding: UTF-8
  post_bootstrap: /local/create_users.sh
postgresql:
  create_replica_methods:
    - basebackup
  callbacks:
    on_role_change: /local/update_tags.sh
    on_start: /local/update_tags.sh
  connect_address: {{ env "NOMAD_HOST_ADDR_postgres" }}
  bin_dir: /usr/pgsql-15/bin
  data_dir: /data/db/15
  listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_postgres" }}
  use_pg_rewind: True
  #remove_data_directory_on_rewind_failure: True
  pg_hba:
    - local all postgres peer
    - local replication postgres peer
    - local all postgres scram-sha-256
    - host all all 127.0.0.0/8 scram-sha-256
    - host replication backup 127.0.0.0/8 scram-sha-256
    - hostssl replication replicator 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
    - hostssl postgres rewind 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
    - hostssl all all 0.0.0.0/0 cert clientcert=verify-full
  pg_ident:
    - patroni-map postgres.service.consul postgres
    - patroni-map postgres.service.consul replicator
    - patroni-map postgres.service.consul rewind
  parameters:
    ssl: on
    ssl_cert_file: /secrets/postgres.bundle.pem
    ssl_key_file: /secrets/postgres.bundle.pem
    ssl_ca_file: /local/postgres.ca.pem
    #ssl_crl_file: /local/postgres.crl.pem
    # Add a socket in /alloc/data/postgres
    # so other tasks in the same group can reach it
    unix_socket_directories: /run/postgresql, /alloc/data/postgres
    autovacuum_analyze_scale_factor: 0.05
    autovacuum_analyze_threshold: 500
    autovacuum_vacuum_scale_factor: 0.1
    autovacuum_vacuum_threshold: 500
    datestyle: ISO, DMY
    log_connections: on
    log_destination: stderr
    log_directory: /proc/1/fd
    log_disconnections: on
    log_filename: 1
    log_line_prefix: '[%m] u=%u,d=%d,a=%a,c=%h,xid=%x '
    log_min_duration_statement: 2000
    log_statement: ddl
    log_timezone: {{ env "TZ" }}
    maintenance_work_mem: 51MB
    shared_buffers: 512MB
    timezone: {{ env "TZ" }}
    wal_compression: zstd
    wal_keep_size: 512
    work_mem: 10MB
  recovery_conf:
  authentication:
    superuser:
      username: postgres
      password: '{{ with secret "kv/service/postgres" }}{{ .Data.data.pg_pwd }}{{ end }}'
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
    replication:
      username: replicator
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
      sslcert: /secrets/postgres.bundle.pem
      sslkey: /secrets/postgres.bundle.pem
    rewind:
      username: rewind
      sslmode: verify-ca
      sslrootcert: /local/postgres.ca.pem
      sslcert: /secrets/postgres.bundle.pem
      sslkey: /secrets/postgres.bundle.pem
restapi:
  connect_address: {{ env "NOMAD_HOST_ADDR_patroni" }}
  listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_patroni" }}
  keyfile: /secrets/postgres.bundle.pem
  certfile: /secrets/postgres.bundle.pem
  cafile: /local/postgres.ca.pem
  verify_client: optional
  authentication:
    username: patroni
    password: '{{ with secret "kv/service/postgres" }}{{ .Data.data.api_pwd }}{{ end }}'
ctl:
  insecure: False
  keyfile: /secrets/postgres.bundle.pem
  certfile: /secrets/postgres.bundle.pem
  cafile: /local/postgres.ca.pem
watchdog:
  mode: off
_EOT
destination = "secrets/patroni.yml"
perms = "0400"
uid = 100026
gid = 100026
# Patroni reloads its config on SIGHUP, no restart needed
change_mode = "signal"
change_signal = "SIGHUP"
}
# Post bootstrap script, to create users
# Rendered in secrets/ because it contains passwords fetched from vault
template {
data = <<_EOT
#!/bin/sh
set -euo pipefail
# Create roles needed for patroni
{{ with secret "kv/service/postgres" }}
psql <<'_EOSQL'
ALTER ROLE postgres WITH SUPERUSER LOGIN PASSWORD '{{ .Data.data.pg_pwd }}';
CREATE ROLE replicator WITH LOGIN REPLICATION PASSWORD '{{ .Data.data.replicator_pwd }}';
CREATE ROLE rewind WITH LOGIN PASSWORD '{{ .Data.data.rewind_pwd }}';
CREATE ROLE vault WITH LOGIN SUPERUSER PASSWORD '{{ .Data.data.vault_initial_pwd }}';
CREATE ROLE monitor WITH LOGIN PASSWORD '{{ .Data.data.monitor_pwd }}';
GRANT "pg_monitor" TO "monitor";
_EOSQL
{{ end }}
_EOT
destination = "secrets/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Post bootstrap wrapper, as /secrets is mounted with noexec
# (Patroni's post_bootstrap points here; it runs the real script via sh)
template {
data = <<_EOT
#!/bin/sh
set -euo pipefail
sh /secrets/create_users.sh
_EOT
destination = "local/create_users.sh"
perms = "0750"
uid = 100026
gid = 100026
change_mode = "noop"
}
# Obtain a certificate from Vault.
# The Vault PKI issue endpoint expects "alt_names" (the singular "alt_name"
# is not a recognized parameter and the SAN would be silently dropped).
template {
data = <<_EOT
{{ with pkiCert
"pki/postgres/issue/postgres-server"
"common_name=postgres.service.consul"
(printf "alt_names=%s.postgres.service.consul" (env "NOMAD_ALLOC_INDEX"))
(printf "ip_sans=%s" (env "NOMAD_IP_patroni")) "ttl=72h" }}
{{ .Cert }}
{{ .Key }}
{{ end }}
_EOT
destination = "secrets/postgres.bundle.pem"
perms = "0400"
uid = 100026
gid = 100026
# Postgres / Patroni reload certs on SIGHUP
change_mode = "signal"
change_signal = "SIGHUP"
}
# CA certificate chains
# NOTE(review): assumes the cert/ca_chain endpoint exposes a ca_chain field — verify against the Vault version in use
template {
data = <<_EOT
{{ with secret "pki/postgres/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
_EOT
destination = "local/postgres.ca.pem"
change_mode = "signal"
change_signal = "SIGHUP"
}
# Mount the persistent volume in /data
volume_mount {
volume = "data"
destination = "/data"
}
# Mount the backup volume (which can be used for PITR with pgbackrest)
volume_mount {
volume = "backup"
destination = "/backup"
}
# 1 GB memory: matches the 512 MB shm_size (half) set in the docker config
resources {
cpu = 1000
memory = 1024
}
}
}
}