549 lines
16 KiB
HCL
549 lines
16 KiB
HCL
job "postgres-server" {
|
|
|
|
|
|
datacenters = ["dc1"]
|
|
region = "global"
|
|
priority = 80
|
|
|
|
|
|
group "server" {
|
|
|
|
count = 1
|
|
shutdown_delay = "6s"
|
|
|
|
|
|
constraint {
|
|
operator = "distinct_hosts"
|
|
value = "true"
|
|
}
|
|
|
|
|
|
update {
|
|
healthy_deadline = "2h"
|
|
progress_deadline = "3h"
|
|
}
|
|
|
|
ephemeral_disk {
|
|
# Use minimal ephemeral disk
|
|
size = 101
|
|
}
|
|
|
|
|
|
network {
|
|
mode = "bridge"
|
|
# Patroni API for nodes to check each others
|
|
port "patroni" {
|
|
to = 8080
|
|
}
|
|
# When running with patroni, nodes must reach each others postgres service, so we expose a port
|
|
port "postgres" {
|
|
to = 5432
|
|
}
|
|
}
|
|
service {
|
|
name = "postgres"
|
|
port = 5432
|
|
meta {
|
|
alloc = "${NOMAD_ALLOC_INDEX}"
|
|
datacenter = "${NOMAD_DC}"
|
|
group = "${NOMAD_GROUP_NAME}"
|
|
job = "${NOMAD_JOB_NAME}"
|
|
namespace = "${NOMAD_NAMESPACE}"
|
|
node = "${node.unique.name}"
|
|
region = "${NOMAD_REGION}"
|
|
}
|
|
|
|
connect {
|
|
sidecar_service {
|
|
}
|
|
sidecar_task {
|
|
|
|
logs {
|
|
disabled = false
|
|
}
|
|
|
|
config {
|
|
args = [
|
|
"-c",
|
|
"${NOMAD_SECRETS_DIR}/envoy_bootstrap.json",
|
|
"-l",
|
|
"${meta.connect.log_level}",
|
|
"--concurrency",
|
|
"${meta.connect.proxy_concurrency}",
|
|
"--disable-hot-restart"
|
|
]
|
|
}
|
|
|
|
resources {
|
|
cpu = 50
|
|
memory = 64
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
tags = [
|
|
"postgres-${NOMAD_ALLOC_INDEX}",
|
|
# Note : we don't add traefik.enable=true
|
|
# This will be done dynamically only on the current master node using the update_tags.sh script
|
|
"traefik.tcp.routers.postgres.rule=HostSNI(`*`)",
|
|
"traefik.tcp.routers.postgres.tls=true",
|
|
"traefik.tcp.routers.postgres.entrypoints=postgres",
|
|
]
|
|
|
|
# Use patroni health endpoint to verify postgres status
|
|
check {
|
|
name = "healthy"
|
|
type = "http"
|
|
port = "patroni"
|
|
path = "/health"
|
|
protocol = "https"
|
|
# Patroni REST API is using a cert from a private CA
|
|
tls_skip_verify = true
|
|
interval = "30s"
|
|
timeout = "5s"
|
|
|
|
}
|
|
|
|
# This check will ensure the current role is published in Consul tags (if the callback during a role change failed for example)
|
|
check {
|
|
name = "tags"
|
|
type = "script"
|
|
command = "/local/update_tags.sh"
|
|
task = "postgres"
|
|
interval = "30s"
|
|
timeout = "5s"
|
|
}
|
|
|
|
check {
|
|
name = "ready"
|
|
type = "script"
|
|
interval = "30s"
|
|
timeout = "5s"
|
|
task = "postgres"
|
|
command = "pg_isready"
|
|
}
|
|
|
|
# Patroni will run a script to update the tags (master / replica)
|
|
enable_tag_override = true
|
|
}
|
|
|
|
|
|
|
|
volume "backup" {
|
|
source = "postgres-backup"
|
|
type = "csi"
|
|
access_mode = "multi-node-multi-writer"
|
|
attachment_mode = "file-system"
|
|
}
|
|
|
|
volume "data" {
|
|
source = "postgres-data"
|
|
type = "csi"
|
|
access_mode = "single-node-writer"
|
|
attachment_mode = "file-system"
|
|
per_alloc = true
|
|
}
|
|
|
|
|
|
task "postgres" {
|
|
driver = "docker"
|
|
leader = true
|
|
kill_timeout = "10m"
|
|
|
|
config {
|
|
image = "danielberteaud/patroni:15-3.3.0-2"
|
|
# Set shm_size to half of the total size
|
|
shm_size = 536870912
|
|
volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
|
|
command = "patroni"
|
|
args = ["/secrets/patroni.yml"]
|
|
pids_limit = 700
|
|
}
|
|
|
|
|
|
vault {
|
|
policies = ["postgres"]
|
|
env = false
|
|
disable_file = true
|
|
change_mode = "noop"
|
|
}
|
|
|
|
|
|
|
|
# Use a template block instead of env {} so we can fetch values from vault
|
|
template {
|
|
data = <<_EOT
|
|
LANG=fr_FR.utf8
|
|
PGBACKREST_STANZA=postgres
|
|
TZ=Europe/Paris
|
|
_EOT
|
|
destination = "secrets/.env"
|
|
perms = 400
|
|
env = true
|
|
}
|
|
|
|
|
|
template {
|
|
data = <<_EOT
|
|
# Get a Consul token from vault, so we're able to update the tags in Consul from the containers
|
|
CONSUL_HTTP_TOKEN={{ with secret "consul/creds/postgres" }}{{ .Data.token }}{{ end }}
|
|
PATRONICTL_CONFIG_FILE=/secrets/patroni.yml
|
|
_EOT
|
|
destination = "secrets/pg.env"
|
|
uid = 100000
|
|
gid = 100026
|
|
perms = 440
|
|
change_mode = "noop"
|
|
env = true
|
|
}
|
|
|
|
# Scripts to update tags attached to the service in consul catalog
|
|
# with either master or replica
|
|
template {
|
|
data = <<_EOT
|
|
def nest_service_params:
|
|
# Select all objects with keys that start with the the name 'Service'
|
|
# Remove 'Service' prefix from key names
|
|
with_entries(select(.key | startswith("Service")) | .key = (.key | sub("^Service"; "")))
|
|
;
|
|
|
|
def create_svc_txn:
|
|
# Select our own instance
|
|
select(.ServiceTags[] | contains($mytag))
|
|
|
|
# Add needed tags, remove unwanted ones
|
|
| .ServiceTags -= ["master"]
|
|
| .ServiceTags -= ["replica"]
|
|
| .ServiceTags -= ["uninitialized"]
|
|
| .ServiceTags -= ["traefik.enable=true"]
|
|
| .ServiceTags += [$role]
|
|
| if $role == "master" then .ServiceTags += ["traefik.enable=true"] else .ServiceTags -= ["traefik.enable=true"] end
|
|
|
|
# Rename 'ServiceName' to 'ServiceService'
|
|
# Will be transformed to 'Service.Service later'
|
|
| . + {ServiceService: .ServiceName}
|
|
| del(.ServiceName)
|
|
| nest_service_params as $nested_params
|
|
|
|
# Create transaction to update services using check-and-set option.
|
|
# If service has been modified since our last read, or no longer exists, the
|
|
# transaction will fail.
|
|
| {
|
|
Service: {
|
|
Verb: "cas",
|
|
Node: .Node,
|
|
|
|
# Nest service-related parameters under 'Service' key
|
|
# Add ModfyIndex key
|
|
Service: ($nested_params + {ModifyIndex: .ModifyIndex})
|
|
}
|
|
}
|
|
;
|
|
|
|
. | map(create_svc_txn)
|
|
|
|
_EOT
|
|
destination = "local/serviceformat.jq"
|
|
change_mode = "noop"
|
|
}
|
|
|
|
template {
|
|
data = <<_EOT
|
|
#!/bin/sh
|
|
# vim: syntax=sh
|
|
|
|
set -eo pipefail
|
|
|
|
EVENT=$1
|
|
NEW_ROLE=$2
|
|
|
|
if [ -z "${NEW_ROLE}" ]; then
|
|
# If new role is not given as argument, fetch it from patroni
|
|
NEW_ROLE=$(curl --silent --insecure https://localhost:${NOMAD_PORT_patroni}/health | jq -r .role)
|
|
fi
|
|
|
|
# Source env file to get our consul token
|
|
source /secrets/pg.env
|
|
|
|
# translate promoted = master and demoted = recplica
|
|
if [ "${NEW_ROLE}" = "promoted" ]; then
|
|
NEW_ROLE="master"
|
|
elif [ "${NEW_ROLE}" = "demoted" ]; then
|
|
NEW_ROLE="replica"
|
|
fi
|
|
|
|
CURL_OPTS="--connect-timeout 5 --max-time 10 --retry 5 --retry-delay 1 --retry-max-time 40 --retry-connrefused --silent"
|
|
|
|
# Update tags on the main service
|
|
SERVICE_HAS_TAG=$(curl ${CURL_OPTS} \
|
|
-H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" \
|
|
http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres |\
|
|
jq ".[] | select( .ServiceTags as \$tags | \"postgres-{{ env "NOMAD_ALLOC_INDEX" }}\" | IN(\$tags[]) ) | .ServiceTags | any(.==\"${NEW_ROLE}\")")
|
|
if [ "${SERVICE_HAS_TAG}" = "false" ]; then
|
|
echo "Updating tags for the main service"
|
|
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres |\
|
|
jq --from-file /local/serviceformat.jq --arg role "${NEW_ROLE}" --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
|
|
curl ${CORL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/txn > /dev/null
|
|
else
|
|
echo "Main service already has the expected ${NEW_ROLE} tag"
|
|
fi
|
|
|
|
# Update tags on the sidecar service (connect-proxy)
|
|
SIDECAR_HAS_TAG=$(curl ${CURL_OPTS} \
|
|
-H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" \
|
|
http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres-sidecar-proxy |\
|
|
jq ".[] | select( .ServiceTags as \$tags | \"postgres-{{ env "NOMAD_ALLOC_INDEX" }}\" | IN(\$tags[]) ) | .ServiceTags | any(.==\"${NEW_ROLE}\")")
|
|
if [ "${SIDECAR_HAS_TAG}" = "false" ]; then
|
|
echo "Updating tags for the sidecar"
|
|
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/catalog/service/postgres-sidecar-proxy |\
|
|
jq --from-file /local/serviceformat.jq --arg role "${NEW_ROLE}" --arg mytag postgres-{{ env "NOMAD_ALLOC_INDEX" }} |\
|
|
curl ${CURL_OPTS} -H "X-Consul-Token: ${CONSUL_HTTP_TOKEN}" -X PUT -d @- http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500/v1/txn > /dev/null
|
|
else
|
|
echo "Sidecar service already has the expected ${NEW_ROLE} tag"
|
|
fi
|
|
|
|
_EOT
|
|
destination = "local/update_tags.sh"
|
|
perms = 755
|
|
change_mode = "noop"
|
|
}
|
|
|
|
# A small entrypoint scriptlet to ensure /alloc/data/postgres dir exists
|
|
template {
|
|
data = <<_EOT
|
|
#!/bin/sh
|
|
|
|
set -eu
|
|
mkdir -p /alloc/data/postgres
|
|
_EOT
|
|
destination = "local/mkdir-socket.sh"
|
|
perms = 755
|
|
}
|
|
|
|
# Patroni main configuration file
|
|
template {
|
|
data = <<_EOT
|
|
name: postgres-{{ env "NOMAD_ALLOC_INDEX" }}
|
|
scope: postgres
|
|
|
|
consul:
|
|
url: http://{{ sockaddr "GetInterfaceIP \"nomad\"" }}:8500
|
|
token: {{ with secret "consul/creds/postgres" }}{{ .Data.token }}{{ end }}
|
|
|
|
bootstrap:
|
|
dcs:
|
|
synchronous_mode: False
|
|
|
|
initdb:
|
|
- data-checksum
|
|
- encoding: UTF-8
|
|
|
|
post_bootstrap: /local/create_users.sh
|
|
|
|
postgresql:
|
|
|
|
create_replica_methods:
|
|
- basebackup
|
|
|
|
callbacks:
|
|
on_role_change: /local/update_tags.sh
|
|
on_start: /local/update_tags.sh
|
|
|
|
connect_address: {{ env "NOMAD_HOST_ADDR_postgres" }}
|
|
bin_dir: /usr/pgsql-15/bin
|
|
data_dir: /data/db/15
|
|
listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_postgres" }}
|
|
use_pg_rewind: True
|
|
#remove_data_directory_on_rewind_failure: True
|
|
|
|
pg_hba:
|
|
- local all postgres peer
|
|
- local replication postgres peer
|
|
- local all postgres scram-sha-256
|
|
- host all all 127.0.0.0/8 scram-sha-256
|
|
- host replication backup 127.0.0.0/8 scram-sha-256
|
|
- hostssl replication replicator 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
|
|
- hostssl postgres rewind 0.0.0.0/0 cert clientcert=verify-full map=patroni-map
|
|
- hostssl all all 0.0.0.0/0 cert clientcert=verify-full
|
|
|
|
pg_ident:
|
|
- patroni-map postgres.service.consul postgres
|
|
- patroni-map postgres.service.consul replicator
|
|
- patroni-map postgres.service.consul rewind
|
|
|
|
parameters:
|
|
ssl: on
|
|
ssl_cert_file: /secrets/postgres.bundle.pem
|
|
ssl_key_file: /secrets/postgres.bundle.pem
|
|
ssl_ca_file: /local/postgres.ca.pem
|
|
#ssl_crl_file: /local/postgres.crl.pem
|
|
# Add a socket in /alloc/data/postgres
|
|
# so other tasks in the same group can reach it
|
|
unix_socket_directories: /run/postgresql, /alloc/data/postgres
|
|
autovacuum_analyze_scale_factor: 0.05
|
|
autovacuum_analyze_threshold: 500
|
|
autovacuum_vacuum_scale_factor: 0.1
|
|
autovacuum_vacuum_threshold: 500
|
|
datestyle: ISO, DMY
|
|
log_connections: on
|
|
log_destination: stderr
|
|
log_directory: /proc/1/fd
|
|
log_disconnections: on
|
|
log_filename: 1
|
|
log_line_prefix: '[%m] u=%u,d=%d,a=%a,c=%h,xid=%x '
|
|
log_min_duration_statement: 2000
|
|
log_statement: ddl
|
|
log_timezone: {{ env "TZ" }}
|
|
maintenance_work_mem: 51MB
|
|
shared_buffers: 512MB
|
|
timezone: {{ env "TZ" }}
|
|
wal_compression: zstd
|
|
wal_keep_size: 512
|
|
work_mem: 10MB
|
|
|
|
recovery_conf:
|
|
|
|
authentication:
|
|
superuser:
|
|
username: postgres
|
|
password: '{{ with secret "kv/service/postgres" }}{{ .Data.data.pg_pwd }}{{ end }}'
|
|
sslmode: verify-ca
|
|
sslrootcert: /local/postgres.ca.pem
|
|
|
|
replication:
|
|
username: replicator
|
|
sslmode: verify-ca
|
|
sslrootcert: /local/postgres.ca.pem
|
|
sslcert: /secrets/postgres.bundle.pem
|
|
sslkey: /secrets/postgres.bundle.pem
|
|
|
|
rewind:
|
|
username: rewind
|
|
sslmode: verify-ca
|
|
sslrootcert: /local/postgres.ca.pem
|
|
sslcert: /secrets/postgres.bundle.pem
|
|
sslkey: /secrets/postgres.bundle.pem
|
|
|
|
restapi:
|
|
connect_address: {{ env "NOMAD_HOST_ADDR_patroni" }}
|
|
listen: 0.0.0.0:{{ env "NOMAD_ALLOC_PORT_patroni" }}
|
|
keyfile: /secrets/postgres.bundle.pem
|
|
certfile: /secrets/postgres.bundle.pem
|
|
cafile: /local/postgres.ca.pem
|
|
verify_client: optional
|
|
authentication:
|
|
username: patroni
|
|
password: '{{ with secret "kv/service/postgres" }}{{ .Data.data.api_pwd }}{{ end }}'
|
|
|
|
ctl:
|
|
insecure: False
|
|
keyfile: /secrets/postgres.bundle.pem
|
|
certfile: /secrets/postgres.bundle.pem
|
|
cafile: /local/postgres.ca.pem
|
|
|
|
watchdog:
|
|
mode: off
|
|
|
|
_EOT
|
|
destination = "secrets/patroni.yml"
|
|
perms = "0400"
|
|
uid = 100026
|
|
gid = 100026
|
|
change_mode = "signal"
|
|
change_signal = "SIGHUP"
|
|
}
|
|
|
|
# Post bootstrap script, to create users
|
|
template {
|
|
data = <<_EOT
|
|
#!/bin/sh
|
|
|
|
set -euo pipefail
|
|
|
|
# Create roles needed for patroni
|
|
{{ with secret "kv/service/postgres" }}
|
|
psql <<'_EOSQL'
|
|
ALTER ROLE postgres WITH SUPERUSER LOGIN PASSWORD '{{ .Data.data.pg_pwd }}';
|
|
CREATE ROLE replicator WITH LOGIN REPLICATION PASSWORD '{{ .Data.data.replicator_pwd }}';
|
|
CREATE ROLE rewind WITH LOGIN PASSWORD '{{ .Data.data.rewind_pwd }}';
|
|
CREATE ROLE vault WITH LOGIN SUPERUSER PASSWORD '{{ .Data.data.vault_initial_pwd }}';
|
|
CREATE ROLE monitor WITH LOGIN PASSWORD '{{ .Data.data.monitor_pwd }}';
|
|
GRANT "pg_monitor" TO "monitor";
|
|
_EOSQL
|
|
{{ end }}
|
|
|
|
_EOT
|
|
destination = "secrets/create_users.sh"
|
|
perms = "0750"
|
|
uid = 100026
|
|
gid = 100026
|
|
change_mode = "noop"
|
|
}
|
|
|
|
# Post bootstrap wrapper, as /secrets is mounted with noexec
|
|
template {
|
|
data = <<_EOT
|
|
#!/bin/sh
|
|
set -euo pipefail
|
|
sh /secrets/create_users.sh
|
|
_EOT
|
|
destination = "local/create_users.sh"
|
|
perms = "0750"
|
|
uid = 100026
|
|
gid = 100026
|
|
change_mode = "noop"
|
|
}
|
|
|
|
# Obtain a certificate from Vault
|
|
template {
|
|
data = <<_EOT
|
|
{{ with pkiCert
|
|
"pki/postgres/issue/postgres-server"
|
|
"common_name=postgres.service.consul"
|
|
(printf "alt_name=%s.postgres.service.consul" (env "NOMAD_ALLOC_INDEX"))
|
|
(printf "ip_sans=%s" (env "NOMAD_IP_patroni")) "ttl=72h" }}
|
|
{{ .Cert }}
|
|
{{ .Key }}
|
|
{{ end }}
|
|
_EOT
|
|
destination = "secrets/postgres.bundle.pem"
|
|
perms = "0400"
|
|
uid = 100026
|
|
gid = 100026
|
|
change_mode = "signal"
|
|
change_signal = "SIGHUP"
|
|
}
|
|
|
|
# CA certificate chains
|
|
template {
|
|
data = <<_EOT
|
|
{{ with secret "pki/postgres/cert/ca_chain" }}{{ .Data.ca_chain }}{{ end }}
|
|
_EOT
|
|
destination = "local/postgres.ca.pem"
|
|
change_mode = "signal"
|
|
change_signal = "SIGHUP"
|
|
}
|
|
|
|
# Mount the persistent volume in /data
|
|
volume_mount {
|
|
volume = "data"
|
|
destination = "/data"
|
|
}
|
|
# Mount the backup volume (which can be used for PITR with pgbackrest)
|
|
volume_mount {
|
|
volume = "backup"
|
|
destination = "/backup"
|
|
}
|
|
|
|
|
|
resources {
|
|
cpu = 1000
|
|
memory = 1024
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|