diff --git a/example/manage.nomad.hcl b/example/manage.nomad.hcl index fc5ab8f..097fa82 100644 --- a/example/manage.nomad.hcl +++ b/example/manage.nomad.hcl @@ -9,8 +9,7 @@ job "postgres-manage" { datacenters = ["dc1"] region = "global" - - + priority = 80 group "manage" { diff --git a/example/postgres.nomad.hcl b/example/postgres.nomad.hcl index de3c8e5..3b7bd22 100644 --- a/example/postgres.nomad.hcl +++ b/example/postgres.nomad.hcl @@ -6,38 +6,40 @@ job "postgres" { priority = 80 - constraint { - operator = "distinct_hosts" - value = "true" - } - - - group "server" { + count = 1 + shutdown_delay = "6s" + + + constraint { + operator = "distinct_hosts" + value = "true" + } + + + update { + healthy_deadline = "2h" + progress_deadline = "3h" + } + ephemeral_disk { # Use minimal ephemeral disk size = 101 } - count = "1" + network { mode = "bridge" - # Patroni API for node to check each others + # Patroni API for nodes to check each other port "patroni" { to = 8080 } - # When running with patroni, nodes must reach each others postgres service + # When running with patroni, nodes must reach each other's postgres service, so we expose a port port "postgres" { to = 5432 } } - - update { - # Set super high deadlines as recovery can take lots of time - healthy_deadline = "48h" - progress_deadline = "72h" - } service { name = "postgres" port = 5432 @@ -121,12 +123,14 @@ job "postgres" { } + volume "backup" { source = "postgres-backup" type = "csi" access_mode = "multi-node-multi-writer" attachment_mode = "file-system" } + volume "data" { source = "postgres-data" type = "csi" diff --git a/postgres.nomad.hcl b/postgres.nomad.hcl index a832487..c8334e4 100644 --- a/postgres.nomad.hcl +++ b/postgres.nomad.hcl @@ -1,40 +1,32 @@ job "[[ .instance ]]" { -[[- $c := merge .pg.server .pg . ]] +[[- $c := merge .pg . 
]] [[ template "common/job_start" $c ]] group "server" { +[[- $c := merge $c.server $c ]] - ephemeral_disk { - # Use minimal ephemeral disk - size = 101 - } +[[- /* Ensure count is 1 when in recovery mode */]] +[[ template "common/group_start" merge (dict "count" (ternary 1 $c.count $c.recovery)) $c ]] - count = "[[ $c.recovery | ternary 1 $c.count ]]" network { mode = "bridge" [[- if conv.ToBool $c.prometheus.enabled ]] port "metrics" {} port "patroni-metrics" {} [[- end ]] - # Patroni API for node to check each others + # Patroni API for nodes to check each other port "patroni" { to = 8080 } - # When running with patroni, nodes must reach each others postgres service + # When running with patroni, nodes must reach each other's postgres service, so we expose a port port "postgres" { to = 5432 } } - update { - # Set super high deadlines as recovery can take lots of time - healthy_deadline = "48h" - progress_deadline = "72h" - } - -[[- if not .pg.server.recovery ]] +[[- if not $c.recovery ]] service { name = "[[ .instance ]][[ $c.consul.suffix ]]" port = 5432 @@ -116,9 +108,9 @@ job "[[ .instance ]]" { config { image = "[[ $c.image ]]" # Set shm_size to half of the total size - shm_size = [[ mul .pg.server.resources.memory 524288 ]] + shm_size = [[ mul $c.resources.memory 524288 ]] volumes = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"] -[[- if .pg.server.recovery ]] +[[- if $c.recovery ]] # Running in recovery mode : just start the container and wait # so we can enter it and manually recover what we need to command = "sleep" diff --git a/variables.yml b/variables.yml index bb9ad05..cd210b7 100644 --- a/variables.yml +++ b/variables.yml @@ -6,6 +6,10 @@ instance: postgres pg: + nomad: + # Set higher priority for the postgres job + priority: 80 + vault: pki: ou: Postgres @@ -61,12 +65,14 @@ pg: PGBACKREST_STANZA: '[[ .instance ]]' nomad: - # Set higher priority for the postgres job - priority: 80 # Enforce running on distinct hosts constraints: - 
operator: distinct_hosts value: true + update: + # When running in recovery mode, use huge deadlines as it can take a lot of time + healthy_deadline: '[[ .pg.server.recovery | ternary "48h" "2h" ]]' + progress_deadline: '[[ .pg.server.recovery | ternary "72h" "3h" ]]' # In recovery mode, neither patroni nor postgres will be started. The container will start and wait for manual recovery recovery: false