Small cleanup

2024-04-09 10:18:19 +02:00 · 2024-04-09 10:18:19 +02:00 · 79642188b4
parent 1951842adf
commit 79642188b4
4 changed files with 38 additions and 37 deletions
--- a/example/manage.nomad.hcl
+++ b/example/manage.nomad.hcl
@ -9,8 +9,7 @@ job "postgres-manage" {
  datacenters = ["dc1"]
  region      = "global"
-
+  priority    = 80
  group "manage" {
--- a/example/postgres.nomad.hcl
+++ b/example/postgres.nomad.hcl
@ -6,38 +6,40 @@ job "postgres" {
  priority    = 80
  constraint {
    operator = "distinct_hosts"
    value    = "true"
  }
  group "server" {
    count          = 1
    shutdown_delay = "6s"
    constraint {
      operator = "distinct_hosts"
      value    = "true"
    }
    update {
      healthy_deadline  = "2h"
      progress_deadline = "3h"
    }
    ephemeral_disk {
      # Use minimal ephemeral disk
      size = 101
    }
-    count = "1"
+
    network {
      mode = "bridge"
-      # Patroni API for node to check each others
+      # Patroni API for nodes to check each other
      port "patroni" {
        to = 8080
      }
-      # When running with patroni, nodes must reach each others postgres service
+      # When running with patroni, nodes must reach each other's postgres service, so we expose a port
      port "postgres" {
        to = 5432
      }
    }
    update {
      # Set super high deadlines as recovery can take lots of time
      healthy_deadline  = "48h"
      progress_deadline = "72h"
    }
    service {
      name = "postgres"
      port = 5432
@ -121,12 +123,14 @@ job "postgres" {
    }
    volume "backup" {
      source          = "postgres-backup"
      type            = "csi"
      access_mode     = "multi-node-multi-writer"
      attachment_mode = "file-system"
    }
    volume "data" {
      source          = "postgres-data"
      type            = "csi"
--- a/postgres.nomad.hcl
+++ b/postgres.nomad.hcl
@ -1,40 +1,32 @@
 job "[[ .instance ]]" {
-[[- $c := merge .pg.server .pg . ]]
+[[- $c := merge .pg . ]]
 [[ template "common/job_start" $c ]]
  group "server" {
 [[- $c := merge $c.server $c ]]
-    ephemeral_disk {
+[[- /* Ensure count is 1 when in recovery mode */]]
-      # Use minimal ephemeral disk
+[[ template "common/group_start" merge (dict "count" (ternary 1 $c.count $c.recovery)) $c ]]
      size = 101
    }
    count = "[[ $c.recovery | ternary 1 $c.count ]]"
    network {
      mode = "bridge"
 [[- if conv.ToBool $c.prometheus.enabled ]]
      port "metrics" {}
      port "patroni-metrics" {}
 [[- end ]]
-      # Patroni API for node to check each others
+      # Patroni API for nodes to check each other
      port "patroni" {
        to = 8080
      }
-      # When running with patroni, nodes must reach each others postgres service
+      # When running with patroni, nodes must reach each other's postgres service, so we expose a port
      port "postgres" {
        to = 5432
      }
    }
-    update {
+[[- if not $c.recovery ]]
      # Set super high deadlines as recovery can take lots of time
      healthy_deadline  = "48h"
      progress_deadline = "72h"
    }
 [[- if not .pg.server.recovery ]]
    service {
      name = "[[ .instance ]][[ $c.consul.suffix ]]"
      port = 5432
@ -116,9 +108,9 @@ job "[[ .instance ]]" {
      config {
        image      = "[[ $c.image ]]"
        # Set shm_size to half of the total size
-        shm_size   = [[ mul .pg.server.resources.memory 524288 ]]
+        shm_size   = [[ mul $c.resources.memory 524288 ]]
        volumes    = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
-[[- if .pg.server.recovery ]]
+[[- if $c.recovery ]]
        # Running in recovery mode : just start the container and wait
        # so we can enter it and manually recover what we need to
        command    = "sleep"
--- a/variables.yml
+++ b/variables.yml
@ -6,6 +6,10 @@ instance: postgres
 pg:
  nomad:
    # Set higher priority for the postgres job
    priority: 80
  vault:
    pki:
      ou: Postgres
@ -61,12 +65,14 @@ pg:
      PGBACKREST_STANZA: '[[ .instance ]]'
    nomad:
      # Set higher priority for the postgres job
      priority: 80
      # Enforce running on distinct hosts
      constraints:
        - operator: distinct_hosts
          value: true
      update:
        # When running in recovery mode, use huge deadlines as it can take a lot of time
        healthy_deadline: '[[ .pg.server.recovery | ternary "48h" "2h" ]]'
        progress_deadline: '[[ .pg.server.recovery | ternary "72h" "3h" ]]'
    # In recovery mode, neither patroni nor postgres will be started. The container will start and wait for manual recovery
    recovery: false