Small cleanup

2024-04-09 10:18:19 +02:00 · 2024-04-09 10:18:19 +02:00 · 79642188b4
parent 1951842adf
commit 79642188b4
4 changed files with 38 additions and 37 deletions
--- a/example/manage.nomad.hcl
+++ b/example/manage.nomad.hcl
@ -9,8 +9,7 @@ job "postgres-manage" {

  datacenters = ["dc1"]
  region      = "global"
-
-
+  priority    = 80


  group "manage" {
--- a/example/postgres.nomad.hcl
+++ b/example/postgres.nomad.hcl
@ -6,38 +6,40 @@ job "postgres" {
  priority    = 80


-  constraint {
-    operator = "distinct_hosts"
-    value    = "true"
-  }
-
-
-
  group "server" {

+    count          = 1
+    shutdown_delay = "6s"
+
+
+    constraint {
+      operator = "distinct_hosts"
+      value    = "true"
+    }
+
+
+    update {
+      healthy_deadline  = "2h"
+      progress_deadline = "3h"
+    }
+
    ephemeral_disk {
      # Use minimal ephemeral disk
      size = 101
    }

-    count = "1"
+
    network {
      mode = "bridge"
-      # Patroni API for node to check each others
+      # Patroni API for nodes to check each other
      port "patroni" {
        to = 8080
      }
-      # When running with patroni, nodes must reach each others postgres service
+      # When running with patroni, nodes must reach each other's postgres service, so we expose a port
      port "postgres" {
        to = 5432
      }
    }
-
-    update {
-      # Set super high deadlines as recovery can take lots of time
-      healthy_deadline  = "48h"
-      progress_deadline = "72h"
-    }
    service {
      name = "postgres"
      port = 5432
@ -121,12 +123,14 @@ job "postgres" {
    }


+
    volume "backup" {
      source          = "postgres-backup"
      type            = "csi"
      access_mode     = "multi-node-multi-writer"
      attachment_mode = "file-system"
    }
+
    volume "data" {
      source          = "postgres-data"
      type            = "csi"
--- a/postgres.nomad.hcl
+++ b/postgres.nomad.hcl
@ -1,40 +1,32 @@
 job "[[ .instance ]]" {

-[[- $c := merge .pg.server .pg . ]]
+[[- $c := merge .pg . ]]

 [[ template "common/job_start" $c ]]

  group "server" {
+[[- $c := merge $c.server $c ]]

-    ephemeral_disk {
-      # Use minimal ephemeral disk
-      size = 101
-    }
+[[- /* Ensure count is 1 when in recovery mode */]]
+[[ template "common/group_start" merge (dict "count" (ternary 1 $c.count $c.recovery)) $c ]]

-    count = "[[ $c.recovery | ternary 1 $c.count ]]"
    network {
      mode = "bridge"
 [[- if conv.ToBool $c.prometheus.enabled ]]
      port "metrics" {}
      port "patroni-metrics" {}
 [[- end ]]
-      # Patroni API for node to check each others
+      # Patroni API for nodes to check each other
      port "patroni" {
        to = 8080
      }
-      # When running with patroni, nodes must reach each others postgres service
+      # When running with patroni, nodes must reach each other's postgres service, so we expose a port
      port "postgres" {
        to = 5432
      }
    }

-    update {
-      # Set super high deadlines as recovery can take lots of time
-      healthy_deadline  = "48h"
-      progress_deadline = "72h"
-    }
-
-[[- if not .pg.server.recovery ]]
+[[- if not $c.recovery ]]
    service {
      name = "[[ .instance ]][[ $c.consul.suffix ]]"
      port = 5432
@ -116,9 +108,9 @@ job "[[ .instance ]]" {
      config {
        image      = "[[ $c.image ]]"
        # Set shm_size to half of the total size
-        shm_size   = [[ mul .pg.server.resources.memory 524288 ]]
+        shm_size   = [[ mul $c.resources.memory 524288 ]]
        volumes    = ["local/mkdir-socket.sh:/entrypoint.d/70-mkdir-socket.sh"]
-[[- if .pg.server.recovery ]]
+[[- if $c.recovery ]]
        # Running in recovery mode : just start the container and wait
        # so we can enter it and manually recover what we need to
        command    = "sleep"
--- a/variables.yml
+++ b/variables.yml
@ -6,6 +6,10 @@ instance: postgres

 pg:

+  nomad:
+    # Set higher priority for the postgres job
+    priority: 80
+
  vault:
    pki:
      ou: Postgres
@ -61,12 +65,14 @@ pg:
      PGBACKREST_STANZA: '[[ .instance ]]'

    nomad:
-      # Set higher priority for the postgres job
-      priority: 80
      # Enforce running on distinct hosts
      constraints:
        - operator: distinct_hosts
          value: true
+      update:
+        # When running in recovery mode, use huge deadlines as it can take a lot of time
+        healthy_deadline: '[[ .pg.server.recovery | ternary "48h" "2h" ]]'
+        progress_deadline: '[[ .pg.server.recovery | ternary "72h" "3h" ]]'

    # In recovery mode, neither patroni nor postgres will be started. The container will start and wait for manual recovery
    recovery: false