From 6db813b7c8d7475d3df31b79739a00cebfbbbb7a Mon Sep 17 00:00:00 2001
From: Daniel Berteaud
Date: Mon, 20 Nov 2023 00:21:06 +0100
Subject: [PATCH] Paperless bundle

---
 bundles.yml                                   |   4 +
 consul/config/service-defaults/paperless.hcl  |   3 +
 .../config/service-intentions/paperless.hcl   |  16 ++
 images/paperless-ngx/Dockerfile               | 115 ++++++++++
 .../root/entrypoint.d/10-mkdir.sh             |   7 +
 .../root/entrypoint.d/20-migrate.sh           |  21 ++
 .../paperless-ngx/root/etc/supervisord.conf   |  50 ++++
 .../root/usr/local/bin/paperless              |  22 ++
 init/vault-database                           |   8 +
 paperless-ngx.nomad.hcl                       | 216 ++++++++++++++++++
 prep.d/10-mv-conf.sh                          |   1 +
 prep.d/10-rand-pwd.sh                         |  17 ++
 variables.yml                                 | 110 +++++++++
 vault/policies/paperless.hcl                  |   7 +
 14 files changed, 597 insertions(+)
 create mode 100644 bundles.yml
 create mode 100644 consul/config/service-defaults/paperless.hcl
 create mode 100644 consul/config/service-intentions/paperless.hcl
 create mode 100644 images/paperless-ngx/Dockerfile
 create mode 100755 images/paperless-ngx/root/entrypoint.d/10-mkdir.sh
 create mode 100755 images/paperless-ngx/root/entrypoint.d/20-migrate.sh
 create mode 100644 images/paperless-ngx/root/etc/supervisord.conf
 create mode 100755 images/paperless-ngx/root/usr/local/bin/paperless
 create mode 100755 init/vault-database
 create mode 100644 paperless-ngx.nomad.hcl
 create mode 100755 prep.d/10-mv-conf.sh
 create mode 100755 prep.d/10-rand-pwd.sh
 create mode 100644 variables.yml
 create mode 100644 vault/policies/paperless.hcl

diff --git a/bundles.yml b/bundles.yml
new file mode 100644
index 0000000..5b9120e
--- /dev/null
+++ b/bundles.yml
@@ -0,0 +1,4 @@
+---
+
+dependencies:
+  - url: ../common.git
diff --git a/consul/config/service-defaults/paperless.hcl b/consul/config/service-defaults/paperless.hcl
new file mode 100644
index 0000000..2176512
--- /dev/null
+++ b/consul/config/service-defaults/paperless.hcl
@@ -0,0 +1,3 @@
+Kind = "service-defaults"
+Name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
+Protocol = "http"
diff --git a/consul/config/service-intentions/paperless.hcl b/consul/config/service-intentions/paperless.hcl
new file mode 100644
index 0000000..a7643ed
--- /dev/null
+++ b/consul/config/service-intentions/paperless.hcl
@@ -0,0 +1,16 @@
+Kind = "service-intentions"
+Name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
+Sources = [
+  {
+    Name = "[[ .traefik.instance ]]"
+    Permissions = [
+      {
+        Action = "allow"
+        HTTP {
+          PathPrefix = "/"
+          Methods = ["GET", "HEAD", "POST", "OPTIONS", "PUT", "DELETE", "PATCH"]
+        }
+      }
+    ]
+  }
+]
diff --git a/images/paperless-ngx/Dockerfile b/images/paperless-ngx/Dockerfile
new file mode 100644
index 0000000..a932c31
--- /dev/null
+++ b/images/paperless-ngx/Dockerfile
@@ -0,0 +1,115 @@
+# syntax=docker/dockerfile:labs
+
+# Build stage: unpack the release tarball and install its requirements into /opt/venv
+FROM python:3.9-alpine AS builder
+
+ARG PAPERLESS_VERSION=1.17.4
+
+WORKDIR /opt
+
+RUN set -euxo pipefail &&\
+    apk --no-cache add \
+      git \
+      make \
+      cmake \
+      build-base \
+      gfortran \
+      mariadb-dev \
+      libpq-dev \
+      freetype-dev \
+      imagemagick-dev \
+      libxml2-dev \
+      openblas-dev \
+      qpdf-dev \
+      tar \
+      xz \
+      curl \
+      ca-certificates \
+      rdfind \
+    &&\
+    curl -fsSLO https://github.com/paperless-ngx/paperless-ngx/releases/download/v${PAPERLESS_VERSION}/paperless-ngx-v${PAPERLESS_VERSION}.tar.xz &&\
+    tar xvJf paperless-ngx-v${PAPERLESS_VERSION}.tar.xz &&\
+    rm paperless-ngx-v${PAPERLESS_VERSION}.tar.xz &&\
+    python3 -m venv venv &&\
+    source venv/bin/activate &&\
+    mv paperless-ngx paperless &&\
+    cd paperless &&\
+    pip --no-cache-dir install -r requirements.txt &&\
+    ./src/manage.py collectstatic --no-input &&\
+    rdfind /opt
+
+# Runtime stage: only /opt (virtualenv + application) is carried over from the builder
+FROM python:3.9-alpine
+LABEL org.opencontainers.image.authors="[[ .docker.maintainer ]]"
+
+# Image defaults; DB credentials, secret key and URL are replaced by the job's env templates
+ENV LANG=[[ .locale.lang ]] \
+    TZ=[[ .locale.tz ]] \
+    PAPERLESS_MODE=all-in-one \
+    PATH=/opt/venv/bin:${PATH} \
+    PAPERLESS_REDIS=redis://127.0.0.1:6379/0 \
+    PAPERLESS_DBENGINE=postgresql \
+    PAPERLESS_DBHOST=127.0.0.1 \
+    PAPERLESS_DBPORT=5432 \
+    PAPERLESS_DBNAME=paperless \
+    PAPERLESS_DBUSER=paperless \
+    PAPERLESS_DBPASS=paperless \
+    PAPERLESS_CONSUMPTION_DIR=/input \
+    PAPERLESS_CONSUMER_POLLING=60 \
+    PAPERLESS_DATA_DIR=/data \
+    PAPERLESS_MEDIA_ROOT=/data \
+    PAPERLESS_TRASH_DIR=/data/trash \
+    PAPERLESS_STATICDIR=/opt/paperless/static \
+    PAPERLESS_FILENAME_FORMAT={created_year}/{created_month}/{title} \
+    PAPERLESS_SECRET_KEY=changeme \
+    PAPERLESS_URL=https://paperless.example.org \
+    PAPERLESS_OCR_LANGUAGE=fra \
+    PAPERLESS_TIME_ZONE=[[ .locale.tz ]] \
+    PAPERLESS_CONSUMER_DELETE_DUPLICATES=true \
+    PAPERLESS_CONSUMER_RECURSIVE=true \
+    PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=true \
+    PAPERLESS_WEBSERVER_WORKERS=1 \
+    PAPERLESS_BIND_ADDR=0.0.0.0 \
+    PAPERLESS_PORT=8994 \
+    PAPERLESS_ADMIN_USER=admin \
+    PAPERLESS_ADMIN_PASSWORD=admin \
+    PAPERLESS_ADMIN_EMAIL=admin@localhost
+
+ADD https://git.lapiole.org/nomad/base_tools.git#master /
+COPY --from=builder /opt /opt
+
+RUN set -euxo pipefail &&\
+    apk --no-cache add \
+      tini \
+      unpaper \
+      font-liberation \
+      tesseract-ocr \
+      tesseract-ocr-data-osd \
+      tesseract-ocr-data-eng \
+      tesseract-ocr-data-fra \
+      optipng \
+      libpq \
+      zbar \
+      poppler-utils \
+      gnupg \
+      imagemagick \
+      ghostscript \
+      qpdf \
+      leptonica \
+      libxml2 \
+      libmagic \
+      pngquant \
+      zlib \
+      supervisor \
+    &&\
+    addgroup --gid 8994 paperless &&\
+    adduser --system --ingroup paperless --disabled-password --uid 8994 --home /opt/paperless --shell /sbin/nologin paperless &&\
+    mkdir -p /data /input &&\
+    chown paperless:paperless /data /input
+
+COPY root/ /
+
+EXPOSE ${PAPERLESS_PORT}
+USER paperless
+ENTRYPOINT ["tini", "--", "/entrypoint.sh"]
+CMD ["paperless"]
diff --git a/images/paperless-ngx/root/entrypoint.d/10-mkdir.sh b/images/paperless-ngx/root/entrypoint.d/10-mkdir.sh
new file mode 100755
index 0000000..179fc54
--- /dev/null
+++ b/images/paperless-ngx/root/entrypoint.d/10-mkdir.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+set -euo pipefail
+
+for DIR in /data /data/trash /data/media /data/tmp; do
+  mkdir -p "${DIR}"
+done
diff --git a/images/paperless-ngx/root/entrypoint.d/20-migrate.sh b/images/paperless-ngx/root/entrypoint.d/20-migrate.sh
new file mode 100755
index 0000000..679b1d3
--- /dev/null
+++ b/images/paperless-ngx/root/entrypoint.d/20-migrate.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Apply database migrations and create the admin account (all-in-one and webserver modes only)
+set -euo pipefail
+
+if [ "${PAPERLESS_MODE}" != "all-in-one" ] && [ "${PAPERLESS_MODE}" != "webserver" ]; then
+  echo "Not running migration"
+  exit 0
+fi
+
+source /opt/venv/bin/activate
+echo "Migrating database"
+cd /opt/paperless/src
+./manage.py migrate
+
+if [ -n "${PAPERLESS_ADMIN_USER}" ] && [ -n "${PAPERLESS_ADMIN_PASSWORD}" ] && [ -n "${PAPERLESS_ADMIN_EMAIL}" ]; then
+  echo "Creating admin user ${PAPERLESS_ADMIN_USER}"
+  export DJANGO_SUPERUSER_PASSWORD="${PAPERLESS_ADMIN_PASSWORD}"
+  ./manage.py createsuperuser --noinput --username "${PAPERLESS_ADMIN_USER}" --email "${PAPERLESS_ADMIN_EMAIL}" ||\
+    echo "Failed to create user ${PAPERLESS_ADMIN_USER} (maybe it already exists ?)"
+fi
+
diff --git a/images/paperless-ngx/root/etc/supervisord.conf b/images/paperless-ngx/root/etc/supervisord.conf
new file mode 100644
index 0000000..ff6d6b7
--- /dev/null
+++ b/images/paperless-ngx/root/etc/supervisord.conf
@@ -0,0 +1,50 @@
+[supervisord]
+pidfile=/tmp/supervisord.pid
+nodaemon=true
+logfile=/dev/stdout
+logfile_maxbytes=0
+; Relative names below (manage.py, the paperless module) rely on the cwd set by /usr/local/bin/paperless
+[program:webserver]
+command=/opt/venv/bin/gunicorn -c /opt/paperless/gunicorn.conf.py --bind=%(ENV_PAPERLESS_BIND_ADDR)s:%(ENV_PAPERLESS_PORT)s paperless.asgi:application
+stdout_logfile=/proc/self/fd/1
+stdout_logfile_backups=0
+stdout_logfile_maxbytes=0
+stderr_logfile=/proc/self/fd/2
+stderr_logfile_backups=0
+stderr_logfile_maxbytes=0
+autostart=true
+autorestart=true
+
+[program:consumer]
+command=/opt/venv/bin/python3 manage.py document_consumer
+stdout_logfile=/proc/self/fd/1
+stdout_logfile_backups=0
+stdout_logfile_maxbytes=0
+stderr_logfile=/proc/self/fd/2
+stderr_logfile_backups=0
+stderr_logfile_maxbytes=0
+autostart=true
+autorestart=true
+
+[program:scheduler]
+command=/opt/venv/bin/celery --app paperless beat --loglevel INFO
+stdout_logfile=/proc/self/fd/1
+stdout_logfile_backups=0
+stdout_logfile_maxbytes=0
+stderr_logfile=/proc/self/fd/2
+stderr_logfile_backups=0
+stderr_logfile_maxbytes=0
+autostart=true
+autorestart=true
+
+[program:task-queue]
+command=/opt/venv/bin/celery --app paperless worker --loglevel INFO
+stdout_logfile=/proc/self/fd/1
+stdout_logfile_backups=0
+stdout_logfile_maxbytes=0
+stderr_logfile=/proc/self/fd/2
+stderr_logfile_backups=0
+stderr_logfile_maxbytes=0
+autostart=true
+autorestart=true
+
diff --git a/images/paperless-ngx/root/usr/local/bin/paperless b/images/paperless-ngx/root/usr/local/bin/paperless
new file mode 100755
index 0000000..3c057f6
--- /dev/null
+++ b/images/paperless-ngx/root/usr/local/bin/paperless
@@ -0,0 +1,22 @@
+#!/bin/sh
+# Launch the Paperless component(s) selected by PAPERLESS_MODE.
+# Every valid mode ends in exec, so the chosen process replaces this shell.
+set -euo pipefail
+
+source /opt/venv/bin/activate
+
+cd /opt/paperless/src
+
+case "${PAPERLESS_MODE}" in
+  all-in-one) exec supervisord -c /etc/supervisord.conf -n ;;
+  webserver)
+    exec /opt/venv/bin/gunicorn \
+      -c /opt/paperless/gunicorn.conf.py \
+      --bind="${PAPERLESS_BIND_ADDR}:${PAPERLESS_PORT}" \
+      paperless.asgi:application ;;
+  consumer) exec /opt/venv/bin/python3 manage.py document_consumer ;;
+  scheduler) exec /opt/venv/bin/celery --app paperless beat --loglevel INFO ;;
+  task-queue) exec /opt/venv/bin/celery --app paperless worker --loglevel INFO ;;
+  # Fail loudly on an unknown mode instead of exiting 0 without starting anything
+  *) echo "Unknown PAPERLESS_MODE: ${PAPERLESS_MODE}" >&2; exit 1 ;;
+esac
diff --git a/init/vault-database b/init/vault-database
new file mode 100755
index 0000000..eeeca8a
--- /dev/null
+++ b/init/vault-database
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+set -euo pipefail
+
+[[- template "common/vault.mkpgrole.sh.tpl"
+  dict "ctx" .
+  "config" (dict "role" .paperless.instance "database" "postgres")
+]]
diff --git a/paperless-ngx.nomad.hcl b/paperless-ngx.nomad.hcl
new file mode 100644
index 0000000..2f3d554
--- /dev/null
+++ b/paperless-ngx.nomad.hcl
@@ -0,0 +1,216 @@
+[[ $c := merge .paperless . -]]
+# One group: webserver is the main task; consumer, scheduler and task-queue run as prestart sidecars
+job "[[ .paperless.instance ]]" {
+
+[[ template "common/job_start.tpl" $c ]]
+
+  group "paperless" {
+    network {
+      mode = "bridge"
+    }
+
+    volume "data" {
+      type   = "[[ .paperless.volumes.data.type ]]"
+      source = "[[ .paperless.volumes.data.source ]]"
+[[- if ne .paperless.volumes.data.type "host" ]]
+      access_mode     = "single-node-writer"
+      attachment_mode = "file-system"
+[[- end ]]
+    }
+
+[[- if .paperless.consumer.enabled ]]
+    volume "input" {
+      type   = "[[ .paperless.volumes.input.type ]]"
+      source = "[[ .paperless.volumes.input.source ]]"
+  [[- if ne .paperless.volumes.input.type "host" ]]
+      access_mode     = "single-node-writer"
+      attachment_mode = "file-system"
+  [[- end ]]
+    }
+[[- end ]]
+
+    service {
+      name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
+      port = 8994
+
+[[ template "common/connect.tpl" $c ]]
+
+[[ $c = merge .paperless.webserver . ]]
+      tags = [
+        "[[ $c.traefik.instance ]].enable=[[ if $c.traefik.enabled ]]true[[ else ]]false[[ end ]]",
+        "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].rule=Host(`[[ (urlParse $c.public_url).Hostname ]]`)
+          [[- if not (regexp.Match "^/?$" (urlParse $c.public_url).Path) ]] && PathPrefix(`[[ (urlParse $c.public_url).Path ]]`)[[ end ]]",
+        "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].entrypoints=[[ join $c.traefik.entrypoints "," ]]",
+[[- if not (regexp.Match "^/?$" (urlParse $c.public_url).Path) ]]
+        "[[ $c.traefik.instance ]].http.middlewares.[[ .paperless.instance ]][[ .consul.suffix ]]-prefix.stripprefix.prefixes=[[ (urlParse $c.public_url).Path ]]",
+        "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].middlewares=[[ .paperless.instance ]][[ $c.consul.suffix ]]-prefix,[[ template "common/traefik_middlewares.tpl" $c.traefik ]]",
+[[- else ]]
+        "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].middlewares=[[ template "common/traefik_middlewares.tpl" $c.traefik ]]",
+[[- end ]]
+      ]
+    }
+
+[[ template "common/task.redis.tpl" ]]
+
+    task "webserver" {
+      driver = [[ $c.nomad.driver | toJSON ]]
+
+      config {
+        image           = [[ $c.image | toJSON ]]
+        readonly_rootfs = true
+        pids_limit      = 100
+      }
+
+      vault {
+        policies     = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
+        env          = false
+        disable_file = true
+      }
+
+      env {
+        PAPERLESS_MODE      = "webserver"
+        PAPERLESS_BIND_ADDR = "127.0.0.1"
+        TMPDIR              = "/alloc/tmp"
+      }
+
+[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]
+
+      volume_mount {
+        volume      = "data"
+        destination = "/data"
+      }
+[[- if .paperless.consumer.enabled ]]
+      volume_mount {
+        volume      = "input"
+        destination = "/input"
+      }
+[[- end ]]
+[[ template "common/resources.tpl" $c.resources ]]
+    }
+
+[[- if .paperless.consumer.enabled ]]
+  [[ $c := merge .paperless.consumer . ]]
+    task "consumer" {
+      driver = [[ $c.nomad.driver | toJSON ]]
+
+      lifecycle {
+        hook    = "prestart"
+        sidecar = true
+      }
+
+      config {
+        image           = [[ $c.image | toJSON ]]
+        readonly_rootfs = true
+        pids_limit      = 100
+      }
+
+      vault {
+        policies     = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
+        env          = false
+        disable_file = true
+      }
+
+      env {
+        PAPERLESS_MODE = "consumer"
+        TMPDIR         = "/alloc/tmp"
+      }
+
+      [[ template "common/file_env.tpl" merge $c.env .paperless.env ]]
+
+      volume_mount {
+        volume      = "data"
+        destination = "/data"
+      }
+
+      volume_mount {
+        volume      = "input"
+        destination = "/input"
+      }
+
+      [[ template "common/resources.tpl" $c.resources ]]
+    }
+[[- end ]]
+
+[[ $c := merge .paperless.scheduler . ]]
+    task "scheduler" {
+      driver = [[ $c.nomad.driver | toJSON ]]
+
+      lifecycle {
+        hook    = "prestart"
+        sidecar = true
+      }
+
+      config {
+        image           = [[ $c.image | toJSON ]]
+        readonly_rootfs = true
+        pids_limit      = 100
+      }
+
+      vault {
+        policies     = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
+        env          = false
+        disable_file = true
+      }
+
+      env {
+        PAPERLESS_MODE = "scheduler"
+        TMPDIR         = "/alloc/tmp"
+      }
+
+[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]
+
+      volume_mount {
+        volume      = "data"
+        destination = "/data"
+      }
+[[- if .paperless.consumer.enabled ]]
+      volume_mount {
+        volume      = "input"
+        destination = "/input"
+      }
+[[- end ]]
+[[ template "common/resources.tpl" $c.resources ]]
+    }
+
+[[ $c := merge .paperless.task_queue . ]]
+    task "task-queue" {
+      driver = [[ $c.nomad.driver | toJSON ]]
+
+      lifecycle {
+        hook    = "prestart"
+        sidecar = true
+      }
+
+      config {
+        image           = [[ $c.image | toJSON ]]
+        readonly_rootfs = true
+        pids_limit      = 300
+      }
+
+      vault {
+        policies     = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
+        env          = false
+        disable_file = true
+      }
+
+      env {
+        PAPERLESS_MODE = "task-queue"
+        TMPDIR         = "/alloc/tmp"
+      }
+
+[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]
+
+      volume_mount {
+        volume      = "data"
+        destination = "/data"
+      }
+[[- if .paperless.consumer.enabled ]]
+      volume_mount {
+        volume      = "input"
+        destination = "/input"
+      }
+[[- end ]]
+[[ template "common/resources.tpl" $c.resources ]]
+    }
+  }
+}
diff --git a/prep.d/10-mv-conf.sh b/prep.d/10-mv-conf.sh
new file mode 100755
index 0000000..50e2451
--- /dev/null
+++ b/prep.d/10-mv-conf.sh
@@ -0,0 +1 @@
+[[ template "common/mv_conf.sh.tpl" dict "ctx" . "services" (dict "paperless" .paperless.instance) ]]
diff --git a/prep.d/10-rand-pwd.sh b/prep.d/10-rand-pwd.sh
new file mode 100755
index 0000000..14145fd
--- /dev/null
+++ b/prep.d/10-rand-pwd.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+set -euo pipefail
+
+# Initialize random passwords if needed
+
+if ! vault kv list [[ .vault.prefix ]]kv/service 2>/dev/null | grep -q -E '^[[ .paperless.instance ]]$'; then
+  vault kv put [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] \
+    secret_key="$(pwgen -s -n 50 1)"
+fi
+
+for KEY in secret_key; do
+  if ! vault kv get -field "${KEY}" [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] >/dev/null 2>&1; then
+    vault kv patch [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] \
+      "${KEY}=$(pwgen -s -n 50 1)"
+  fi
+done
diff --git a/variables.yml b/variables.yml
new file mode 100644
index 0000000..86c8b16
--- /dev/null
+++ b/variables.yml
@@ -0,0 +1,110 @@
+---
+
+paperless:
+
+  # Name of the instance (job and service name)
+  instance: paperless
+
+  # Connect to the postgres service through the mesh
+  consul:
+    connect:
+      upstreams:
+        - destination_name: postgres[[ .consul.suffix ]]
+          local_bind_port: 5432
+
+  # Env var to set in the containers
+  # The ones here will be inherited by all containers
+  env:
+    PAPERLESS_DBUSER: '{{ with secret "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" }}{{ .Data.username }}{{ end }}'
+    PAPERLESS_DBPASS: '{{ with secret "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" }}{{ .Data.password }}{{ end }}'
+    PAPERLESS_SECRET_KEY: '{{ with secret "[[ .vault.prefix ]]kv/service/[[ .paperless.instance ]]" }}{{ .Data.data.secret_key }}{{ end }}'
+    PAPERLESS_DBNAME: '[[ .paperless.instance ]]'
+    PAPERLESS_CORS_ALLOWED_HOSTS: '[[ .paperless.webserver.public_url ]]'
+    PAPERLESS_URL: '[[ .paperless.webserver.public_url ]]'
+    PAPERLESS_CONVERT_TMPDIR: /alloc/data
+    PAPERLESS_USE_X_FORWARD_HOST: true
+    PAPERLESS_PROXY_SSL_HEADER: "'[\"HTTP_X_FORWARDED_PROTO\", \"https\"]'"
+    PAPERLESS_ENABLE_COMPRESSION: false
+    PAPERLESS_TRUSTED_PROXIES: 127.0.0.1
+
+  # This is the main task
+  webserver:
+
+    # The Docker image to use
+    image: danielberteaud/paperless-ngx:1.17.4-1
+
+    # Env vars to set in the container
+    env: {}
+
+    # Resource allocation
+    resources:
+      cpu: 300
+      memory: 256
+
+    # The URL where Paperless will be available to users
+    public_url: https://paperless.example.org
+
+    # Traefik settings
+    traefik:
+      enabled: true
+
+  # The consumer will watch a folder (/input, where a volume is mounted) and
+  # import any files found.
+  consumer:
+
+    # You can disable the consumer if you do not use it, it'll save some resources
+    enabled: true
+
+    # The Docker image to use
+    image: '[[ .paperless.webserver.image ]]'
+
+    # Env vars to set in the container
+    env: {}
+
+    # Resource allocation
+    resources:
+      cpu: 100
+      memory: 150
+
+  # Task scheduler
+  scheduler:
+
+    # The Docker image to use
+    image: '[[ .paperless.webserver.image ]]'
+
+    # Env vars to set in the container
+    env: {}
+
+    # Resource allocation
+    resources:
+      cpu: 100
+      memory: 200
+
+  # The worker doing the document processing
+  task_queue:
+
+    # The Docker image to use
+    image: '[[ .paperless.webserver.image ]]'
+
+    # Env vars to set in the container
+    env: {}
+
+    # Resource allocation
+    resources:
+      cpu: 500
+      memory: 384
+
+  # Volumes for data persistence and exchange
+  volumes:
+
+    # This is the main volume where paperless keeps your documents
+    data:
+      type: csi
+      source: paperless-data
+
+    # This is a volume paperless will watch (with the consumer task) and
+    # any document found in it will be imported. If the consumer is disabled
+    # you can omit this volume as it'll not be added to the job
+    input:
+      type: csi
+      source: paperless-input
diff --git a/vault/policies/paperless.hcl b/vault/policies/paperless.hcl
new file mode 100644
index 0000000..a5cd8f1
--- /dev/null
+++ b/vault/policies/paperless.hcl
@@ -0,0 +1,7 @@
+path "[[ .vault.prefix ]]kv/data/service/[[ .paperless.instance ]]" {
+  capabilities = ["read"]
+}
+
+path "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" {
+  capabilities = ["read"]
+}