Paperless bundle

This commit is contained in:
Daniel Berteaud 2023-11-20 00:21:06 +01:00
parent 5fe0ae20de
commit 6db813b7c8
14 changed files with 597 additions and 0 deletions

4
bundles.yml Normal file
View File

@ -0,0 +1,4 @@
---
# Other bundles this one depends on. The "common" bundle is referenced by a
# relative git URL (presumably the source of the common/* templates used
# throughout this bundle; verify against that repository).
dependencies:
- url: ../common.git

View File

@ -0,0 +1,3 @@
# Consul service-defaults config entry for the paperless service.
# The service name is the instance name followed by the Consul suffix, and
# its protocol is declared as HTTP.
Kind = "service-defaults"
Name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
Protocol = "http"

View File

@ -0,0 +1,16 @@
# Consul service-intentions config entry for the paperless service.
# The only source listed is the Traefik instance, which is allowed to send
# requests on any path ("/" prefix) with the HTTP methods enumerated below.
Kind = "service-intentions"
Name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
Sources = [
{
Name = "[[ .traefik.instance ]]"
Permissions = [
{
Action = "allow"
HTTP {
PathPrefix = "/"
Methods = ["GET", "HEAD", "POST", "OPTIONS", "PUT", "DELETE", "PATCH"]
}
}
]
}
]

View File

@ -0,0 +1,115 @@
# syntax=docker/dockerfile:labs
# Build stage: downloads the paperless-ngx release tarball, installs its Python
# requirements into a virtualenv at /opt/venv and collects the static files.
# The resulting /opt tree is what the runtime stage copies.
FROM python:3.9-alpine AS builder
ARG PAPERLESS_VERSION=1.17.4
WORKDIR /opt
# Notes on the command chain below:
#  - curl runs with -f so an HTTP error fails the build right away instead of
#    saving the error page under the tarball's name.
#  - rdfind takes no action by default (it only writes a results.txt report in
#    the current directory, which would then be shipped in the image). Duplicates
#    are now replaced by hard links and the report file is disabled.
#    NOTE(review): whether hard links survive the later COPY --from depends on
#    the builder; if they do not, the files are simply duplicated as before.
RUN set -euxo pipefail &&\
    apk --no-cache add \
      git \
      make \
      cmake \
      build-base \
      gfortran \
      mariadb-dev \
      libpq-dev \
      freetype-dev \
      imagemagick-dev \
      libxml2-dev \
      openblas-dev \
      qpdf-dev \
      tar \
      xz \
      curl \
      ca-certificates \
      rdfind \
    &&\
    curl -fsSLO "https://github.com/paperless-ngx/paperless-ngx/releases/download/v${PAPERLESS_VERSION}/paperless-ngx-v${PAPERLESS_VERSION}.tar.xz" &&\
    tar xvJf "paperless-ngx-v${PAPERLESS_VERSION}.tar.xz" &&\
    rm "paperless-ngx-v${PAPERLESS_VERSION}.tar.xz" &&\
    python3 -m venv venv &&\
    source venv/bin/activate &&\
    mv paperless-ngx paperless &&\
    cd paperless &&\
    pip --no-cache-dir install -r requirements.txt &&\
    ./src/manage.py collectstatic --no-input &&\
    rdfind -makehardlinks true -makeresultsfile false /opt
# Runtime stage: the /opt tree built above plus the OCR / PDF / image tooling,
# run as the unprivileged "paperless" user (uid/gid 8994) under tini.
FROM python:3.9-alpine
# MAINTAINER is deprecated; the same value is now carried as a label.
LABEL maintainer="[[ .docker.maintainer ]]"
# Default runtime configuration.
# Each variable is set exactly once: the original list defined
# PAPERLESS_CONSUMER_POLLING, PAPERLESS_ADMIN_USER and PAPERLESS_ADMIN_PASSWORD
# twice; the values that took effect (the later ones) are the ones kept here.
# NOTE(review): the password / secret-key values are placeholders and must be
# overridden at run time; they are kept because the startup scripts read them.
ENV LANG=[[ .locale.lang ]] \
    TZ=[[ .locale.tz ]] \
    PAPERLESS_MODE=all-in-one \
    PATH=/opt/venv/bin:${PATH} \
    PAPERLESS_REDIS=redis://127.0.0.1:6379/0 \
    PAPERLESS_DBENGINE=postgresql \
    PAPERLESS_DBHOST=127.0.0.1 \
    PAPERLESS_DBPORT=5432 \
    PAPERLESS_DBNAME=paperless \
    PAPERLESS_DBUSER=paperless \
    PAPERLESS_DBPASS=paperless \
    PAPERLESS_CONSUMPTION_DIR=/input \
    PAPERLESS_DATA_DIR=/data \
    PAPERLESS_MEDIA_ROOT=/data \
    PAPERLESS_TRASH_DIR=/data/trash \
    PAPERLESS_STATICDIR=/opt/paperless/static \
    PAPERLESS_FILENAME_FORMAT={created_year}/{created_month}/{title} \
    PAPERLESS_SECRET_KEY=changeme \
    PAPERLESS_URL=https://paperless.example.org \
    PAPERLESS_OCR_LANGUAGE=fra \
    PAPERLESS_TIME_ZONE=[[ .locale.tz ]] \
    PAPERLESS_CONSUMER_DELETE_DUPLICATES=true \
    PAPERLESS_CONSUMER_RECURSIVE=true \
    PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=true \
    PAPERLESS_CONSUMER_POLLING=60 \
    PAPERLESS_WEBSERVER_WORKERS=1 \
    PAPERLESS_BIND_ADDR=0.0.0.0 \
    PAPERLESS_PORT=8994 \
    PAPERLESS_ADMIN_USER=admin \
    PAPERLESS_ADMIN_PASSWORD=admin \
    PAPERLESS_ADMIN_EMAIL=admin@localhost
# NOTE(review): this tracks the moving "master" branch of base_tools, so image
# builds are not reproducible; consider pinning a tag or commit.
ADD https://git.lapiole.org/nomad/base_tools.git#master /
COPY --from=builder /opt /opt
# Install the runtime tools, then create the paperless user/group and the
# /data and /input directories it owns.
RUN set -euxo pipefail &&\
    apk --no-cache add \
      tini \
      unpaper \
      font-liberation \
      tesseract-ocr \
      tesseract-ocr-data-osd \
      tesseract-ocr-data-eng \
      tesseract-ocr-data-fra \
      optipng \
      libpq \
      zbar \
      poppler-utils \
      gnupg \
      imagemagick \
      ghostscript \
      qpdf \
      leptonica \
      libxml2 \
      libmagic \
      pngquant \
      zlib \
      supervisor \
    &&\
    addgroup --gid 8994 paperless &&\
    adduser --system --ingroup paperless --disabled-password --uid 8994 --home /opt/paperless --shell /sbin/nologin paperless &&\
    mkdir -p /data /input &&\
    chown paperless:paperless /data /input
COPY root/ /
EXPOSE ${PAPERLESS_PORT}
USER paperless
ENTRYPOINT ["tini", "--", "/entrypoint.sh"]
CMD ["paperless"]

View File

@ -0,0 +1,7 @@
#!/bin/sh
# Make sure the data directory and its trash, media and tmp subdirectories exist.
set -euo pipefail

set -- /data /data/trash /data/media /data/tmp
for target in "$@"; do
  mkdir -p "${target}"
done

View File

@ -0,0 +1,21 @@
#!/bin/sh
# Apply the Django database migrations and, when the three PAPERLESS_ADMIN_*
# variables are set, try to create the initial superuser.
# Only runs when PAPERLESS_MODE is "all-in-one" or "webserver".
set -euo pipefail

case "${PAPERLESS_MODE}" in
  all-in-one|webserver)
    ;;
  *)
    echo "Not running migration"
    exit 0
    ;;
esac

source /opt/venv/bin/activate

echo "Migrating database"
cd /opt/paperless/src
./manage.py migrate

# ${VAR:-} keeps "set -u" from aborting when one of the variables is unset:
# in that case the admin creation is skipped, which is what the -n tests express.
if [ -n "${PAPERLESS_ADMIN_USER:-}" ] && [ -n "${PAPERLESS_ADMIN_PASSWORD:-}" ] && [ -n "${PAPERLESS_ADMIN_EMAIL:-}" ]; then
  echo "Creating admin user ${PAPERLESS_ADMIN_USER}"
  # createsuperuser --noinput takes the password from this variable
  export DJANGO_SUPERUSER_PASSWORD="${PAPERLESS_ADMIN_PASSWORD}"
  # Best effort: a failure (e.g. the user already exists) is reported but does
  # not abort the script. Arguments are quoted so values containing spaces or
  # glob characters are passed through intact.
  ./manage.py createsuperuser --noinput --username "${PAPERLESS_ADMIN_USER}" --email "${PAPERLESS_ADMIN_EMAIL}" ||\
    echo "Failed to create user ${PAPERLESS_ADMIN_USER} (maybe it already exists ?)"
fi

View File

@ -0,0 +1,50 @@
; Global supervisord settings: stay in the foreground and send supervisord's
; own log to stdout without rotation (maxbytes=0).
[supervisord]
; was "/tmp/supervisord.pi" (truncated extension)
pidfile=/tmp/supervisord.pid
nodaemon=true
logfile=/dev/stdout
logfile_maxbytes=0
; Web server: gunicorn serving paperless.asgi:application on
; PAPERLESS_BIND_ADDR:PAPERLESS_PORT (taken from the environment).
; Output goes to supervisord's own stdout/stderr without rotation.
[program:webserver]
command=/opt/venv/bin/gunicorn -c /opt/paperless/gunicorn.conf.py --bind=%(ENV_PAPERLESS_BIND_ADDR)s:%(ENV_PAPERLESS_PORT)s paperless.asgi:application
; Run from the source directory explicitly instead of relying on the working
; directory supervisord happened to be started from.
directory=/opt/paperless/src
stdout_logfile=/proc/self/fd/1
stdout_logfile_backups=0
stdout_logfile_maxbytes=0
stderr_logfile=/proc/self/fd/2
stderr_logfile_backups=0
stderr_logfile_maxbytes=0
autostart=true
autorestart=true
; Consumer: runs the document_consumer management command.
; Output goes to supervisord's own stdout/stderr without rotation.
[program:consumer]
command=/opt/venv/bin/python3 manage.py document_consumer
; "manage.py" is a relative path, so the working directory must be the source
; directory; set it explicitly instead of relying on the inherited one.
directory=/opt/paperless/src
stdout_logfile=/proc/self/fd/1
stdout_logfile_backups=0
stdout_logfile_maxbytes=0
stderr_logfile=/proc/self/fd/2
stderr_logfile_backups=0
stderr_logfile_maxbytes=0
autostart=true
autorestart=true
; Scheduler: celery beat for the "paperless" app.
; Output goes to supervisord's own stdout/stderr without rotation.
[program:scheduler]
command=/opt/venv/bin/celery --app paperless beat --loglevel INFO
; Run from the source directory explicitly instead of relying on the working
; directory supervisord happened to be started from.
directory=/opt/paperless/src
stdout_logfile=/proc/self/fd/1
stdout_logfile_backups=0
stdout_logfile_maxbytes=0
stderr_logfile=/proc/self/fd/2
stderr_logfile_backups=0
stderr_logfile_maxbytes=0
autostart=true
autorestart=true
; Task queue: celery worker for the "paperless" app.
; Output goes to supervisord's own stdout/stderr without rotation.
[program:task-queue]
command=/opt/venv/bin/celery --app paperless worker --loglevel INFO
; Run from the source directory explicitly instead of relying on the working
; directory supervisord happened to be started from.
directory=/opt/paperless/src
stdout_logfile=/proc/self/fd/1
stdout_logfile_backups=0
stdout_logfile_maxbytes=0
stderr_logfile=/proc/self/fd/2
stderr_logfile_backups=0
stderr_logfile_maxbytes=0
autostart=true
autorestart=true

View File

@ -0,0 +1,22 @@
#!/bin/sh
# Start the paperless component selected by PAPERLESS_MODE:
#   all-in-one  every component, managed by supervisord
#   webserver   gunicorn on PAPERLESS_BIND_ADDR:PAPERLESS_PORT
#   consumer    the document_consumer management command
#   scheduler   celery beat
#   task-queue  celery worker
# Each branch uses exec so the started program replaces this shell.
set -euo pipefail
source /opt/venv/bin/activate
cd /opt/paperless/src

case "${PAPERLESS_MODE}" in
  all-in-one)
    # The daemon binary is "supervisord" ("supervisor" is only the package name)
    exec supervisord -c /etc/supervisord.conf -n
    ;;
  webserver)
    exec /opt/venv/bin/gunicorn \
      -c /opt/paperless/gunicorn.conf.py \
      --bind="${PAPERLESS_BIND_ADDR}:${PAPERLESS_PORT}" \
      paperless.asgi:application
    ;;
  consumer)
    exec /opt/venv/bin/python3 manage.py document_consumer
    ;;
  scheduler)
    exec /opt/venv/bin/celery --app paperless beat --loglevel INFO
    ;;
  task-queue)
    exec /opt/venv/bin/celery --app paperless worker --loglevel INFO
    ;;
  *)
    # Previously an unrecognised mode fell through and exited 0 without
    # starting anything; report it as an error instead.
    echo "Unsupported PAPERLESS_MODE '${PAPERLESS_MODE}' (expected all-in-one, webserver, consumer, scheduler or task-queue)" >&2
    exit 1
    ;;
esac

8
init/vault-database Executable file
View File

@ -0,0 +1,8 @@
#!/bin/sh
# The body of this script is rendered from the shared
# common/vault.mkpgrole.sh.tpl template, called with the instance name as the
# role and "postgres" as the database.
set -euo pipefail
[[- template "common/vault.mkpgrole.sh.tpl"
dict "ctx" .
"config" (dict "role" .paperless.instance "database" "postgres")
]]

216
paperless-ngx.nomad.hcl Normal file
View File

@ -0,0 +1,216 @@
[[ $c := merge .paperless . -]]
job "[[ .paperless.instance ]]" {
[[ template "common/job_start.tpl" $c ]]
group "paperless" {
# The group uses bridge networking
network {
mode = "bridge"
}
# Data volume (mounted at /data by the tasks of this group)
volume "data" {
type = "[[ .paperless.volumes.data.type ]]"
source = "[[ .paperless.volumes.data.source ]]"
# access_mode / attachment_mode are only emitted when the volume type is not "host"
[[- if ne .paperless.volumes.data.type "host" ]]
access_mode = "single-node-writer"
attachment_mode = "file-system"
[[- end ]]
}
[[- if .paperless.consumer.enabled ]]
# Input volume (mounted at /input); only declared when the consumer is enabled
volume "input" {
type = "[[ .paperless.volumes.input.type ]]"
source = "[[ .paperless.volumes.input.source ]]"
# access_mode / attachment_mode are only emitted when the volume type is not "host"
[[- if ne .paperless.volumes.input.type "host" ]]
access_mode = "single-node-writer"
attachment_mode = "file-system"
[[- end ]]
}
[[- end ]]
# Consul service for the web UI (port 8994). The tags configure the Traefik
# router: host rule from public_url, an optional PathPrefix rule and matching
# stripprefix middleware when public_url has a non-root path, plus the
# entrypoints and shared middlewares.
service {
  name = "[[ .paperless.instance ]][[ .consul.suffix ]]"
  port = 8994
[[ template "common/connect.tpl" $c ]]
[[ $c = merge .paperless.webserver . ]]
  tags = [
    "[[ $c.traefik.instance ]].enable=[[ if $c.traefik.enabled ]]true[[ else ]]false[[ end ]]",
    "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].rule=Host(`[[ (urlParse $c.public_url).Hostname ]]`)
      [[- if not (regexp.Match "^/?$" (urlParse $c.public_url).Path) ]] && PathPrefix(`[[ (urlParse $c.public_url).Path ]]`)[[ end ]]",
    "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].entrypoints=[[ join $c.traefik.entrypoints "," ]]",
[[- if not (regexp.Match "^/?$" (urlParse $c.public_url).Path) ]]
    # The prefix is read from $c.public_url like every other tag: public_url is
    # defined under the webserver settings, not directly under .paperless
    "[[ $c.traefik.instance ]].http.middlewares.[[ .paperless.instance ]][[ .consul.suffix ]]-prefix.stripprefix.prefixes=[[ (urlParse $c.public_url).Path ]]",
    "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].middlewares=[[ .paperless.instance ]][[ $c.consul.suffix ]]-prefix,[[ template "common/traefik_middlewares.tpl" $c.traefik ]]",
[[- else ]]
    "[[ $c.traefik.instance ]].http.routers.[[ .paperless.instance ]][[ .consul.suffix ]].middlewares=[[ template "common/traefik_middlewares.tpl" $c.traefik ]]",
[[- end ]]
  ]
}
[[ template "common/task.redis.tpl" ]]
# Main task: the web server, bound to 127.0.0.1 inside the group's network
task "webserver" {
  driver = [[ $c.nomad.driver | toJSON ]]

  config {
    image = [[ $c.image | toJSON ]]
    readonly_rootfs = true
    pids_limit = 100
  }

  vault {
    policies = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
    env = false
    disable_file = true
  }

  env {
    PAPERLESS_MODE = "webserver"
    PAPERLESS_BIND_ADDR = "127.0.0.1"
    TMPDIR = "/alloc/tmp"
  }

[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]

  volume_mount {
    volume = "data"
    destination = "/data"
  }
  # The "input" volume is only declared at group level when the consumer is
  # enabled, so it can only be mounted under the same condition
[[- if .paperless.consumer.enabled ]]
  volume_mount {
    volume = "input"
    destination = "/input"
  }
[[- end ]]

[[ template "common/resources.tpl" $c.resources ]]
}
[[- if .paperless.consumer.enabled ]]
[[ $c := merge .paperless.consumer . ]]
# Consumer task, only rendered when the consumer is enabled.
# Runs the image in "consumer" mode with the data and input volumes mounted.
task "consumer" {
driver = [[ $c.nomad.driver | toJSON ]]
# Declared as a prestart sidecar of the group
lifecycle {
hook = "prestart"
sidecar = true
}
config {
image = [[ $c.image | toJSON ]]
readonly_rootfs = true
pids_limit = 100
}
vault {
policies = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
env = false
disable_file = true
}
env {
PAPERLESS_MODE = "consumer"
TMPDIR = "/alloc/tmp"
}
[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]
volume_mount {
volume = "data"
destination = "/data"
}
volume_mount {
volume = "input"
destination = "/input"
}
[[ template "common/resources.tpl" $c.resources ]]
}
[[- end ]]
[[ $c := merge .paperless.scheduler . ]]
# Scheduler task: runs the image in "scheduler" mode as a prestart sidecar
task "scheduler" {
  driver = [[ $c.nomad.driver | toJSON ]]

  lifecycle {
    hook = "prestart"
    sidecar = true
  }

  config {
    image = [[ $c.image | toJSON ]]
    readonly_rootfs = true
    pids_limit = 100
  }

  vault {
    policies = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
    env = false
    disable_file = true
  }

  env {
    PAPERLESS_MODE = "scheduler"
    TMPDIR = "/alloc/tmp"
  }

[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]

  volume_mount {
    volume = "data"
    destination = "/data"
  }
  # The "input" volume is only declared at group level when the consumer is
  # enabled, so it can only be mounted under the same condition
[[- if .paperless.consumer.enabled ]]
  volume_mount {
    volume = "input"
    destination = "/input"
  }
[[- end ]]

[[ template "common/resources.tpl" $c.resources ]]
}
[[ $c := merge .paperless.task_queue . ]]
# Task queue: runs the image in "task-queue" mode as a prestart sidecar.
# It has a higher pids_limit (300) than the other tasks.
task "task-queue" {
  driver = [[ $c.nomad.driver | toJSON ]]

  lifecycle {
    hook = "prestart"
    sidecar = true
  }

  config {
    image = [[ $c.image | toJSON ]]
    readonly_rootfs = true
    pids_limit = 300
  }

  vault {
    policies = ["[[ .paperless.instance ]][[ .consul.suffix ]]"]
    env = false
    disable_file = true
  }

  env {
    PAPERLESS_MODE = "task-queue"
    TMPDIR = "/alloc/tmp"
  }

[[ template "common/file_env.tpl" merge $c.env .paperless.env ]]

  volume_mount {
    volume = "data"
    destination = "/data"
  }
  # The "input" volume is only declared at group level when the consumer is
  # enabled, so it can only be mounted under the same condition
[[- if .paperless.consumer.enabled ]]
  volume_mount {
    volume = "input"
    destination = "/input"
  }
[[- end ]]

[[ template "common/resources.tpl" $c.resources ]]
}
}
}

1
prep.d/10-mv-conf.sh Executable file
View File

@ -0,0 +1 @@
[[/* The whole script is rendered from the shared common/mv_conf.sh.tpl template, called with a "services" map that associates "paperless" with the instance name. */ -]]
[[ template "common/mv_conf.sh.tpl" dict "ctx" . "services" (dict "paperless" .paperless.instance) ]]

17
prep.d/10-rand-pwd.sh Executable file
View File

@ -0,0 +1,17 @@
#!/bin/sh
# Make sure the Vault KV entry of this paperless instance exists and holds a
# random value for every secret listed in the loop below.
set -euo pipefail

# Initialize random passwords if needed
# When the instance has no entry under kv/service yet, create it
if ! vault kv list [[ .vault.prefix ]]kv/service 2>/dev/null | grep -q -E '^[[ .paperless.instance ]]$'; then
  vault kv put [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] \
    secret_key="$(pwgen -s -n 50 1)"
fi

# Add any field still missing from an already existing entry.
# The loop variable is named KEY rather than PWD: PWD is the shell's own
# working-directory variable and must not be overwritten.
for KEY in secret_key; do
  if ! vault kv get -field "${KEY}" [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] >/dev/null 2>&1; then
    vault kv patch [[ .vault.prefix ]]kv/service/[[ .paperless.instance ]] \
      "${KEY}=$(pwgen -s -n 50 1)"
  fi
done

110
variables.yml Normal file
View File

@ -0,0 +1,110 @@
---
paperless:
# Name of the instance (job and service name)
instance: paperless
# Connect to the postgres service through the mesh
consul:
connect:
upstreams:
- destination_name: postgres[[ .consul.suffix ]]
local_bind_port: 5432
# Env var to set in the containers
# The ones here will be inherited by all containers
env:
PAPERLESS_DBUSER: '{{ with secret "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" }}{{ .Data.username }}{{ end }}'
PAPERLESS_DBPASS: '{{ with secret "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" }}{{ .Data.password }}{{ end }}'
PAPERLESS_SECRET_KEY: '{{ with secret "[[ .vault.prefix ]]kv/service/[[ .paperless.instance ]]" }}{{ .Data.data.secret_key }}{{ end }}'
PAPERLESS_DBNAME: '[[ .paperless.instance ]]'
PAPERLESS_CORS_ALLOWED_HOSTS: '[[ .paperless.webserver.public_url ]]'
PAPERLESS_URL: '[[ .paperless.webserver.public_url ]]'
PAPERLESS_CONVERT_TMPDIR: /alloc/data
PAPERLESS_USE_X_FORWARD_HOST: true
PAPERLESS_PROXY_SSL_HEADER: "'[\"HTTP_X_FORWARDED_PROTO\", \"https\"]'"
PAPERLESS_ENABLE_COMPRESSION: false
PAPERLESS_TRUSTED_PROXIES: 127.0.0.1
# This is the main task
webserver:
# The Docker image to use
image: danielberteaud/paperless-ngx:1.17.4-1
# Env vars to set in the container
env: {}
# Resource allocation
resources:
cpu: 300
memory: 256
# The URL where Paperless will be available to users
public_url: https://paperless.example.org
# Traefik settings
traefik:
enabled: true
# The consumer will watch a folder (/input, where a volume is mounted) and
# import any files found.
consumer:
# You can disable the consumer if you do not use it, it'll save some resources
enabled: true
# The Docker image to use
image: '[[ .paperless.webserver.image ]]'
# Env vars to set in the container
env: {}
# Resource allocation
resources:
cpu: 100
memory: 150
# Task scheduler
scheduler:
# The Docker image to use
image: '[[ .paperless.webserver.image ]]'
# Env vars to set in the container
env: {}
# Resource allocation
resources:
cpu: 100
memory: 200
# The worker doing the document processing
task_queue:
# The Docker image to use
image: '[[ .paperless.webserver.image ]]'
# Env vars to set in the container
env: {}
# Resource allocation
resources:
cpu: 500
memory: 384
# Volumes for data persistence and exchange
volumes:
# This is the main volume where paperless keeps your documents
data:
type: csi
source: paperless-data
# This is a volume paperless will watch (with the consumer task) and
# any document found in it will be imported. If the consumer is disabled
# you can omit this volume as it'll not be added to the job
input:
type: csi
source: paperless-input

View File

@ -0,0 +1,7 @@
# Vault policy for the paperless instance: read-only access to its KV entry
# and to its database credentials path.
path "[[ .vault.prefix ]]kv/data/service/[[ .paperless.instance ]]" {
capabilities = ["read"]
}
path "[[ .vault.prefix ]]database/creds/[[ .paperless.instance ]]" {
capabilities = ["read"]
}