From 714b11a2cfe261a19c3c4f099f67aa8b30dd3370 Mon Sep 17 00:00:00 2001
From: Daniel Berteaud
Date: Mon, 7 Oct 2013 08:46:39 +0200
Subject: [PATCH] First try to index PDF with pdftotext, and fall back to gs if pdftotext cannot extract any useful information

---
 root/usr/bin/ajxppdftotext | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/root/usr/bin/ajxppdftotext b/root/usr/bin/ajxppdftotext
index 6b6de45..2a2eb2e 100644
--- a/root/usr/bin/ajxppdftotext
+++ b/root/usr/bin/ajxppdftotext
@@ -1,2 +1,6 @@
 #!/bin/bash
-gs -q -dNODISPLAY -dSAFER -dDELAYBIND -dWRITESYSTEMDICT -dSIMPLE -c save -f ps2ascii.ps $1 -c quit 2>/dev/null
+RES=$(/usr/bin/pdftotext -eol unix -layout -nopgbrk "$1" -)
+if [ -z "$RES" ]; then
+    RES=$(/usr/bin/gs -q -P- -dNODISPLAY -dSAFER -dDELAYBIND -dWRITESYSTEMDICT -dSIMPLE -f ps2ascii.ps "$1" -c quit 2>/dev/null | iconv -f ISO8859-1 -t UTF-8)
+fi
+echo "$RES"