First try to index PDF with pdftotext, and fall back to gs if pdftotext cannot extract any useful information

This commit is contained in:
Daniel Berteaud 2013-10-07 08:46:39 +02:00
parent a54a1a593b
commit 714b11a2cf
1 changed files with 5 additions and 1 deletions

View File

@ -1,2 +1,6 @@
#!/bin/bash
#
# Print the text content of a PDF file on stdout.
#
# Arguments:
#   $1 - path to the PDF file
#
# pdftotext is tried first. If it produces no text (empty or nothing but
# whitespace), Ghostscript's ps2ascii.ps is used as a fallback.

pdf=$1

text=$(/usr/bin/pdftotext -eol unix -layout -nopgbrk "$pdf" -)

# Whitespace-only output carries no indexable text, so it triggers the
# fallback just like empty output does.
if ! [[ $text =~ [^[:space:]] ]]; then
  # gs diagnostics are discarded (2>/dev/null); the ps2ascii output is
  # converted from ISO8859-1 to UTF-8 before being captured.
  text=$(/usr/bin/gs -q -P- -dNODISPLAY -dSAFER -dDELAYBIND -dWRITESYSTEMDICT \
    -dSIMPLE -f ps2ascii.ps "$pdf" -c quit 2>/dev/null \
    | iconv -f ISO8859-1 -t UTF-8)
fi

# printf instead of echo: the extracted text is arbitrary data and could
# start with something echo would treat as an option (e.g. "-n").
printf '%s\n' "$text"