From 37ce695501eef02cb6c82275433c6b7488ed3a41 Mon Sep 17 00:00:00 2001
From: Sergey Matveev
Date: Thu, 16 Dec 2021 17:09:54 +0300
Subject: [PATCH] Multipaged pdftotext

---
Reviewer note: the hunk below was re-flowed from a whitespace-collapsed
copy of this patch; confirm the position of the blank context line
against the target file before applying. The post-image hash on the
"index" line predates the reworked hunk.

 recoll/bin/pdftotext.sh | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/recoll/bin/pdftotext.sh b/recoll/bin/pdftotext.sh
index f2fa3b3..1775278 100755
--- a/recoll/bin/pdftotext.sh
+++ b/recoll/bin/pdftotext.sh
@@ -2,5 +2,25 @@

 tmp=`mktemp`
 trap "rm -f $tmp" HUP PIPE INT QUIT TERM EXIT
-mutool convert -F text -o $tmp "$1"
-cat $tmp
+# Convert one page at a time, so that every page can be followed by a
+# form feed marking the page break.
+pages=`mutool info "$1" | sed -n 's/Pages: //p'`
+case "$pages" in
+''|*[!0-9]*) pages=0 ;; # no usable page count
+esac
+if [ "$pages" -gt 0 ] ; then
+    page=1
+    while [ "$page" -le "$pages" ] ; do
+        # Start from an empty file, so that a failed conversion cannot
+        # leave the previous page's text behind to be printed again.
+        : >"$tmp"
+        mutool convert -F text -o "$tmp" "$1" "$page" 2>/dev/null
+        cat "$tmp"
+        printf '\14\n' # ^L
+        page=$((page + 1))
+    done
+else
+    # Page count is unknown: convert the whole document in one go.
+    mutool convert -F text -o "$tmp" "$1"
+    cat "$tmp"
+fi
-- 
2.44.0