blob: 39fb46123684ebe8769c460ca8db8e3eb983fb6c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
#!/bin/bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Copyright (C) 2022 Alexander Vogt
# Author: Alexander Vogt <a.vogt@fulguritus.com>
#
# Sample postprocessing script for gnome-simple-scan for OCR in PDFs
#
# This script first identifies a suitable instance of ocrmypdf
# (https://github.com/ocrmypdf/OCRmyPDF) and then applies this as a
# postprocessing step to PDFs generated by simple-scan.
#
# Usage:
# =====
# simple-scan-postprocessing mime-type keep-origin input-file args
#
# Currently, only mime-type "application/pdf" is supported, the script will
# exit without an error if "image/jpeg", "image/png", or "image/webp" is
# provided. Any other mime-type results in an error.
# All args are provided to ocrmypdf.
# If keep-origin is set to "true", a copy of the source file is kept.
#
# Example:
# =======
# simple-scan-postprocessing application/pdf true scan.pdf -l eng+deu
# simple-scan-postprocessing application/pdf true scan.pdf -rcd --jbig2-lossy -l deu
#
# Abort on the first failing command (-e) and disable job control (+m).
set -e +m

# Arguments
mime_type="$1"     # MIME type of the file to post-process
keep_original="$2" # "true" keeps a copy of the unprocessed file
target="$3"        # file to post-process
# Every argument from the fourth on, flattened into a single space-separated
# string; these are the extra options destined for ocrmypdf.
# shellcheck disable=SC2124 # flattening is intentional here
remainder="${@:4}"

# Globals
_ocrmypdfcontainer="jbarlow83/ocrmypdf" # container image for podman/docker

# Path of the backup copy: "<name>_orig.<ext>" next to the target.
# Only a dot inside the file name itself counts as an extension separator;
# otherwise a dot in a parent directory (e.g. "/tmp/a.b/scan") would be
# mistaken for one and yield a path in a directory that does not exist.
case "${target##*/}" in
  *.*) source="${target%.*}_orig.${target##*.}" ;;
  *) source="${target}_orig" ;;
esac
# Helper functions
# Determines how to invoke ocrmypdf and stores the result in the global
# $_ocrmypdf as a whitespace-separated command line.
#
# Lookup order:
#   1. ocrmypdf from the $PATH (local installation)
#   2. ocrmypdf through podman (if podman in $PATH)
#   3. ocrmypdf through docker (if docker in $PATH)
#
# Globals:
#   _ocrmypdfcontainer (read)    - container image passed to podman/docker
#   _ocrmypdf          (written) - command line to execute
# Returns:
#   0 when an instance was found; otherwise prints a message to stderr and
#   exits the script with status 1.
function findOcrMyPdf() {
  local engine engine_path

  # Every lookup is tested explicitly with `if`, so the outcome does not
  # depend on `set -e`: a failed lookup falls through to the next candidate
  # instead of aborting the script before the error message can be printed.
  if _ocrmypdf=$(command -v ocrmypdf); then
    return 0
  fi

  for engine in podman docker; do
    if engine_path=$(command -v "${engine}"); then
      _ocrmypdf="${engine_path} run --rm -i ${_ocrmypdfcontainer} "
      return 0
    fi
  done

  echo "No suitable instance of ocrmypdf found. Please check your setup. " >&2
  exit 1
}
# Dispatch on the requested mime-type. Only PDFs are processed; the listed
# image types are accepted but left untouched, anything else is an error.
case "${mime_type}" in
  "application/pdf")
    # Determine the version of ocrmypdf to use. This happens before any file
    # is moved, so a missing ocrmypdf leaves the target untouched.
    findOcrMyPdf

    mv -- "$target" "$source" # create a backup

    # Execute OCR, reading the backup on stdin and writing the target on
    # stdout. The extra arguments are forwarded one by one ("${@:4}") so that
    # they are neither re-split on whitespace nor glob-expanded. The status is
    # captured instead of letting `set -e` abort, so the backup can be
    # restored on failure.
    ocr_status=0
    # shellcheck disable=SC2086 # $_ocrmypdf is a command line; split on purpose
    ${_ocrmypdf} "${@:4}" - - <"$source" >"$target" || ocr_status=$?
    if [[ "$ocr_status" -ne 0 ]]; then
      # Put the unprocessed file back instead of leaving a truncated target.
      mv -f -- "$source" "$target"
      echo "ocrmypdf failed with status ${ocr_status}; original file restored." >&2
      exit "$ocr_status"
    fi
    ;;
  "image/jpeg" | "image/png" | "image/webp")
    exit 0 # Nothing implemented
    ;;
  *)
    echo "Unsupported mime-type \"${mime_type}\"" >&2
    exit 1
    ;;
esac

# Clean up: drop the backup unless the caller asked to keep it.
if [[ "$keep_original" != "true" ]]; then
  rm -- "$source"
fi
|