summaryrefslogtreecommitdiff
path: root/src/simple-scan-postprocessing.sh
diff options
context:
space:
mode:
Diffstat (limited to 'src/simple-scan-postprocessing.sh')
-rwxr-xr-xsrc/simple-scan-postprocessing.sh83
1 files changed, 83 insertions, 0 deletions
diff --git a/src/simple-scan-postprocessing.sh b/src/simple-scan-postprocessing.sh
new file mode 100755
index 0000000..39fb461
--- /dev/null
+++ b/src/simple-scan-postprocessing.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0-or-later
+# Copyright (C) 2022 Alexander Vogt
+# Author: Alexander Vogt <a.vogt@fulguritus.com>
+#
+# Sample postprocessing script for gnome-simple-scan for OCR in PDFs
+#
+# This script first identifies a suitable instance of ocrmypdf
+# (https://github.com/ocrmypdf/OCRmyPDF) and then applies this as a
+# postprocessing step to PDFs generated by simple-scan.
+#
+# Usage:
+# =====
+# simple-scan-postprocessing mime-type keep-origin input-file args
+#
+# Currently, only mime-type "application/pdf" is processed. The script
+# exits without an error (and without touching the file) if "image/jpeg",
+# "image/png", or "image/webp" is provided. Any other mime-type results in
+# an error. All args are passed on to ocrmypdf. If keep-origin is set to
+# "true", the unprocessed scan is kept as <name>_orig.<extension>.
+#
+# Example:
+# =======
+# simple-scan-postprocessing application/pdf true scan.pdf -l eng+deu
+# simple-scan-postprocessing application/pdf true scan.pdf -rcd --jbig2-lossy -l deu
+#
+set -e +m
+
+# Arguments
+mime_type="$1"
+keep_original="$2"
+target="$3"
+remainder="${@:4}"
+# Globals
+_ocrmypdfcontainer="jbarlow83/ocrmypdf"
+
+source="${target%.*}_orig.${target##*.}"
+
+# Helper functions
+function findOcrMyPdf() {
+  # Sets ${_ocrmypdf} to the first available way of running ocrmypdf:
+  # 1. ocrmypdf from the $PATH (local installation)
+  # 2. ocrmypdf through podman (if podman in $PATH)
+  # 3. ocrmypdf through docker (if docker in $PATH)
+  # Every lookup is followed by "&&" so that "set -e" cannot end the script
+  # on a failed lookup before the message below has been printed.
+  _ocrmypdf=$(command -v ocrmypdf) && return
+  _ocrmypdf="$(command -v podman) run --rm -i ${_ocrmypdfcontainer} " && return
+  _ocrmypdf="$(command -v docker) run --rm -i ${_ocrmypdfcontainer} " && return
+  echo "No suitable instance of ocrmypdf found. Please check your setup. "
+  exit 1
+}
+
+# Dispatch on the mime-type: PDFs are run through OCR, the listed image
+# types are accepted without any processing, anything else is an error.
+case "${mime_type}" in
+  "application/pdf")
+    # Locate ocrmypdf before touching the scan, so that a missing
+    # installation leaves the scanned file in place.
+    findOcrMyPdf
+    mv -- "$target" "$source" # create a backup
+    # Execute OCR: the backup is read on stdin, the result replaces the
+    # target. ${_ocrmypdf} and ${remainder} may each hold several words in
+    # one string, so they have to stay unquoted to be split into arguments.
+    # If OCR fails, the unprocessed scan is moved back to the target and
+    # the exit status of the failed command is passed on.
+    ${_ocrmypdf} ${remainder} - - <"$source" >"$target" ||
+      { rc=$?; mv -f -- "$source" "$target"; exit "$rc"; }
+    ;;
+  "image/jpeg" | "image/png" | "image/webp")
+    exit 0 # Nothing implemented
+    ;;
+  *)
+    echo "Unsupported mime-type \"${mime_type}\""
+    exit 1
+    ;;
+esac
+
+# Clean up: remove the backup unless the caller asked to keep it.
+# Only the exact value "true" keeps it.
+if [ "$keep_original" != "true" ]; then
+  rm -- "$source"
+fi