diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2022-03-20 09:13:50 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2022-03-20 09:13:50 +0100 |
commit | aafaa7c3ff1f88dadbad34559adab7b35be8542b (patch) | |
tree | 11f3ff35af220dd659d42e5f87d29b5d5ca073cb /src/simple-scan-postprocessing.sh | |
parent | 055f6f3e01bb718bfdd61331e1b13b0cdba6d718 (diff) | |
parent | e01ac9786891513c1cb628ca0a0374436057cc3b (diff) |
Merge branch 'release/debian/42.0-1' debian/42.0-1
Diffstat (limited to 'src/simple-scan-postprocessing.sh')
-rwxr-xr-x | src/simple-scan-postprocessing.sh | 83 |
1 files changed, 83 insertions, 0 deletions
#!/bin/bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Copyright (C) 2022 Alexander Vogt
# Author: Alexander Vogt <a.vogt@fulguritus.com>
#
# Sample postprocessing script for gnome-simple-scan for OCR in PDFs
#
# This script first identifies a suitable instance of ocrmypdf
# (https://github.com/ocrmypdf/OCRmyPDF) and then applies this as a
# postprocessing step to PDFs generated by simple-scan.
#
# Usage:
# =====
# simple-scan-postprocessing mime-type keep-origin input-file args
#
# Currently, only mime-type "application/pdf" is supported, the script will
# exit without an error if "image/jpeg", "image/png", or "image/webp" is
# provided. Any other mime-type results in an error.
# All args are provided to ocrmypdf. They are split on whitespace, so they
# may be given as separate words or as one single string ("-l eng+deu").
# If keep-origin is set to "true", a copy of the source file is kept as
# <name>_orig.<ext> next to the input file.
# If no ocrmypdf can be found, or if the OCR run fails, the input file is
# left (or put back) unchanged and the script exits with a non-zero status.
#
# Example:
# =======
# simple-scan-postprocessing application/pdf true scan.pdf -l eng+deu
# simple-scan-postprocessing application/pdf true scan.pdf -rcd --jbig2-lossy -l deu
#
set -e +m

# Arguments
mime_type="$1"
keep_original="$2"
target="$3"
remainder="${*:4}"

# Globals
_ocrmypdfcontainer="jbarlow83/ocrmypdf"
# Command line (program plus fixed arguments) used to start ocrmypdf.
# Filled in by findOcrMyPdf; kept as an array so that a path containing
# whitespace stays a single word.
_ocrmypdf=()

# Split the extra arguments on whitespace on purpose: the caller may hand over
# all ocrmypdf options as ONE string (assumption - confirm against the way
# simple-scan invokes this script). Globbing is switched off while splitting so
# that characters such as '*' or '?' are never expanded against the files in
# the current directory.
set -f
# shellcheck disable=SC2206  # intentional word splitting, see above
ocr_args=(${remainder})
set +f

# Name of the backup copy: "<name>_orig.<ext>" in the same directory as the
# target. The extension is taken from the file name only, so a dot in a
# directory name cannot be mistaken for the start of the extension.
target_name="${target##*/}"
target_dir="${target%"${target_name}"}"
if [[ "${target_name}" == *.* ]]; then
  backup="${target_dir}${target_name%.*}_orig.${target_name##*.}"
else
  backup="${target_dir}${target_name}_orig"
fi

# Helper functions

#######################################
# Determines how to run ocrmypdf, in the following order:
#   1. ocrmypdf from the $PATH (local installation)
#   2. ocrmypdf through podman (if podman in $PATH)
#   3. ocrmypdf through docker (if docker in $PATH)
# Globals:
#   _ocrmypdfcontainer (read)  - container image used for podman/docker
#   _ocrmypdf          (write) - resulting command line as an array
# Outputs:
#   An error message on stderr if nothing suitable is found.
# Returns:
#   0 on success; terminates the script with status 1 otherwise.
#######################################
function findOcrMyPdf() {
  local tool runtime

  # The lookups are done inside "if" conditions: with "set -e" a failing
  # plain assignment would abort the script before any message is printed.
  if tool=$(command -v ocrmypdf); then
    _ocrmypdf=("${tool}")
    return 0
  fi

  for runtime in podman docker; do
    if tool=$(command -v "${runtime}"); then
      _ocrmypdf=("${tool}" run --rm -i "${_ocrmypdfcontainer}")
      return 0
    fi
  done

  printf '%s\n' "No suitable instance of ocrmypdf found. Please check your setup." >&2
  exit 1
}

case "${mime_type}" in
  "application/pdf")
    if [[ ! -f "${target}" ]]; then
      printf '%s\n' "Input file \"${target}\" not found" >&2
      exit 1
    fi

    # Determine the version of ocrmypdf to use. This happens BEFORE the
    # input file is touched, so a missing tool leaves the scan in place.
    findOcrMyPdf

    mv -- "${target}" "${backup}" # create a backup

    # Execute OCR: read the backup on stdin, write the result to the
    # original file name on stdout.
    status=0
    "${_ocrmypdf[@]}" "${ocr_args[@]}" - - <"${backup}" >"${target}" || status=$?
    if ((status != 0)); then
      # Do not leave a truncated or empty PDF behind: put the original back
      # and report the exit status of ocrmypdf to the caller.
      mv -f -- "${backup}" "${target}"
      printf '%s\n' "OCR failed (exit status ${status}); original file restored." >&2
      exit "${status}"
    fi
    ;;
  "image/jpeg" | "image/png" | "image/webp")
    exit 0 # Nothing implemented
    ;;
  *)
    printf '%s\n' "Unsupported mime-type \"${mime_type}\"" >&2
    exit 1
    ;;
esac

# Clean up: drop the backup unless the caller asked to keep it.
if [[ "${keep_original}" != "true" ]]; then
  rm -- "${backup}"
fi