From 9e1f618b6887ff658c7c9b61b7024f0222b9ce3e Mon Sep 17 00:00:00 2001 From: nberting Date: Mon, 26 Mar 2018 12:38:51 +0200 Subject: [PATCH] updated erase & replace python script with sort function --- src/erase_leastcommon.py | 4 ++-- src/replace_leastcommon.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/erase_leastcommon.py b/src/erase_leastcommon.py index bef06d9..a3f6f87 100644 --- a/src/erase_leastcommon.py +++ b/src/erase_leastcommon.py @@ -46,8 +46,8 @@ def filternone(word_raw): x = -1 leastcommon_list = [] allwords = [] -scanimg = glob.glob('images-tiff/*.tiff') -hocr = glob.glob('hocr/*.html') +scanimg = sorted(glob.glob('images-tiff/*.tiff')) +hocr = sorted(glob.glob('hocr/*.html')) maximum = 20 / len(scanimg) # this helps the script remove words in a way that is proportional to number of pages scanned # loop through every image in scanimg folder diff --git a/src/replace_leastcommon.py b/src/replace_leastcommon.py index faa697b..cf15de0 100644 --- a/src/replace_leastcommon.py +++ b/src/replace_leastcommon.py @@ -63,8 +63,8 @@ def filternone(word_raw): x = -1 leastcommon_list = [] allwords = [] -scanimg = glob.glob('images-tiff/*.tiff') -hocr = glob.glob('hocr/*.html') +scanimg = sorted(glob.glob('images-tiff/*.tiff')) +hocr = sorted(glob.glob('hocr/*.html')) num = 0 maximum = 20 / len(scanimg) # this helps the script remove words in a way that is proportional to number of pages scanned