diff --git a/src/erase_leastcommon.py b/src/erase_leastcommon.py
index bef06d9..a3f6f87 100644
--- a/src/erase_leastcommon.py
+++ b/src/erase_leastcommon.py
@@ -46,8 +46,8 @@ def filternone(word_raw):
 x = -1
 leastcommon_list = []
 allwords = []
-scanimg = glob.glob('images-tiff/*.tiff')
-hocr = glob.glob('hocr/*.html')
+scanimg = sorted(glob.glob('images-tiff/*.tiff'))
+hocr = sorted(glob.glob('hocr/*.html'))
 maximum = 20 / len(scanimg) # this helps the script remove words in a way that is proportional to number of pages scanned
 
 # loop through every image in scanimg folder
diff --git a/src/replace_leastcommon.py b/src/replace_leastcommon.py
index faa697b..cf15de0 100644
--- a/src/replace_leastcommon.py
+++ b/src/replace_leastcommon.py
@@ -63,8 +63,8 @@ def filternone(word_raw):
 x = -1
 leastcommon_list = []
 allwords = []
-scanimg = glob.glob('images-tiff/*.tiff')
-hocr = glob.glob('hocr/*.html')
+scanimg = sorted(glob.glob('images-tiff/*.tiff'))
+hocr = sorted(glob.glob('hocr/*.html'))
 num = 0
 maximum = 20 / len(scanimg) # this helps the script remove words in a way that is proportional to number of pages scanned
 