@ -46,8 +46,8 @@ def filternone(word_raw):
x = -1
leastcommon_list = []
allwords = []
# Collect the scanned page images and the hOCR files. glob.glob() returns
# matches in arbitrary (filesystem-dependent) order, so both listings are
# sorted to get a deterministic, lexicographic-by-path ordering.
# NOTE(review): presumably the two lists are expected to line up page by page
# via matching file names -- confirm against the loop that consumes them.
scanimg = sorted(glob.glob('images-tiff/*.tiff'))
hocr = sorted(glob.glob('hocr/*.html'))
# NOTE: raises ZeroDivisionError when no .tiff file is found in images-tiff/.
maximum = 20 / len(scanimg) # this helps the script remove words in a way that is proportional to number of pages scanned
# loop through every image in scanimg folder
@ -63,8 +63,8 @@ def filternone(word_raw):
num = 0