@ -60,6 +61,13 @@ tiffs: ## convert images/ to images-tiff/ Depends on IM
echo$$tiff;\
done;
# hocrs: run tesseract on every images-tiff/*.tiff using the "hocr" config,
# writing into hocr/, then rename each hocr/<name>.hocr to hocr/<name>.html.
# <name> is the TIFF's basename with the .tiff suffix stripped.
# The loop aborts on the first failing tesseract or mv so that make reports
# the error instead of carrying on with the remaining images.
hocrs: ## hocr with tesseract and then change extension to .html
	mkdir -p hocr
	for i in images-tiff/*.tiff; do \
		echo "$$i"; \
		hocrfile=$$(basename "$$i" .tiff); \
		tesseract "$$i" "hocr/$$hocrfile" hocr || exit 1; \
		mv "hocr/$$hocrfile.hocr" "hocr/$$hocrfile.html" || exit 1; \
	done
#OUTPUT GENERATION RECIPES
output/wordtagger/index.html:ocr/output.txt## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2