|
|
|
@ -1,6 +1,7 @@
|
|
|
|
|
# Sorted, de-duplicated list of the files matching images/*.jpg.
# Recursive (`=`) assignment: the wildcard is re-evaluated every time
# $(images) is referenced, not once when this line is parsed.
images=$(sort $(wildcard images/*.jpg))
|
|
|
|
|
# @andre: make the wildcard match any image file, but not the listimg.txt file.
|
|
|
|
|
# Sorted, de-duplicated list of the files matching images-tiff/*.tiff.
# Recursive (`=`) assignment: the wildcard is re-evaluated on each reference.
images-tiff=$(sort $(wildcard images-tiff/*.tiff))
|
|
|
|
|
# Sorted, de-duplicated list of every entry matching hocr/* (no extension filter).
# Recursive (`=`) assignment: the glob is evaluated when $(input-hocr) is
# expanded (e.g. inside a recipe), not when the Makefile is parsed.
input-hocr=$(sort $(wildcard hocr/*))
|
|
|
|
|
# Path of the OCR text output: output.txt inside $(dir_ocr).
# Simply-expanded (`:=`), so $(dir_ocr) is read immediately at this line.
# NOTE(review): dir_ocr is not defined in the visible part of the file; it
# must be set before this point, otherwise the value becomes "/output.txt".
output_ocr:=$(dir_ocr)/output.txt
|
|
|
|
|
# Output of `mktemp`, captured once while the Makefile is being parsed
# (`:=` expands $(shell ...) immediately).
# NOTE(review): because this runs at parse time, `mktemp` is invoked on every
# make invocation, whichever goal is requested; no cleanup of the resulting
# file is visible in this part of the file -- confirm it is removed somewhere.
tmpfile:= $(shell mktemp)
|
|
|
|
|
# A variable holding a single space character: whitespace right after `:=` is
# dropped, so the value is the one space between the two $(empty) references.
# Assumes `empty` is unset or empty (its definition is not visible here).
space:= $(empty) $(empty)
|
|
|
|
@ -98,11 +99,13 @@ overunder: ocr/output.txt ## Alice: An interpreted language that translate simpl
|
|
|
|
|
@python3 src/overunder.py
|
|
|
|
|
# Mark `overunder` as a phony target: it names a command rather than a file,
# so its recipe runs even if a file called "overunder" exists.
.PHONY: overunder
|
|
|
|
|
|
|
|
|
|
# erase: run src/erase_leastcommon.py with python3, then delete the files
# listed in $(input-hocr).
# Two header lines for this target are visible in this fragment: the first
# lists only `hocrs` as a prerequisite, the second lists `tiffs` and `hocrs`.
# The text after `##` is a human-readable description (author, purpose, Python
# dependencies); presumably a help target prints it -- not visible here, confirm.
# NOTE(review): if $(input-hocr) expands to nothing, `rm` is called with no
# operands; confirm that is acceptable or whether `$(RM)` (rm -f) was intended.
erase:hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
|
|
|
|
|
erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
|
|
|
|
|
python3 src/erase_leastcommon.py
|
|
|
|
|
rm $(input-hocr)
|
|
|
|
|
|
|
|
|
|
# replace: run src/replace_leastcommon.py with python3, then delete the files
# listed in $(input-hocr).
# Two header lines for this target are visible in this fragment: the first
# lists only `hocrs` as a prerequisite, the second lists `tiffs` and `hocrs`.
# The text after `##` is a human-readable description (author, purpose, Python
# dependencies); presumably a help target prints it -- not visible here, confirm.
# NOTE(review): if $(input-hocr) expands to nothing, `rm` is called with no
# operands; confirm that is acceptable or whether `$(RM)` (rm -f) was intended.
replace:hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
|
|
|
|
|
replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
|
|
|
|
|
python3 src/replace_leastcommon.py
|
|
|
|
|
rm $(input-hocr)
|
|
|
|
|
|
|
|
|
|
visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer
|
|
|
|
|
@echo $(tmpfile)
|
|
|
|
|