|
|
|
@ -1,5 +1,6 @@
|
|
|
|
|
# Sorted (and therefore de-duplicated) list of the .jpg files found in images/.
# Recursive (`=`) flavour: the wildcard is re-evaluated every time $(images) is expanded.
images=$(sort $(wildcard images/*.jpg))
|
|
|
|
|
# TODO(@andre): make the wildcard match any image file type while excluding listimg.txt.
|
|
|
|
|
# Sorted (and therefore de-duplicated) list of the .jpg files found in images-tiff/.
# Recursive (`=`) flavour: the wildcard is re-evaluated every time $(images-tiff) is expanded.
# NOTE(review): the pattern is *.jpg although the directory is named images-tiff -- confirm the intended extension.
images-tiff=$(sort $(wildcard images-tiff/*.jpg))
|
|
|
|
|
# Path of the OCR text output: output.txt inside $(dir_ocr).
# Expanded immediately (`:=`), so dir_ocr must already be set when this line is parsed.
output_ocr:=$(dir_ocr)/output.txt
|
|
|
|
|
# Path of a scratch file obtained from `mktemp`.
# `:=` makes the shell call run exactly once, when this line is parsed -- which
# also means every make invocation that reads this file runs mktemp again.
tmpfile:= $(shell mktemp)
|
|
|
|
|
# A single literal space character: the only text kept between the two
# $(empty) references. Assumes `empty` is unset or empty -- TODO confirm.
space:= $(empty) $(empty)
|
|
|
|
@ -34,9 +35,10 @@ dirs: ## create the dirs in working dir
|
|
|
|
|
@-mkdir -p images/
|
|
|
|
|
@-mkdir -p images-tiff/
|
|
|
|
|
@-mkdir -p output/
|
|
|
|
|
@-mkdir -p output/erase-replace/
|
|
|
|
|
@-mkdir -p ocr/
|
|
|
|
|
@-mkdir -p hocr/
|
|
|
|
|
@echo $(color_r)'Directories made': images/ output/
|
|
|
|
|
@echo $(color_r)'Directories made': ocr/ hocr/ images/ images-tiff/ output/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
testif:
|
|
|
|
@ -95,6 +97,11 @@ overunder: ocr/output.txt ## Alice: An interpreted language that translate simpl
|
|
|
|
|
@python3 src/overunder.py
|
|
|
|
|
# overunder names a command rather than a file, so its recipe runs whenever it is requested.
.PHONY: overunder
|
|
|
|
|
|
|
|
|
|
# erase: run src/erase_leastcommon.py with python3 (see the ## help text on the
# rule line for what the script does and which Python packages it needs).
# The target names a command, not a file, so it is declared phony below;
# otherwise a stray file called "erase" would make the recipe never run.
erase: ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
	python3 src/erase_leastcommon.py
.PHONY: erase
|
|
|
|
|
|
|
|
|
|
# replace: run src/replace_leastcommon.py with python3 (see the ## help text on
# the rule line for what the script does and which Python packages it needs).
# The target names a command, not a file, so it is declared phony below;
# otherwise a stray file called "replace" would make the recipe never run.
replace: ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
	python3 src/replace_leastcommon.py
.PHONY: replace
|
|
|
|
|
|
|
|
|
|
visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer
|
|
|
|
|
@echo $(tmpfile)
|
|
|
|
|