# Source scans, sorted (and de-duplicated) so the processing order is stable.
# Simply expanded (:=) so the wildcard is evaluated once, at parse time.
# TODO(@andre): widen the wildcard so that it takes any image file but
# does not take the listimg.txt file.
images := $(sort $(wildcard images/*.jpg))
# Directory of the OCR result. `?=` keeps a value supplied by the environment
# or the command line; without a default, output_ocr would be /output.txt.
dir_ocr ?= ocr
output_ocr := $(dir_ocr)/output.txt
# NOTE: mktemp runs while the makefile is parsed, so every invocation of make
# creates a new temporary file.
tmpfile := $(shell mktemp)
# A single space, and a shell-quoted newline escape, used to build listimgs.
empty :=
space := $(empty) $(empty)
newline := '\n'
# The image names joined by $(newline): $(subst from,to,text).
# Keep comments on their own line: anything between the value and a trailing
# `#` (blanks included) would become part of the value.
listimgs := $(subst $(space),$(newline),$(images))
# Kernel name reported by uname (Darwin on macOS).
OS := $(shell uname)
# ANSI colour escapes for recipe output, e.g.  @echo $(color_r) output text
# The double quotes belong to each value, so the shell receives the escape
# sequence as a single quoted word.
color_w := "\033[0;29m"
color_r := "\033[0;31m"
color_g := "\033[0;32m"
color_b := "\033[0;34m"

# HELP / SELF DOCUMENTATION
# A rule is listed by `make help` when its target line carries a comment that
# starts with two hashes and a space (see the clean rule for an example).
# Running plain `make` shows this help.
.DEFAULT_GOAL := help

.PHONY: help

# The target pattern accepts digits, dots and slashes so that file targets
# (ocr/output.txt, output/n7.txt, ...) are listed too; `-` sits last in the
# bracket expression so it is a literal and not a range. -h drops the
# file-name prefix grep adds when MAKEFILE_LIST holds several makefiles.
help: ## list the documented targets
	@grep -hE '^[a-zA-Z0-9_./-]+:.*## ' $(MAKEFILE_LIST) \
		| sort \
		| awk 'BEGIN {FS = ":.*## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# CLEAN
# Phony, so a stray file named `clean` cannot disable the rule. Every rm uses
# -f so that cleaning an already clean tree succeeds; output/ is emptied
# recursively because it contains the wordtagger directory.
.PHONY: clean
clean: ## removes output (target) files
	rm -f ocr/output.txt ocr/list.txt
	rm -rf output/*
	rm -f $(tmpfile)


# ADMINISTRATIVE RECIPES

# Creates the working directories used by the other rules. mkdir -p already
# succeeds when a directory exists, so no error suppression is needed.
# printf %b expands the colour escapes in any POSIX shell (echo only does so
# in some); the trailing $(color_w) switches the colour back.
.PHONY: dirs
dirs: ## create the dirs in working dir
	@mkdir -p images/ images-tiff/ output/ ocr/ hocr/
	@printf '%b%s%b\n' $(color_r) 'Directories made: images/ images-tiff/ output/ ocr/ hocr/' $(color_w)


# Debug helper for the OS conditional: prints the value of $(OS) when it is
# Darwin; on any other system the recipe is empty and nothing is printed.
.PHONY: testif
testif:
ifeq ($(OS),Darwin)
	@echo $(OS)
endif


# POST-PROCESSING RECIPES

# OCR of all scans into one text file.
# - The scans are real prerequisites, so new or changed images trigger a re-run.
# - list.txt (one image path per line) is written with printf, which does not
#   depend on the shell's echo interpreting '\n'.
# - tesseract receives the output base name without extension, $(basename $@),
#   and appends .txt itself.
# - build_database.py is then run with the OCR text file as its argument.
ocr/output.txt: $(images) ## ocr with tesseract
	$(if $(strip $(images)),,$(error ocr/output.txt: no images/*.jpg found))
	@mkdir -p $(@D)
	printf '%s\n' $(images) > $(@D)/list.txt
	@echo $(basename $@)
	tesseract $(@D)/list.txt $(basename $@)
	python3 src/build_database.py $@

# Converts every scan to a 300 dpi TIFF with ImageMagick's convert.
# The loop aborts on the first failed conversion instead of carrying on
# and reporting success.
.PHONY: tiffs
tiffs: ## convert images/ to images-tiff/ Depends on IM
	@mkdir -p images-tiff
	echo $(images)
	for i in $(images); do \
		tiff=`basename "$$i" .jpg`.tiff; \
		convert -density 300 "$$i" "images-tiff/$$tiff" || exit 1; \
		echo "$$tiff"; \
	done

# Runs tesseract in hocr mode on each TIFF and renames the result to .html.
# The -e test skips the literal pattern the shell leaves behind when no
# .tiff file exists; any tesseract or mv failure stops the loop.
.PHONY: hocrs
hocrs: ## hocr with tesseract and then change extension to .html
	@mkdir -p hocr
	for i in images-tiff/*.tiff; do \
		[ -e "$$i" ] || continue; \
		echo "$$i"; \
		hocrfile=`basename "$$i" .tiff`; \
		tesseract "$$i" "hocr/$$hocrfile" hocr || exit 1; \
		mv "hocr/$$hocrfile.hocr" "hocr/$$hocrfile.html" || exit 1; \
	done

# OUTPUT GENERATION RECIPES

# Copies the static assets next to the target and feeds the OCR text to the
# tagger on stdin (redirected, so an unreadable input fails the recipe).
# NOTE(review): the recipe never names $@; wordtagger.py presumably writes
# output/wordtagger/index.html itself -- confirm.
#
# Requires nltk's 'averaged_perceptron_tagger'; install it once with:
#   $ python3
#   >>> import nltk
#   >>> nltk.download('averaged_perceptron_tagger')
output/wordtagger/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2
	mkdir -p $(@D)
	cp src/wordtagger/jquery.min.js src/wordtagger/script.js src/wordtagger/style.css $(@D)
	python3 src/wordtagger/wordtagger.py < $<

# The OCR text is piped to the chatbot script on stdin; the target path is
# passed to the script as its only argument.
output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
	cat $< | python3 src/textbotconversation.py $@


# The script's stdout is collected in a temporary file and only moved onto
# the target on success, so a failed run never leaves a truncated file that
# make would consider up to date.
output/n7.txt: ocr/output.txt ## Replaces nouns with the 7th noun that follows. Dependencies: 91k_nouns
	@mkdir -p $(@D)
	python3 src/n_7.py < $< > $@.tmp \
		&& mv -f $@.tmp $@ \
		|| { rm -f $@.tmp; exit 1; }

# Runs src/carlandre.py once the OCR text is up to date.
# Disabled printer hook: cat $(@) > /dev/usb/lp0
.PHONY: carlandre
carlandre: ocr/output.txt ## Alice: Creates visual poetry out of a text. Dependencies: pytest
	@python3 src/carlandre.py

# Runs src/overunder.py once the OCR text is up to date.
.PHONY: overunder
overunder: ocr/output.txt ## Alice: An interpreted language that translate simple weaving instructions and creates a weaving pattern on text.
	@python3 src/overunder.py


# Plays the raw bytes of all scans as an i420 video stream in mplayer.
# The images are concatenated straight into the pipe, so no scratch file is
# needed. The guard stops early when there are no scans, because cat without
# operands would wait on stdin. Frame geometry and video output differ
# between macOS (Darwin) and other systems.
.PHONY: visualization
visualization: $(images) ## Creates data visualization from images/*.jpg. Dependencies: mplayer
	$(if $(strip $(images)),,$(error visualization: no images/*.jpg found))
ifeq ($(OS),Darwin)
	cat $(images) | mplayer -sws 4 -zoom -vf dsize=720:720 -demuxer rawvideo -rawvideo w=56:h=64:i420:fps=25 -
else
	cat $(images) | mplayer -vo x11 -sws 4 -zoom -vf dsize=720:720 -demuxer rawvideo -rawvideo w=50:h=50:i420:fps=25 -
endif


# Speaks the chatbot text followed by the OCR text.
# $^ (all prerequisites) is used rather than $? so that both files are always
# read, and cat can never be left without operands waiting on stdin.
.PHONY: tts
tts: output/chatbot.txt ocr/output.txt    ## text to speech. Dependencies: espeak
	@echo $(color_r) speaking $^
	@echo $(color_w)
	cat $^ | espeak


# Hands the OCR text ($<, the only prerequisite) to the loop script.
.PHONY: ttssr-human-only
ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
	bash src/ttssr-loop-human-only.sh $<

# Runs src/chatbook.py once the OCR text is up to date. The description uses
# the two-hash form so that `make help` lists this target like the others.
.PHONY: chatbook
chatbook: ocr/output.txt ## chatbot based on the knowledge of the scans. Dependencies: nltk_rake, irc, nltk
	python3 src/chatbook.py