# Makefile for the OCR / text-processing pipeline.
#
# Scanned pages in images/*.jpg are OCR'ed with tesseract into ocr/output.txt;
# the remaining rules derive files from that text or play it back.
# Run `make` (or `make help`) for the list of documented targets.

# Remove a half-written target when its recipe fails, so that a truncated
# output file is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# ---------------------------------------------------------------------------
# VARIABLES
# ---------------------------------------------------------------------------

# Sorted list of the input scans.
# TODO(@andre): make the wildcard take any image file type, but not the
# listimg.txt file.
images := $(sort $(wildcard images/*.jpg))

# NOTE(review): dir_ocr is not defined in this part of the file, so this
# presumably expands to "/output.txt" -- confirm before relying on it.
output_ocr := $(dir_ocr)/output.txt

# NOTE(review): $(shell mktemp) runs at parse time, so every make invocation
# (even `make help`) creates a new temporary file. Only `visualization` writes
# to it and only `clean` removes it.
tmpfile := $(shell mktemp)

# A single space, for use inside $(subst ...).
space := $(empty) $(empty)
newline := '\n'

# The images separated by $(newline), built with
# $(subst $(delimiter),$(replacement),$(list)).
listimgs := $(subst $(space),$(newline),$(images))

OS := $(shell uname)

# Colors: add color to output, i.e. @echo $(color_r) output text
color_w := "\033[0;29m"
color_r := "\033[0;31m"
color_g := "\033[0;32m"
color_b := "\033[0;34m"

# ---------------------------------------------------------------------------
# HELP / SELF DOCUMENTATION
# ---------------------------------------------------------------------------
# Every rule whose header line carries a comment introduced by two hash signs
# followed by a space is listed by `make help` (see the clean rule for an
# example).

# The help rule is the default when you run: make
.DEFAULT_GOAL := help

.PHONY: help clean dirs testif tiffs hocrs visualization tts ttssr-human-only chatbook

help:
	@grep -E '^[a-zA-Z0-9_./-]+:.*## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# ---------------------------------------------------------------------------
# CLEAN
# ---------------------------------------------------------------------------

clean: ## removes output (target) files
	$(RM) ocr/output.txt ocr/list.txt
	$(RM) -r $(wildcard output/*)
	$(RM) $(tmpfile)

# ---------------------------------------------------------------------------
# ADMINISTRATIVE RECIPES
# ---------------------------------------------------------------------------

dirs: ## create the dirs in working dir
	@mkdir -p images/ images-tiff/ output/ ocr/ hocr/
	@echo $(color_r)'Directories made': images/ output/

# Prints the OS name, on Darwin (macOS) only.
testif:
ifeq ($(OS),Darwin)
	@echo $(OS)
endif

# ---------------------------------------------------------------------------
# POST-PROCESSING RECIPES
# ---------------------------------------------------------------------------

# tesseract reads the list file (one image path per line) and appends ".txt"
# to the output base name it is given, hence $(basename $@).
ocr/output.txt: $(images) ## ocr with tesseract
	@mkdir -p $(@D)
	printf '%s\n' $(images) > $(@D)/list.txt
	@echo $(basename $@)
	tesseract $(@D)/list.txt $(basename $@)
	python3 src/build_database.py $@

tiffs: ## convert images/ to images-tiff/ Depends on IM
	@mkdir -p images-tiff
	echo $(images)
	for i in $(images); do \
		tiff=`basename "$$i" .jpg`.tiff; \
		convert "$$i" -density 300 -alpha on "images-tiff/$$tiff" || exit 1; \
		echo "$$tiff"; \
	done

hocrs: ## hocr with tesseract and then change extension to .html
	@mkdir -p hocr
	for i in images-tiff/*.tiff; do \
		echo "$$i"; \
		hocrfile=`basename "$$i" .tiff`; \
		tesseract "$$i" "hocr/$$hocrfile" hocr \
			&& mv "hocr/$$hocrfile.hocr" "hocr/$$hocrfile.html" \
			|| exit 1; \
	done

# ---------------------------------------------------------------------------
# OUTPUT GENERATION RECIPES
# ---------------------------------------------------------------------------

# The wordtagger needs nltk's 'averaged_perceptron_tagger'. To install it:
#   $ python3
#   >>> import nltk
#   >>> nltk.download('averaged_perceptron_tagger')
output/wordtagger/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2
	mkdir -p output/wordtagger
	cp src/wordtagger/jquery.min.js output/wordtagger
	cp src/wordtagger/script.js output/wordtagger
	cp src/wordtagger/style.css output/wordtagger
	python3 src/wordtagger/wordtagger.py < $<

output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
	python3 src/textbotconversation.py $@ < $<

output/n7.txt: ocr/output.txt ## Replaces nouns with the 7th noun that follows. Dependencies: 91k_nouns
	python3 src/n_7.py < $< > $@

# To send the result to a line printer afterwards:
#   cat output/carlandre.txt > /dev/usb/lp0
output/carlandre.txt: ocr/output.txt ## Alice: Creates visual poetry out of a text. Dependencies: pytest
	python3 src/carlandre.py < $< > $@

# NOTE(review): nothing in this recipe visibly creates output/overunder, so
# the rule presumably runs on every request -- confirm that this is intended.
output/overunder: ocr/output.txt ## Alice: An interpreted language that translate simple weaving instructions and creates a weaving pattern on text.
	python3 src/overunder.py

# Concatenates the raw bytes of every image into $(tmpfile) and plays the
# result back as raw video. The mplayer options differ between macOS and
# other systems.
visualization: $(images) $(tmpfile) ## Creates data visualization from images/*.jpg. Dependencies: mplayer
	@echo $(tmpfile)
	for i in $(images); do \
		cat "$$i" >> $(tmpfile); \
	done
ifeq ($(OS),Darwin)
	cat $(tmpfile) | mplayer -sws 4 -zoom -vf dsize=720:720 -demuxer rawvideo -rawvideo w=56:h=64:i420:fps=25 -;
else
	cat $(tmpfile) | mplayer -vo x11 -sws 4 -zoom -vf dsize=720:720 -demuxer rawvideo -rawvideo w=50:h=50:i420:fps=25 -;
endif

tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espeak
	@echo $(color_r) speaking $^
	@echo $(color_w)
	cat $^ | espeak

ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
	bash src/ttssr-loop-human-only.sh ocr/output.txt

chatbook: ocr/output.txt ## chatbot based on the knowledge of the scans Dependencies: nltk_rake, irc, nltk
	python3 src/chatbook.py