images = $(sort $(wildcard images/*))
# @andre make wildcard so that it takes any image file but doesn't take the listimg.txt file
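# A possible fix, sketched here as an untested assumption: drop the list file with filter-out, e.g.
# images = $(sort $(filter-out images/listimg.txt, $(wildcard images/*)))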
images-tiff = $(sort $(wildcard images-tiff/*.tiff))
input-hocr = $(sort $(wildcard hocr/*))
output_ocr := $(dir_ocr)/output.txt
tmpfile := $(shell mktemp)
space := $(empty) $(empty)
newline := '\n'
listimgs := $(subst $(space),$(newline),$(images)) # list of the images, one filename per line: $(subst $(delimiter),$(replacement),$(list))
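# Sketch of what the substitution produces, with hypothetical filenames, assuming /bin/sh's echo
# expands \n escapes (as dash's does) when list.txt is written further down:
#   $(subst $(space),$(newline),img1.jpg img2.jpg) -> img1.jpg'\n'img2.jpg
#   echo img1.jpg'\n'img2.jpg > list.txt   # one filename per line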
OS := $(shell uname)
# Colors: add color to output, e.g. @echo $(color_r) output text
color_w := "\033[0;29m"
color_r := "\033[0;31m"
color_g := "\033[0;32m"
color_b := "\033[0;34m"
# HELP / SELF DOCUMENTATION
# Rules whose first line carries a comment with two # signs are listed by `make help` (see the clean rule for an example)
# The help rule runs by default when you just type: make
.DEFAULT_GOAL := help
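# Documented-rule format picked up by the grep below (hypothetical target, shown only as a comment):
# some-target: prerequisites ## one-line description printed by `make help`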
.PHONY: help
help:
	@grep -E '^[a-zA-Z0-9_/.-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
# CLEAN
clean: ## removes output (target) files
	rm ocr/output.txt
	rm $(tmpfile)
# ADMINISTRATIVE RECIPES
dirs: ## create the dirs in the working dir
	@-mkdir -p images/
	@-mkdir -p images-tiff/
	@-mkdir -p output/
	@-mkdir -p output/erase-replace/
	@-mkdir -p ocr/
	@-mkdir -p hocr/
	@echo $(color_r)'Directories made: ocr/ hocr/ images/ images-tiff/ output/'
# POST-PROCESSING RECIPES
ocr/output.txt : ## ocr with tesseract
	echo $(listimgs) > $(@D)/list.txt
	@echo $(basename $@)
	tesseract $(@D)/list.txt $(basename $@)
	python3 src/build_database.py $(@)
tiffs: ## convert images/ to images-tiff/ Dependencies: ImageMagick
	echo $(images)
	for i in $(images); \
	do tiff=`basename $$i .jpg`.tiff; \
	convert -density 300 $$i -colorspace RGB -type truecolor -alpha on images-tiff/$$tiff; \
	echo $$tiff; \
	done;
hocrs: ## hocr with tesseract and then change extension to .html
	for i in images-tiff/*.tiff; \
	do echo $$i; hocrfile=`basename $$i .tiff`; \
	tesseract $$i hocr/$$hocrfile hocr; \
	mv hocr/$$hocrfile.hocr hocr/$$hocrfile.html; \
	done;
# OUTPUT GENERATION RECIPES
reading_structure: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2, weasyprint
	mkdir -p output/reading_structure
	cp src/reading_structure/jquery.min.js output/reading_structure
	cp src/reading_structure/script.js output/reading_structure
	cp src/reading_structure/style.css output/reading_structure
	cat $< | python3 src/reading_structure/reading_structure.py
	weasyprint -s src/reading_structure/print-noun.css output/reading_structure/index.html output/reading_structure/poster_noun.pdf
	weasyprint -s src/reading_structure/print-adv.css output/reading_structure/index.html output/reading_structure/poster_adv.pdf
	weasyprint -s src/reading_structure/print-dppt.css output/reading_structure/index.html output/reading_structure/poster_dppt.pdf
	weasyprint -s src/reading_structure/print-stopword.css output/reading_structure/index.html output/reading_structure/poster_stopword.pdf
	weasyprint -s src/reading_structure/print-neutral.css output/reading_structure/index.html output/reading_structure/poster_neutral.pdf
	weasyprint -s src/reading_structure/print-entity.css output/reading_structure/index.html output/reading_structure/poster_named_entities.pdf
	x-www-browser output/reading_structure/index.html
output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
	cat $< | python3 src/textbotconversation.py $(@)
output/n7.txt: ocr/output.txt ## Replaces nouns with the 7th noun that follows. Dependencies: 91k_nouns
	cat $< | python3 src/n_7.py > $(@)
carlandre: ocr/output.txt ## Alice: Creates visual poetry out of a text. Dependencies: pytest
	@python3 src/carlandre.py
.PHONY: carlandre
# cat $(@) > /dev/usb/lp0
overunder: ocr/output.txt ## Alice: An interpreted language that translates simple weaving instructions and creates a weaving pattern on the text.
	@python3 src/overunder.py
.PHONY: overunder
erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
	python3 src/erase_leastcommon.py
	rm $(input-hocr)
	rm $(images-tiff)
replace: tiffs hocrs ## Natasha: Analyzes pages in order, replaces least common words with most common words. Dependencies: PIL, html5lib, FPDF
	python3 src/replace_leastcommon.py
	rm $(input-hocr)
	rm $(images-tiff)
ttssr-human-only: ocr/output.txt ## Loop: text-to-speech / speech recognition. Dependencies: espeak, pocketsphinx
	bash src/ttssr-loop-human-only.sh ocr/output.txt
chatbook: ocr/output.txt ## Chatbot based on the knowledge of the scans. Dependencies: nltk_rake, irc, nltk
	python3 src/chatbook.py
oulibot: ocr/output.txt ## Chatbot based on the knowledge of the scans. Dependencies: nltk_rake, irc, nltk
	python3 src/oulibot.py