From b490da671a6c2db5422bf30d577541efe0b391fa Mon Sep 17 00:00:00 2001 From: Castro0o Date: Mon, 26 Feb 2018 16:24:29 +0100 Subject: [PATCH] Clarifying target names to avoid doubling tasks. Self-documentation in Makefile. Extensive documentation in README --- Makefile | 85 +++++++++++++++++----------------- README | 94 ++++++++++++++++++++++++++++++++++---- src/textbotconversation.py | 4 +- 3 files changed, 130 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index 66192f9..5dfb1dd 100644 --- a/Makefile +++ b/Makefile @@ -1,69 +1,74 @@ -# TO DO # -# * If ocr (tesseract rule) has been performed do repeate it -# * document depencies -# * remove tmp files - -dir_ocr:="ocr" -images=$(wildcard images/*.jpg) +images=$(sort $(wildcard images/*.jpg)) output_ocr:=$(dir_ocr)/output.txt tmpfile:= $(shell mktemp) space:= $(empty) $(empty) newline:= '\n' -listimgs:= $(subst $(space),$(newline),$(images)) # list of the images, with one filename on each line $(subst $(delimitator),$(replacement),$(list)) +listimgs:= $(subst $(space),$(newline),$(strip $(images))) # list of the images, with one filename on each line $(subst $(delimitator),$(replacement),$(list)); strip avoids empty entries from surrounding spaces OS:= $(shell uname) - -color_w:="\033[0;29m" # Colors +# Colors: add color to output ie @echo $(color_r) output text +color_w:="\033[0;29m" color_r:="\033[0;31m" color_g:="\033[0;32m" color_b:="\033[0;34m" -# add color to output ie @echo $(color_r) something - +# HELP / SELF DOCUMENTATION +# rules whose first line carries a "## description" comment are listed by the help rule (see example in clean rule) +.DEFAULT_GOAL := help # help rule as default when you run: make -##### ADMINISTRATIVE RECIPES +.PHONY: help clean dirs testif visualization tts -dirs: # create the directories for the working structures - @mkdir -p images # scanned image dir - @mkdir -p output # outputs dir - @echo $(color_r)'Directories made' +help: + @grep -E '^[a-zA-Z0-9_./-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -rmtmp: +# CLEAN: -f keeps the rule from failing when there is nothing to remove +clean: ## 
removes output (target) files + rm -f ocr/output.txt + rm -f $(wildcard output/*) rm $(tmpfile) + +# ADMINISTRATIVE RECIPES + +dirs: ## create the dirs in working dir + @-mkdir -p images/ + @-mkdir -p output/ + @echo $(color_r)'Directories made': images/ output/ + + + testif: ifeq ($(OS),Darwin) @echo $(OS) endif -##### POST-PROCESSING RECIPES +# POST-PROCESSING RECIPES -tesseract: - echo $(listimgs) > ocr/list.txt - tesseract ocr/list.txt $(basename $(output_ocr)) +ocr/output.txt: $(images) ## ocr with tesseract; re-run only when the scanned images change + echo $(listimgs) > $(@D)/list.txt + @echo $(basename $@ .txt) + tesseract $(@D)/list.txt $(basename $@ .txt) -##### OUTPUT GENERATION RECIPES -myscript: tesseract - cat $(output_ocr) | python3 src/myscript.py > output/a-new-file.txt +#OUTPUT GENERATION RECIPES -wordtagger: tesseract - cat $(output_ocr) | python3 src/wordtagger.py > output/tagged-words.txt -# DEPENDENCY: nltk, nltk: 'averaged_perceptron_tagger' +output/tagged-words.txt: ocr/output.txt ## tags each word of the ocr output with its word type (noun, verb etc). Dependencies: python3's nltk, nltk's averaged_perceptron_tagger + cat $< | python3 src/wordtagger.py > $(@) +# install nltk's 'averaged_perceptron_tagger': # $ python 3 # >>> import nltk # >>> nltk.download('averaged_perceptron_tagger') -talktochatbot: tesseract - cat $(output_ocr) | python3 src/textbotconversation.py -# depency: chatterbot +output/chatbot.txt: ocr/output.txt ## feeds the ocr output to a chatbot and saves its responses. Dependencies: python3's chatterbot + cat $< | python3 src/textbotconversation.py $(@) + -n+7: tesseract - cat $(output_ocr) | python3 src/n_7.py > output/n7.txt +output/n7.txt: ocr/output.txt ## DESCRIBE WHAT IT DOES. Dependencies: python3's chatterbot + cat $< | python3 src/n_7.py > $(@) -visualization: $(images) $(tmpfile) #requires mplayer +visualization: $(images) $(tmpfile) ## Creates data visualization from images/*.jpg. 
Dependencies: mplayer @echo $(tmpfile) for i in $(images); do \ cat $$i >> $(tmpfile); \ @@ -74,12 +79,8 @@ else cat $(tmpfile) | mplayer -vo x11 -sws 4 -zoom -vf dsize=720:720 -demuxer rawvideo -rawvideo w=50:h=50:i420:fps=25 -; endif -tts: # Text-to-speech - cat $(output_ocr) | espeak - - - -# ** Makefile Syntax notes ** -# @ preceding command tells make not to print the command being executed -# +tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espeak + @echo $(color_r) speaking $^ + @echo $(color_w) + cat $^ | espeak diff --git a/README b/README index 1a6bfd5..a348d9c 100644 --- a/README +++ b/README @@ -1,11 +1,87 @@ -*Tools for working with scanned pages* - -Makefile: -* `make dirs`: create the working folder structure -* `make tessaract` **dependency: tesseract** -* `make myscript` -* `make visualization`: **dependency: mplayer** creates visualization of images/ dir, by cating the images content into mplayer. See more option ins [shiftop](https://git.bleu255.com/shiftop/file/shiftop.html) -* `make wordtagger`: **dependency: tesseract** Uses scanned pages as an input, tags each word for their wordtype (noun, verb etc) and saves it in a text file. -* `make talktochatbot`: **dependency: ChatterBot** talkes with the ocred file +# Tools for scanned pages + +Get help on the different makefile targets by running: +`make` + +# Makefile Documentation + +## SYNTAX: AUTOMATIC VARIABLES +https://www.gnu.org/software/make/manual/html_node/Automatic-Variables.html + +* $@ file name of the target of the rule. +* $< name of the first prerequisite. +* $? names of all the prerequisites that are newer than the target, with spaces between them. 
+* $^ names of all the prerequisites, +* $(@D) directory part of the file name of the target, + +## SYNTAX: PRINTING / ERRORS +@ preceding a command tells make not to print the command being executed +- preceding a command tells make to ignore errors in a recipe line + + +## DEPENDENCIES AND RULES +a rule "asks" a *dependency* to be executed, only if the dependency does not exist as a file +i.e. I have the 2 following rules in my make file: + +``` +list.txt: + ls . -1 > $@ + +tts: list.txt + cat $< | espeak +``` +when i run: `make tts` +tts rule will execute its dependency list.txt IF list.txt does not exist in the top level of the working directory. ELSE it will NOT execute the list.txt rule + + +## TARGET NAMES +**use as rules' targets (first line of a rule,left of :) the name of resulting file(s)** + +One main problem in the OuNuPo makefile is the execution of the tesseract rule, every time another rule requests it (in make lingo has it as a dependency). This is a duplication of the same process (ocr), which takes quite some time, and hence we want to avoid repeating, if the scanned images haven't changed. +Make has a very simple way of **avoiding this duplication of a process/rule**. +It is done by **carefully defining the rules' target** - the name given to the rule, by which we invoke it in order to execute that rule. +By having **the target of a rule take the name of the file(s), which will result from the rule's execution**, Make will **check if that file is "out of date"**. 
+"A target is out of date if it does not exist or if it is older than any of the prerequisites " + +If the target/resulting file(s) are **not present**, the dependency rule will be executed +If the target/resulting file(s) are **present and up to date**, the dependency rule will NOT be executed + +what follows is a simple example +``` +foo.txt: + echo "this is a test" >> foo.txt + @echo "$@ was made" + +art: foo.txt + cat foo.txt | figlet + @echo "$@ was made" +``` +The first time you run `make art` the foo.txt dependency rule will be executed +In subsequent runs of `make art` the foo.txt dependency rule will NOT be executed, because its target: foo.txt is already in the make working directory +To trigger the execution of the foo.txt rule, we need to remove its target from the working directory +That task is often delegated to a rule with target `clean` which removes the files/targets of make, such as + +``` +clean: ## removes output (target) files + rm ocr/output.txt + rm $(wildcard output/*) + rm $(tmpfile) +``` + +After the target has been removed this way, the foo.txt rule is executed again (as a dependency) when running `make art` + +Targets can also include subfolders: + +``` +output/art.txt: foo.txt + cat foo.txt | figlet > $@ + @echo "$@ was made" +``` +which can be invoked by `make output/art.txt` + +Read more on https://www.gnu.org/software/make/manual/html_node/Rule-Syntax.html#Rule-Syntax + +# LINKS +* About Makefile syntax: [5 Writing Recipes in Rules](https://www.gnu.org/software/make/manual/make.html#Recipes) diff --git a/src/textbotconversation.py b/src/textbotconversation.py index 02d402c..71a92e4 100644 --- a/src/textbotconversation.py +++ b/src/textbotconversation.py @@ -1,6 +1,6 @@ from chatterbot import ChatBot -from sys import stdin, stderr, stdout +from sys import stdin, stderr, stdout, argv import nltk.data @@ -27,7 +27,7 @@ for sen in sentences: ns.append(response.text) -file = open('output/whatdoesthechatbotsay.txt','w') +file = open(argv[1],'w') file.write("\n".join(ns))