Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNuPo-make

7 years ago · a4e60e1f52
parent cf7697fb05 5bf28905b6
commit a4e60e1f52
2 changed files with 9 additions and 5 deletions
--- a/5
+++ b/5
@ -38,7 +38,7 @@ dirs: ## create the dirs in working dir
 	@-mkdir -p output/erase-replace/
 	@-mkdir -p ocr/
 	@-mkdir -p hocr/
-	@echo $(color_r)'Directories made': ocr/ hocr/ images/ images-tiff/ output/ 
+	@echo $(color_r)'Directories made': ocr/ hocr/ images/ images-tiff/ output/


 testif:
@ -72,7 +72,7 @@ hocrs: ## hocr with tesseract and then change extension to .html

 #OUTPUT GENERATION RECIPES

-output/reading_structure/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2, weasyprint
+reading_structure: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2, weasyprint
 	mkdir -p output/reading_structure
 	cp src/reading_structure/jquery.min.js output/reading_structure
 	cp src/reading_structure/script.js output/reading_structure
@ -80,6 +80,7 @@ output/reading_structure/index.html: ocr/output.txt ## Analyzes OCR'ed text usin
 	cp src/reading_structure/print.css output/reading_structure
 	cat $< | python3 src/reading_structure/reading_structure.py
 	weasyprint -s output/reading_structure/print.css output/reading_structure/index.html output/reading_structure/poster.pdf
+	x-www-browser output/reading_structure/index.html

 output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
 	cat $< | python3 src/textbotconversation.py $(@)
--- a/9
+++ b/9
@ -53,18 +53,21 @@ Python Libaries:
 * PocketSphinx: `sudo pip3 install PocketSphinx`
 * Speech Recognition: `sudo pip3 install SpeechRecognition`
 * TermColor: `sudo pip3 install termcolor`
-* PyAudio: `pip3 install pyaudio` 
+* PyAudio: `pip3 install pyaudio`


 ## Reading the Structure: Joca
 Description: Takes OCR'ed text as input and labels each word with its part of speech, stopword status, and sentiment. It then generates a reading interface
 where words with a specific label are hidden. The output can be saved as a poster, or exported as JSON containing the full data set.

-run: `make output/reading_structure/index.html`
+run: `make reading_structure`

 Specific Dependencies:
 * nltk (http://www.nltk.org/install.html)
-* nltk.tokenize.punkt, ne_chunk, pos_tag, word_tokenize, sentiment.vader (https://www.nltk.org/data.html)
+* nltk.tokenize.punkt, ne_chunk, pos_tag, word_tokenize, sentiment.vader
+* the VADER lexicon, downloaded from within Python with
+  `nltk.download('vader_lexicon')`
+  (https://www.nltk.org/data.html)
 * weasyprint (http://weasyprint.readthedocs.io/en/latest/install.html)
 * jinja2 (http://jinja.pocoo.org/docs/2.10/intro/#installation)
 * font: PT Sans (os font https://www.fontsquirrel.com/fonts/pt-serif)