Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNuPo-make

master
Natasha Berting 7 years ago
commit f3b3ac5a26

@ -39,7 +39,7 @@ dirs: ## create the dirs in working dir
@-mkdir -p output/erase-replace/ @-mkdir -p output/erase-replace/
@-mkdir -p ocr/ @-mkdir -p ocr/
@-mkdir -p hocr/ @-mkdir -p hocr/
@echo $(color_r)'Directories made': ocr/ hocr/ images/ images-tiff/ output/ @echo $(color_r)'Directories made': ocr/ hocr/ images/ images-tiff/ output/
testif: testif:
@ -73,7 +73,7 @@ hocrs: ## hocr with tesseract and then change extension to .html
#OUTPUT GENERATION RECIPES #OUTPUT GENERATION RECIPES
output/reading_structure/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2, weasyprint reading_structure: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2, weasyprint
mkdir -p output/reading_structure mkdir -p output/reading_structure
cp src/reading_structure/jquery.min.js output/reading_structure cp src/reading_structure/jquery.min.js output/reading_structure
cp src/reading_structure/script.js output/reading_structure cp src/reading_structure/script.js output/reading_structure
@ -81,6 +81,7 @@ output/reading_structure/index.html: ocr/output.txt ## Analyzes OCR'ed text usin
cp src/reading_structure/print.css output/reading_structure cp src/reading_structure/print.css output/reading_structure
cat $< | python3 src/reading_structure/reading_structure.py cat $< | python3 src/reading_structure/reading_structure.py
weasyprint -s output/reading_structure/print.css output/reading_structure/index.html output/reading_structure/poster.pdf weasyprint -s output/reading_structure/print.css output/reading_structure/index.html output/reading_structure/poster.pdf
x-www-browser output/reading_structure/index.html
output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dependencies: python3's chatterbot
cat $< | python3 src/textbotconversation.py $(@) cat $< | python3 src/textbotconversation.py $(@)
@ -120,7 +121,7 @@ endif
ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
bash src/ttssr/ttssr-loop-human-only.sh ocr/output.txt bash src/ttssr-loop-human-only.sh ocr/output.txt
chatbook: ocr/output.txt #chatbot based on the knowledge of the scans Dependencies: nltk_rake, irc, nltk chatbook: ocr/output.txt #chatbot based on the knowledge of the scans Dependencies: nltk_rake, irc, nltk
python3 src/chatbook.py python3 src/chatbook.py

@ -47,12 +47,28 @@ Description: Speech recognition feedback loops using the first sentence of a sca
run: `make ttssr-human-only` run: `make ttssr-human-only`
Specific Dependencies: Specific Dependencies:
* [pocketsphinx](https://github.com/bambocher/pocketsphinx-python) `sudo pip3 install pocketsphinx` ---> FOLLOW THIS EXAMPLE
* SpeechRecognition 3.8.1
* PyAudio
* PocketSphinx package `sudo aptitude install pocketsphinx pocketsphinx-en-us`
Python Libraries:
* PocketSphinx: `sudo pip3 install PocketSphinx`
* Speech Recognition: `sudo pip3 install SpeechRecognition`
* TermColor: `sudo pip3 install termcolor`
* PyAudio: `pip3 install pyaudio`
## Reading the Structure: Joca
Description: Uses OCR'ed text as an input and labels each word for Part-of-Speech, stopwords and sentiment. It then generates a reading interface
where words with a specific label are hidden. Output can be saved as a poster, or exported as JSON featuring the full data set.
run: `make reading_structure`
Specific Dependencies:
* nltk (http://www.nltk.org/install.html)
* nltk.tokenize.punkt, ne_chunk, pos_tag, word_tokenize, sentiment.vader
* `nltk.download('vader_lexicon')` (https://www.nltk.org/data.html)
* weasyprint (http://weasyprint.readthedocs.io/en/latest/install.html)
* jinja2 (http://jinja.pocoo.org/docs/2.10/intro/#installation)
* font: PT Sans (os font https://www.fontsquirrel.com/fonts/pt-serif)
* font: Ubuntu Mono (os font https://www.fontsquirrel.com/fonts/ubuntu-mono)

@ -2,7 +2,6 @@ import pytest
from math import ceil from math import ceil
import sys import sys
from sys import stdout from sys import stdout
import time
import os.path import os.path

@ -36,6 +36,7 @@ def eval(cmds):
global text global text
global line_number global line_number
global last_index global last_index
global pattern
for cmd in cmds: for cmd in cmds:
if cmd == []: if cmd == []:
@ -75,6 +76,11 @@ def eval(cmds):
pattern = text[0:line_number + 1] pattern = text[0:line_number + 1]
print('\n'.join(pattern)) print('\n'.join(pattern))
elif cmd[0] == 'save':
pattern_file = open('output/patternfile.txt', 'w')
pattern_file.write('\n'.join(pattern))
pattern_file.close()
print('Your pattern has been saved in the output folder.')
elif cmd[0] == 'quit': elif cmd[0] == 'quit':
print('Come back soon!') print('Come back soon!')

@ -5,10 +5,10 @@ head -n 1 $1 > output/input0.txt
while [[ $i -le 10 ]] while [[ $i -le 10 ]]
do echo $i do echo $i
cat output/input$i.txt cat output/input$i.txt
python3 src/ttssr/write_audio.py src/sound$i.wav 2> /dev/null python3 src/write_audio.py src/sound$i.wav 2> /dev/null
play src/sound$i.wav repeat 5 2> /dev/null & #in the background the sound, without it all the sounds play one by one//2 is stderr play src/sound$i.wav repeat 5 2> /dev/null & #in the background the sound, without it all the sounds play one by one//2 is stderr
python3 src/ttssr/audio_transcribe.py sound$i.wav > output/input$((i+1)).txt 2> /dev/null python3 src/audio_transcribe.py sound$i.wav > output/input$((i+1)).txt 2> /dev/null
sleep sleep 1
(( i++ )) (( i++ ))
done done
today=$(date +%Y%m%d.%H-%M); today=$(date +%Y%m%d.%H-%M);
Loading…
Cancel
Save