Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNuPo-make

7 years ago · cc9c5b39ac
parent 0b520856e4 3ffebdc81c
commit cc9c5b39ac
12 changed files with 74 additions and 48 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,5 @@
 images/**
 output/**
 src/index.json
-.DS_Store
+.DS_Store
+src/**.wav
--- a/14
+++ b/14
@ -1,6 +1,7 @@
-images=$(sort $(wildcard images/*.jpg))
+images=$(sort $(wildcard images/*))
 # @andre make wildcard so that it takes any image file but doesn't take the listimg.txt file
 images-tiff=$(sort $(wildcard images-tiff/*.tiff))
+input-hocr=$(sort $(wildcard hocr/*))
 output_ocr:=$(dir_ocr)/output.txt
 tmpfile:= $(shell mktemp)
 space:= $(empty) $(empty)
@ -25,7 +26,6 @@ help:
 # CLEAN
 clean: ## removes output (target) files
 	rm ocr/output.txt
-	rm $(wildcard output/*)
 	rm $(tmpfile)


@ -59,7 +59,7 @@ tiffs: ## convert images/ to images-tiff/ Depends on IM
 	echo $(images)
 	for i in $(images); \
 	do tiff=`basename $$i .jpg`.tiff; \
-	convert -density 300 $$i -alpha on images-tiff/$$tiff; \
+	convert -density 300 $$i -colorspace RGB -type truecolor -alpha on images-tiff/$$tiff; \
 	echo $$tiff; \
 	done;

@ -99,11 +99,15 @@ overunder: ocr/output.txt ## Alice: An interpreted language that translate simpl
 	@python3 src/overunder.py
 .PHONY: overunder

-erase:hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
+erase: tiffs hocrs  ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
 	python3 src/erase_leastcommon.py
+	rm $(input-hocr)
+	rm $(images-tiff)

-replace:hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
+replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
 	python3 src/replace_leastcommon.py
+	rm $(input-hocr)
+	rm $(images-tiff)

 visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer
 	@echo $(tmpfile)
--- a/60
+++ b/60
@ -20,18 +20,12 @@ Natasha Berting, Angeliki Diakrousi, Joca van der Horst, Alexander Roidl, Alice
 * Python3
 * GNU make
 * Python3 NLTK  `pip3 install nltk`
-* NLTK English Corpus:
-    * run NLTK downloader `python -m nltk.downloader`
-    * select menu "Corpora"
-    * select "stopwords"
-    * "Dowload"
-


 # Make commands

 ## N+7 (example) Author
-Description: Replaces every word with the 7th next word in a dictionary.
+Description: Replaces every noun with the 7th next noun in a dictionary. Inspired by an Oulipo work of the same name.

 run: `make N+7`

@ -48,14 +42,13 @@ run: `make ttssr-human-only`

 Specific Dependencies:

-* PocketSphinx pacakge `sudo aptitude install pocketsphinx pocketsphinx-en-us`
-Python Libaries:
-* PocketSphinx: `sudo pip3 install PocketSphinx`, install dependencies: `sudo apt-get install gcc automake autoconf libtool bison swig python-dev libpulse-dev`
+* PocketSphinx package `sudo aptitude install pocketsphinx pocketsphinx-en-us`
+* PocketSphinx: `sudo pip3 install PocketSphinx`
+* Python Libraries: `sudo apt-get install gcc automake autoconf libtool bison swig python-dev libpulse-dev`
 * Speech Recognition: `sudo pip3 install SpeechRecognition`
 * TermColor: `sudo pip3 install termcolor`
 * PyAudio: `pip3 install pyaudio`

-
 ## Reading the Structure: Joca
 Description: Uses OCR'ed text as an input, labels each word for Part-of-Speech, stopwords and sentiment. Then it generates a reading interface
 where words with a specific label are hidden. Output can be saved as poster, or exported as json featuring the full data set.
@ -72,3 +65,48 @@ Specific Dependencies:
 * jinja2 (http://jinja.pocoo.org/docs/2.10/intro/#installation)
 * font: PT Sans (os font https://www.fontsquirrel.com/fonts/pt-serif)
 * font: Ubuntu Mono (os font https://www.fontsquirrel.com/fonts/ubuntu-mono)
+
+## Erase / Replace: Natasha
+Description: Receives your scanned pages in order, then analyzes each image and its vocabulary. Finds and crops the least common words, and either erases them, or replaces them with the most common words. Outputs a PDF of increasingly distorted scan images.
+
+for erase script run: `make erase`
+for replace script run: `make replace`
+
+Specific Dependencies:
+* NLTK English Corpus:
+    * run NLTK downloader `python -m nltk.downloader`
+    * select menu "Corpora"
+    * select "stopwords"
+    * "Download"
+* Python Image Library (PIL):  `pip3 install Pillow`
+* PDF generation for Python (FPDF): `pip3 install fpdf`
+* HTML5lib: `pip3 install html5lib`
+
+Notes & Bugs:
+This script is very picky about the input images it can work with. For best results, please use high resolution images in RGB colorspace. Errors can occur when image modes do not match or tesseract cannot successfully make HOCR files.
+
+## carlandre: Alice
+Description: Generates concrete poetry from a text file. If you're connected to a printer located at `/dev/usb/lp0`, you can print the poem.
+
+run: `make carlandre`
+
+Dependencies:
+* pytest (Documentation: https://docs.pytest.org/en/latest/getting-started.html)
+
+## over/under: Alice
+Description: Interpreted programming language written in Python3 which translates basic weaving instructions into code and applies them to text.
+
+run: `make overunder`
+
+Instructions:
+over/under works with specific commands which execute specific instructions.
+When running, an interpreter will open:
+>
+To load your text, type 'load'. This is necessary before any other instructions. Every time you load the text, the previous instructions will be discarded.
+To see the line you are currently on, type 'show'.
+To start your pattern, type 'over' or 'under', each followed by an integer, separated by a comma.
+e.g. over 5, under 5, over 6, under 10
+To move on to the next line of text, press enter twice.
+To see your pattern, type 'pattern'.
+To save your pattern in a text file, type 'save'.
+To leave the program, type 'quit'.
--- a/ocr/list.txt
+++ b/ocr/list.txt
@ -1,3 +1,3 @@

-images/0029.jpg
+images/0012.tif

--- a/ocr/output.txt
+++ b/ocr/output.txt
@ -1,21 +0,0 @@
-Write it down quickly
-before I forget
-in the car with D. and N.
-cutting across America’s seasons
-muggy sunlight in Santa Barbara
-wet snow in Denver
-and in every Best Western hotel
-the TV’s flickering light
-on her dear sleeping face
-like a young girl once again
-
-but writing down the words
-alters what I want to remember
-that which had no words
-was a living breathing image
-so now I have two versions of the same
-today I can superimpose them
-but tomorrow when I’m gone
-only the words are left
-signs evoking something
-that no eye sees any more
--- a/src/carlandre.py
+++ b/src/carlandre.py
@ -118,7 +118,8 @@ if os.path.exists(my_path):
        "init_printer":  "\x1B\x40",
        'papercut':'\x1D\x56\x00',
    }
-
+    
+    emptylines= "\n\n\n\n"    
    print(escpos['init_printer'])
    print(joined_list)
    print(emptylines)
--- a/src/erase_leastcommon.py
+++ b/src/erase_leastcommon.py
@ -23,7 +23,7 @@ def findleastcommon(list):
 	fdist = FreqDist(word.lower() for word in list) 
 	leastcommon = fdist.most_common()
 	for i in leastcommon:
-		if (i[1] <= limit):
+		if (i[1] <= 1):
 			leastcommon_list.append(i[0])
 	return leastcommon_list

--- a/src/overunder.py
+++ b/src/overunder.py
@ -77,6 +77,7 @@ def eval(cmds):
            print('\n'.join(pattern))

        elif cmd[0] == 'save':
+            pattern = text[0:line_number + 1]
            pattern_file = open('output/patternfile.txt', 'w')
            pattern_file.write('\n'.join(pattern))
            pattern_file.close()
--- a/src/replace_leastcommon.py
+++ b/src/replace_leastcommon.py
@ -40,7 +40,7 @@ def findleastcommon(list):
 	fdist = FreqDist(word.lower() for word in list) 
 	leastcommon = fdist.most_common()
 	for i in leastcommon:
-		if (i[1] <= limit):
+		if (i[1] <= 1):
 			leastcommon_list.append(i[0])
 	return leastcommon_list

@ -92,6 +92,8 @@ for i in scanimg:
 	mostcommon_list = findmostcommon(clean_words, 30) #find most common words and add them to list

 	print ('The most common words until text', x+1, 'are:', mostcommon_list)
+	print ('The least common words until text', x+1, 'are:', leastcommon_list)
+
 	print ('') 

 	# loop through every word in hocr file to extract coordinates, then remove or paste into output image
@ -128,11 +130,11 @@ for i in scanimg:
 			wimcolor7 = Image.new('RGBA', wimreplace7.size, (250, 230, 0, 90))
 			wimcolor_more = Image.new('RGBA', wimreplace_more.size, (250, 230, 0, 90))

-			out4 = Image.alpha_composite(wimreplace4, wimcolor4)
-			out7 = Image.alpha_composite(wimreplace7, wimcolor7)
-			out_more = Image.alpha_composite(wimreplace_more, wimcolor_more)
+			out4 = Image.alpha_composite(wimreplace4.convert('RGBA'), wimcolor4)
+			out7 = Image.alpha_composite(wimreplace7.convert('RGBA'), wimcolor7)
+			out_more = Image.alpha_composite(wimreplace_more.convert('RGBA'), wimcolor_more)

-			if word.lower() in leastcommon_list and len(word) <= limit:
+			if word.lower() in leastcommon_list and len(word) <= 3:
 				oim.paste(wim, (c[0], c[1], c[2], c[3]))

 			elif word.lower() in leastcommon_list and len(word) < 8:
--- a/src/ttssr-loop-human-only.sh
+++ b/src/ttssr-loop-human-only.sh
@ -5,9 +5,9 @@ head -n 1 $1 > output/input0.txt
 while [[ $i -le 10 ]]
 	do echo $i
 	cat output/input$i.txt 
-	python3 src/write_audio.py src/sound$i.wav 2> /dev/null
+	python3 src/ttssr_write_audio.py src/sound$i.wav 2> /dev/null
 	play src/sound$i.wav repeat 5 2> /dev/null & #in the background the sound, without it all the sounds play one by one//2 is stderr
-	python3 src/audio_transcribe.py sound$i.wav > output/input$((i+1)).txt 2> /dev/null
+	python3 src/ttssr_transcribe.py sound$i.wav > output/input$((i+1)).txt 2> /dev/null
 	sleep 1
 	(( i++ ))
 done
--- a/src/ttssr_transcribe.py
+++ b/src/ttssr_transcribe.py
--- a/src/ttssr_write_audio.py
+++ b/src/ttssr_write_audio.py