Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNuPo-make

master
commit 3236fa6493

.gitignore

@@ -2,3 +2,4 @@ images/**
output/**
src/index.json
.DS_Store
+src/**.wav

Makefile

@@ -1,4 +1,4 @@
-images=$(sort $(wildcard images/*.jpg))
+images=$(sort $(wildcard images/*))
# @andre make wildcard so that it takes any image file but doesn't take the listimg.txt file
images-tiff=$(sort $(wildcard images-tiff/*.tiff))
input-hocr=$(sort $(wildcard hocr/*))
@@ -26,7 +26,6 @@ help:
# CLEAN
clean: ## removes output (target) files
rm ocr/output.txt
-rm $(wildcard output/*)
rm $(tmpfile)
@@ -60,7 +59,7 @@ tiffs: ## convert images/ to images-tiff/ Depends on IM
echo $(images)
for i in $(images); \
do tiff=`basename $$i .jpg`.tiff; \
-convert -density 300 $$i -alpha on images-tiff/$$tiff; \
+convert -density 300 $$i -colorspace RGB -type truecolor -alpha on images-tiff/$$tiff; \
echo $$tiff; \
done;
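The recipe change above forces the intermediate TIFFs into RGB truecolor, which matches the README note further down about the erase/replace scripts wanting high-resolution RGB input. As a rough illustration only, here is a Pillow sketch of what the `convert` loop does; the paths mirror the Makefile variables, but the code is not part of the repository and omits the `-alpha on` flag:

```python
# Illustrative Pillow approximation of the convert recipe above:
# images/* -> images-tiff/*.tiff at 300 dpi, forced to RGB truecolor.
import glob
import os
from PIL import Image

os.makedirs('images-tiff', exist_ok=True)
for path in sorted(glob.glob('images/*')):
    name = os.path.splitext(os.path.basename(path))[0]
    img = Image.open(path).convert('RGB')  # force RGB truecolor
    img.save(os.path.join('images-tiff', name + '.tiff'), dpi=(300, 300))
```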
@@ -103,10 +102,12 @@ overunder: ocr/output.txt ## Alice: An interpreted language that translate simpl
erase: tiffs hocrs ## Natasha: Analyzes pages in order, erases least common words from view. Dependencies: PIL, html5lib, FPDF
python3 src/erase_leastcommon.py
rm $(input-hocr)
+rm $(images-tiff)
replace:tiffs hocrs ## Natasha: Analyzes pages in order, replace least common words with most common words. Dependencies: PIL, html5lib, FPDF
python3 src/replace_leastcommon.py
rm $(input-hocr)
+rm $(images-tiff)
visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer
@echo $(tmpfile)

README.md

@@ -20,18 +20,12 @@ Natasha Berting, Angeliki Diakrousi, Joca van der Horst, Alexander Roidl, Alice
* Python3
* GNU make
* Python3 NLTK `pip3 install nltk`
-* NLTK English Corpus:
-* run NLTK downloader `python -m nltk.downloader`
-* select menu "Corpora"
-* select "stopwords"
-* "Download"
# Make commands
## N+7 (example) Author
-Description: Replaces every word with the 7th next word in a dictionary.
+Description: Replaces every noun with the 7th next noun in a dictionary. Inspired by an Oulipo work of the same name.
run: `make N+7`
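For readers new to the constraint, a minimal sketch of the N+7 idea follows. It is not the repository's script; the NLTK tokenizer/tagger and the externally supplied noun list are assumptions (the `punkt` and `averaged_perceptron_tagger` data packages would need to be downloaded first):

```python
# Minimal N+7 sketch: replace each noun with the noun 7 places further on in an
# alphabetised noun list. Illustrative only; the noun list is a stand-in.
import nltk

def n_plus_7(text, nouns, n=7):
    nouns = sorted(set(w.lower() for w in nouns))
    index = {w: i for i, w in enumerate(nouns)}
    out = []
    for word, tag in nltk.pos_tag(nltk.word_tokenize(text)):
        if tag.startswith('NN') and word.lower() in index:
            out.append(nouns[(index[word.lower()] + n) % len(nouns)])
        else:
            out.append(word)
    return ' '.join(out)
```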
@@ -55,7 +49,6 @@ Specific Dependencies:
* TermColor: `sudo pip3 install termcolor`
* PyAudio: `pip3 install pyaudio`
## Reading the Structure: Joca
Description: Uses OCR'ed text as an input, labels each word for Part-of-Speech, stopwords and sentiment. Then it generates a reading interface
where words with a specific label are hidden. Output can be saved as poster, or exported as json featuring the full data set.
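A minimal sketch of the labelling step described above, assuming NLTK supplies the part-of-speech tags, the stopword list and (via VADER, purely as an example) a per-word sentiment score; the actual script may label differently, and the function name is a placeholder:

```python
# Per-word labelling sketch: part-of-speech, stopword flag, sentiment score.
# Needs the NLTK data packages punkt, averaged_perceptron_tagger, stopwords
# and vader_lexicon. Illustrative only.
import nltk
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer

def label_words(text):
    stop = set(stopwords.words('english'))
    sia = SentimentIntensityAnalyzer()
    return [{'word': w,
             'pos': pos,
             'stopword': w.lower() in stop,
             'sentiment': sia.polarity_scores(w)['compound']}
            for w, pos in nltk.pos_tag(nltk.word_tokenize(text))]
```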
@@ -72,3 +65,24 @@ Specific Dependencies:
* jinja2 (http://jinja.pocoo.org/docs/2.10/intro/#installation)
* font: PT Sans (os font https://www.fontsquirrel.com/fonts/pt-serif)
* font: Ubuntu Mono (os font https://www.fontsquirrel.com/fonts/ubuntu-mono)
+## Erase / Replace: Natasha
+Description: Receives your scanned pages in order, then analyzes each image and its vocabulary. Finds and crops the least common words, and either erases them, or replaces them with the most common words. Outputs a PDF of increasingly distorted scan images.
+for erase script run: `make erase`
+for replace script run: `make replace`
+Specific Dependencies:
+* NLTK English Corpus:
+* run NLTK downloader `python -m nltk.downloader`
+* select menu "Corpora"
+* select "stopwords"
+* "Download"
+* Python Image Library (PIL): `pip3 install Pillow`
+* PDF generation for Python (FPDF): `pip3 install fpdf`
+* HTML5lib: `pip3 install html5lib`
+Notes & Bugs:
+This script is very picky about the input images it can work with. For best results, please use high resolution images in RGB colorspace. Errors can occur when image modes do not match or tesseract cannot successfully make HOCR files.
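A condensed sketch of the erase step described above, assuming (as the diffs further down suggest) that word frequencies come from NLTK's FreqDist and word positions from tesseract's hOCR bounding boxes. The file paths and the ElementTree-based hOCR parsing are illustrative stand-ins; the scripts themselves list html5lib as the parser and FPDF for the final PDF:

```python
# Illustrative erase step: find words that occur only once, look up their hOCR
# bounding boxes, and paste a blank patch over each occurrence.
import re
from xml.etree import ElementTree
from nltk import FreqDist
from PIL import Image

def erase_rare_words(hocr_path, image_path, out_path):
    tree = ElementTree.parse(hocr_path)
    words = [(span.text or '', span.get('title', ''))
             for span in tree.iter() if span.get('class') == 'ocrx_word']
    freq = FreqDist(w.lower() for w, _ in words if w)
    rare = {w for w, n in freq.items() if n <= 1}

    img = Image.open(image_path).convert('RGB')
    blank = Image.new('RGB', img.size, 'white')
    for word, title in words:
        m = re.search(r'bbox (\d+) (\d+) (\d+) (\d+)', title)
        if m and word.lower() in rare:
            box = tuple(int(v) for v in m.groups())
            img.paste(blank.crop(box), box)  # cover the rare word with blank paper
    img.save(out_path)
```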

@@ -1,101 +0,0 @@
-Any one is one having been that one Any one is such a one.
-Any one having been that one is one remembering something oi such a thing, is one
-remembering having been that one.
-Each one having been one is being one having been that one. Each one haying been
-one is remembering something of this thing, is remembering something or haying been
-that one
-Each one is one. Each one has been one. Each one being one, each one havrng been
-one is remembering something or that thing.
-Each one is one. Each one has been one. Each one is remembering that thing.
-Each one is one. Each one has been one. That is something that any one haying been
-one, any one being one is having happen Each one being one is haying it happen that
-that one is being that one. Each one having been one is one havrng had it happen that
-that one has been that one.
-Each one is one. Any one is the one that one is Each one is one.
-One who is one is remembering that she is one forgetting anything. One who is one is
-remembering that she is forgetting everything again and again She is remembering
-this thing She is not interested in this thing She is remembering this thing and she is
-remembering that this is a quite necessary thing, it is quite a necessary thing that she IS
-remembering that she is iorgetting anything.
-She is to getting anything This is not a disturbing thing, this is not a distressing thing,
-this is not an important thing She is iorgetting anything and she is remembering that
-thing, she is remembering that she is forgetting anything.
-She is ore being one remembering that she is forgetting anything She is one not
-objecting to being one remembering that thing, remembering that she is forgetting
-anything She is one objecting to there being some objecting to being ones forgetting
-anything She is one objecting to any one being one remembering that they are not
-iorgetting anything She is one objecting to any one objecting to her being one
-forgetting anything She is not one remembering being one objecting to any one
-objecting to her being one iorgetting anything She is one remembering that she is one
-objecting to being one remembering that they are not forgetting anything. She is one
-remembering something of being one objecting to some being one objecting to
-forgetting anything
-She is one forgetting anything. She is one remembering something of this thing She is
-one repeating this thing repeating remembering something of forgetting anything,
-She is one remembering that she has been having something. She is one remembering
-something of this thing She has been having something, she is having something, she
-is remembering something of this thing. She is not objecting to having something, she
-is having something she is remembering something of this thing
-She is one being that one being one having something and remembering something of
-that thing She is one being one and she is iorgetting anything and she is remembering
-being one forgetting anything.
-Any one she is kissing is one she is kissing then, not kissing again and again, not
-fissing and kissing, any one she is kissing is one she kissed then, is one she did kiss
-then. one she kissed some then
-Any one she is kissing is one needing something then, needing kissing, needing
-anytinng first then, needing some kissing then. Any one she is kissing is one having
-been kissed then, having been kissed some then and she was the one who was kissing
-that one some just then Any one she was kissing was one whom she was kissing just
-from hows Vests to i narrating Ouhry
-that Any one she was kissing was one who might have been needing something then,
-needing anything then, needing kissing then, needing a little kissing then, needing any ‘»
-Mtg anything then, needing kissing then, needing a little kissing then, needing any
-kiss!!!) then, needing something then, needing kissing then
-She was one living and remembering that she had enough for this thing, enough for
-than She was one remembering that she had enough for being livrng and she was
-runemben'ng that she could always be needing that thing needing having enough to be
-m She could remembering to remind herself and any one of this thing, she could
-W that thing, she could remember to be reminded of that thing. She could
-W to beone reminding herself, she could remember to be one havmg any one
-rewind her quite often of this thing that she could remember that she had enough and
-wot“ be always having enough to be livrng. She could remembering that she was
-needing this thing needing having enough always enough for livmg. She could
-rentemtler enough oi reminding any one of this thing. She could remember this thing
-rectum reminding herself of this thing. She could remember something of being
-rein-Iced of this thing. She could remember this thing, she could remember a good
-deal of knowing that she was havmg enough for berng living and that she could always
-be neodng having enough for liVing. She could remember this thing, she could qurte
-new that thing.
-She was one forgetting anything. She was remembering something of that thing of
-toasting anything She could always remember something of that thing, remember
-m of forgetting anything.
-in giving she was giving what she had then remembered to give then In gwing she

@@ -77,6 +77,7 @@ def eval(cmds):
print('\n'.join(pattern))
elif cmd[0] == 'save':
+pattern = text[0:line_number + 1]
pattern_file = open('output/patternfile.txt', 'w')
pattern_file.write('\n'.join(pattern))
pattern_file.close()

@@ -40,7 +40,7 @@ def findleastcommon(list):
fdist = FreqDist(word.lower() for word in list)
leastcommon = fdist.most_common()
for i in leastcommon:
-if (i[1] <= limit):
+if (i[1] <= 1):
leastcommon_list.append(i[0])
return leastcommon_list
@@ -92,6 +92,8 @@ for i in scanimg:
mostcommon_list = findmostcommon(clean_words, 30) #find most common words and add them to list
print ('The most common words until text', x+1, 'are:', mostcommon_list)
+print ('The least common words until text', x+1, 'are:', leastcommon_list)
print ('')
# loop through every word in hocr file to extract coordinates, then remove or paste into output image
@@ -128,11 +130,11 @@ for i in scanimg:
wimcolor7 = Image.new('RGBA', wimreplace7.size, (250, 230, 0, 90))
wimcolor_more = Image.new('RGBA', wimreplace_more.size, (250, 230, 0, 90))
-out4 = Image.alpha_composite(wimreplace4, wimcolor4)
-out7 = Image.alpha_composite(wimreplace7, wimcolor7)
-out_more = Image.alpha_composite(wimreplace_more, wimcolor_more)
+out4 = Image.alpha_composite(wimreplace4.convert('RGBA'), wimcolor4)
+out7 = Image.alpha_composite(wimreplace7.convert('RGBA'), wimcolor7)
+out_more = Image.alpha_composite(wimreplace_more.convert('RGBA'), wimcolor_more)
-if word.lower() in leastcommon_list and len(word) <= limit:
+if word.lower() in leastcommon_list and len(word) <= 3:
oim.paste(wim, (c[0], c[1], c[2], c[3]))
elif word.lower() in leastcommon_list and len(word) < 8:
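The `.convert('RGBA')` calls added in this hunk address the mode-mismatch errors mentioned in the README notes: `Image.alpha_composite` raises a ValueError unless both inputs are RGBA images of the same size. A minimal illustration with throwaway images (sizes are arbitrary; the yellow tuple mirrors the script):

```python
# alpha_composite needs two RGBA images of identical size; converting the
# base image first avoids the wrong-mode ValueError.
from PIL import Image

base = Image.new('RGB', (100, 40), 'white')                # e.g. a cropped word image
overlay = Image.new('RGBA', (100, 40), (250, 230, 0, 90))  # translucent yellow, as in the script

# Image.alpha_composite(base, overlay)  # would raise ValueError (base is RGB)
out = Image.alpha_composite(base.convert('RGBA'), overlay)
```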
