updated make tiffs

master
Natasha Berting 7 years ago
commit 4eb890ff27

.gitignore

@@ -1,3 +1,3 @@
 images/**
 output/**
+src/index.json

@@ -34,7 +34,6 @@ dirs: ## create the dirs in working dir
 	@-mkdir -p images/
 	@-mkdir -p images-tiff/
 	@-mkdir -p output/
-	@-mkdir -p output/wordtagger
 	@-mkdir -p ocr/
 	@-mkdir -p hocr/
 	@echo $(color_r)'Directories made': images/ output/
@@ -72,6 +71,7 @@ hocrs: ## hocr with tesseract and then change extension to .html
 #OUTPUT GENERATION RECIPES
 output/wordtagger/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2
+	mkdir -p output/wordtagger
 	cp src/wordtagger/jquery.min.js output/wordtagger
 	cp src/wordtagger/script.js output/wordtagger
 	cp src/wordtagger/style.css output/wordtagger
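
The wordtagger recipe above relies on nltk's part-of-speech tagging. As a point of reference only (this is not the repository's src/wordtagger code, and it assumes nltk plus its 'punkt' and 'averaged_perceptron_tagger' data are installed), the core step looks like:

# Minimal POS-tagging sketch (illustrative, not the repository's src/wordtagger code).
# Assumes nltk and its 'punkt' and 'averaged_perceptron_tagger' data are installed.
import nltk

with open('ocr/output.txt') as f:
    text = f.read()

tokens = nltk.word_tokenize(text)   # split the OCR'ed text into word tokens
tagged = nltk.pos_tag(tokens)       # e.g. [('loom', 'NN'), ('weaving', 'VBG'), ...]
print(' '.join('{}/{}'.format(word, tag) for word, tag in tagged))
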
@@ -88,10 +88,13 @@ output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dep
 output/n7.txt: ocr/output.txt ## Replaces nouns with the 7th noun that follows. Dependencies: 91k_nouns
 	cat $< | python3 src/n_7.py > $(@)
-output/carlandre.txt: ocr/output.txt ## Creates visual poetry out of a text. Dependencies
+output/carlandre.txt: ocr/output.txt ## Alice: Creates visual poetry out of a text. Dependencies: pytest
 	cat $< | python3 src/carlandre.py > $(@)
 	# cat $(@) > /dev/usb/lp0
+output/overunder: ocr/output.txt ## Alice: An interpreted language that translate simple weaving instructions and creates a weaving pattern on text.
+	python3 src/overunder.py
 visualization: $(images) $(tmpfile) ##Creates data visualization from images/*.jpg. Dependencies: mplayer
 	@echo $(tmpfile)
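
The n7 recipe names the Oulipo N+7 procedure: every noun is swapped for the noun seven entries further on in a word list (the 91k_nouns dependency). A minimal sketch of that idea, not the repository's src/n_7.py, with the noun-list path assumed:

# N+7 sketch (illustrative, not the repository's src/n_7.py).
# The noun-list path below is an assumption; the Makefile only names a 91k_nouns dependency.
import sys

with open('src/91k_nouns.txt') as f:
    nouns = sorted(set(w.strip().lower() for w in f if w.strip()))
positions = {w: i for i, w in enumerate(nouns)}

def n_plus_7(word):
    i = positions.get(word.lower())
    return word if i is None else nouns[(i + 7) % len(nouns)]

for line in sys.stdin:              # mirrors the recipe: cat $< | python3 ... > $(@)
    print(' '.join(n_plus_7(w) for w in line.split()))
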
@@ -114,5 +117,5 @@ tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espea
 ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
 	bash src/ttssr-loop-human-only.sh ocr/output.txt
-chatbook: ocr/output.txt
+chatbook: ocr/output.txt #chatbot based on the knowledge of the scans Dependencies: nltk_rake, irc, nltk
 	python3 src/chatbook.py

File diff suppressed because one or more lines are too long

@@ -1,4 +1,3 @@
-images/*-0.jpg
-images/*-1.jpg
+images/0029.jpg

@@ -1,13 +1,37 @@
-apparatus of Greece. the logic of its
-organisation becomes clear. In that respect, it
-is not the utopian proposal. Plato claimed that
-instead of over~dramatisatlon of reality.
-educational system should provide a clear
-description of reality. According to Plato that is
-precisely what philosophy Is doing. Who should
-therefor rule peoples hearts and minds?
-Philosophers or poets? It was a power struggle
-between philosophers and poets. Poetry stood
-in Plato's way to propagate Platonism.
+ZEROS + ONES DIGITAL WOMEN 4|» THE NEWTECHNOCULTURE
+moments of unknown, disconnected lives, ”invisible voices
+conducted through the tips of her fingers."
+Poised as an interface between man and the world, she is
+also wired to a network of digital machines: typists connected to
+QWERTY alphabets, bodies shaped by the motion of the keys,
+one hundred words a minute, viral speed, Thousands oi opera
+tors, relays, calls, exchanges humming in Virtual conjunction,
+learning the same phrases, flipping the same switches,
+repeating the same responses, pushing plugs into the
+answering iacks, maybe two hundred, three hundred times an
+hours She has "a fingertip mastery of the ringing. listening, dial,
+and other keys on her key shelf; of the row or rows of cords for
+making connections; of the location and meaning of all parts of
+the honey combed formation of jacks and trunks for recording,
+for switching, for toll circuits, for tandem, for information-" It
+becomes second nature it grows on her, "Having done this stufl
+a few hundred thousand times, you become quite good at it. In
+fact you're plugging, and connecting, and disconnecting ten,
+twenty, forty cords at a time." After a while these processes
+become "quite satisfying in a way, rather like weaving on an
+upright loom,"
+102

@@ -34,5 +34,5 @@ for n in args.text:
 #print(index)
-with open('index.json', 'w') as outfile:
+with open('src/index.json', 'w') as outfile:
     json.dump(index, outfile)
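
This script writes the keyword index that chatbook.py loads back in. Judging from the bot's lookup, index.get(keyWord)[0].get('sentence'), the index maps a keyword to a list of {'sentence': ...} records. A minimal sketch of building an index in that shape (illustrative only, not the repository's script):

# Index-building sketch (illustrative, not the repository's script): map RAKE keywords
# to the sentences they occur in, in the shape chatbook.py reads back
# (keyword -> [{'sentence': ...}]).
import json
from rake_nltk import Rake
from nltk.tokenize import sent_tokenize

text = open('ocr/output.txt').read()
index = {}
r = Rake()
for sentence in sent_tokenize(text):
    r.extract_keywords_from_text(sentence)
    for keyword in r.get_ranked_phrases():
        index.setdefault(keyword, []).append({'sentence': sentence})

with open('src/index.json', 'w') as outfile:
    json.dump(index, outfile)
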

@@ -4,22 +4,24 @@ import random
 from nltk.tokenize import sent_tokenize, word_tokenize
 import json
 #from thread import start_new_thread
+import os
 r = Rake()
 def chunks(l, n):
     for i in range(0, len(l), n):
         yield l[i:i+n]
 class HelloBot(irc.bot.SingleServerIRCBot):
     def __init__(self, channel, nickname, server, port=6667, index=None):
+        print("connecting to chatroom...")
         irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
         self.channel = channel
         self.index = index
     def on_welcome(self, c, e):
         c.join(self.channel)
+        print("joined chatroom")
     def on_privmsg(self, c, e):
         pass
@@ -27,10 +29,15 @@ class HelloBot(irc.bot.SingleServerIRCBot):
     def on_pubmsg(self, c, e):
         print(e.arguments, e.source)
         msg=e.arguments[0]
+        print(e.source.split("!")[0][:1])
+        incoming_msg = e.arguments[0]
         r.extract_keywords_from_text(msg)
+        #r.get_ranked_phrases_with_scores()
         listOfKeys = r.get_ranked_phrases()
+        msg_where = ""
+        if e.source.split("!")[0][-3:] != "bot" or e.source.split("!")[0][:1] != "A":
+            print("true")
         for keyWord in listOfKeys:
             if keyWord in self.index:
                 msg = (index.get(keyWord)[0].get('sentence'))
@@ -38,14 +45,11 @@ class HelloBot(irc.bot.SingleServerIRCBot):
             else:
                 msg = "I don't know anything about that"
                 msg_where = ""
         for chunk in chunks(msg, 400):
-            print(chunk)
             c.privmsg(self.channel, chunk)
-        else:
-            print("bot")
-            c.privmsg(self.channel, msg_where)
 if __name__ == "__main__":
@@ -61,13 +65,15 @@ if __name__ == "__main__":
     args=ap.parse_args()
     # build the index of sentences organized by keywords
-    with open("index.json") as f:
+    with open("src/index.json") as f:
         try:
             index = json.load(f)
         except:
             index={}
     #print(index)
-    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
+    myhost = os.uname()[1]
+    bot = HelloBot(args.channel, "A-2{}-bot".format(len(index)), args.server, args.port, index)
     bot.start()
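
The bot's reply logic pairs rake_nltk keyword extraction with that prebuilt index. A minimal sketch of the same lookup outside IRC (illustrative only, not chatbook.py itself):

# Lookup sketch (illustrative, not chatbook.py itself): extract keywords from an
# incoming message with rake_nltk and answer with a stored sentence from the index.
import json
from rake_nltk import Rake

with open('src/index.json') as f:
    index = json.load(f)

def reply(message):
    r = Rake()
    r.extract_keywords_from_text(message)
    for keyword in r.get_ranked_phrases():
        if keyword in index:
            return index[keyword][0]['sentence']
    return "I don't know anything about that"

print(reply("weaving on an upright loom"))
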

File diff suppressed because one or more lines are too long

@@ -0,0 +1,90 @@
+import linecache
+import textwrap
+import sys
+from sys import exit
+
+class LeavingProgram(Exception):
+    pass
+
+def parse(program):
+    cmds = program.split(',')
+    splitted_cmds = []
+    for cmd in cmds:
+        splitted = cmd.split()
+        splitted_cmds.append(splitted)
+    return splitted_cmds
+    #return tokenize(program)
+
+def tokenize(s):
+    return s.split()
+
+def repl():
+    while True:
+        try:
+            val = eval(parse(input('> ')))
+            if val is not None:
+                print(val)
+        except LeavingProgram:
+            break
+
+text = None
+line_number = 0
+last_index = 0
+
+def eval(cmds):
+    global text
+    global line_number
+    global last_index
+    for cmd in cmds:
+        if cmd == []:
+            line_number += 1
+            last_index = 0
+        elif cmd[0] == 'load':
+            contents = open('ocr/output.txt').read()
+            text = textwrap.wrap(contents, 40, break_long_words=True)
+            print('\n'.join(text))
+            line_number = 0
+            last_index = 0
+        elif cmd[0] == 'show':
+            print(text[line_number])
+        elif cmd[0] == 'under':
+            current_line = text[line_number]
+            char_number = int(cmd[1]) - 1
+            char_list = list(current_line)
+            x=range(last_index, char_number + last_index + 1)
+            for time in x:
+                if time < len(char_list):
+                    char_list[time] = u'\u21e2'
+            last_index += char_number + 1
+            joined = ''.join(char_list)
+            text[line_number] = joined
+        elif cmd[0] == 'over':
+            last_index += int(cmd[1])
+        elif cmd[0] == 'pattern':
+            pattern = text[0:line_number + 1]
+            print('\n'.join(pattern))
+        elif cmd[0] == 'quit':
+            print('Come back soon!')
+            raise LeavingProgram()
+        else:
+            joined = ' '.join(cmd)
+            print('Did not understand command {}'.format(joined))
+
+if __name__ == '__main__':
+    repl()
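
Read as a whole, the new overunder.py is a small REPL: commands are separated by commas, 'load' wraps ocr/output.txt into 40-character lines and echoes them, 'under N' overwrites the next N characters of the current line with ⇢ arrows, 'over N' skips N characters, an empty command (a trailing comma) moves to the next line, 'show' prints the current line, 'pattern' prints the lines woven so far, and 'quit' exits. A possible session, assuming ocr/output.txt already exists (the echoed text is omitted here):

$ python3 src/overunder.py
> load
> under 4, over 4, under 4, over 4,
> over 4, under 4, over 4, under 4,
> pattern
> quit
Come back soon!
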