updated make tiffs

master
Natasha Berting 7 years ago
commit 4eb890ff27

.gitignore

@@ -1,3 +1,3 @@
images/**
output/**
src/index.json

@@ -34,7 +34,6 @@ dirs: ## create the dirs in working dir
@-mkdir -p images/
@-mkdir -p images-tiff/
@-mkdir -p output/
@-mkdir -p output/wordtagger
@-mkdir -p ocr/
@-mkdir -p hocr/
@echo $(color_r)'Directories made': images/ output/
@@ -72,6 +71,7 @@ hocrs: ## hocr with tesseract and then change extension to .html
#OUTPUT GENERATION RECIPES
output/wordtagger/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2
mkdir -p output/wordtagger
cp src/wordtagger/jquery.min.js output/wordtagger
cp src/wordtagger/script.js output/wordtagger
cp src/wordtagger/style.css output/wordtagger
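The actual POS tagging for this target happens in the src/wordtagger scripts copied above, which this hunk does not show. As a rough sketch of the step the recipe comment describes, using nltk as listed in the dependencies (the function name and the noun/verb/adjective/adverb filter are illustrative assumptions, not the project's code):

import nltk
from nltk.tokenize import word_tokenize

def tag_text(text):
    # Tokenize the OCR'ed text and tag each token with its part of speech.
    # Assumes the nltk 'punkt' and 'averaged_perceptron_tagger' data are installed.
    tokens = word_tokenize(text)
    tagged = nltk.pos_tag(tokens)  # e.g. [('weaving', 'VBG'), ('loom', 'NN'), ...]
    # Keep only nouns, verbs, adjectives and adverbs, as the recipe comment suggests.
    return [(word, tag) for word, tag in tagged if tag.startswith(('NN', 'VB', 'JJ', 'RB'))]

with open('ocr/output.txt') as f:
    print(tag_text(f.read())[:20])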
@@ -88,10 +88,13 @@ output/chatbot.txt: ocr/output.txt ## Comments a text with a simple chatbot. Dep
output/n7.txt: ocr/output.txt ## Replaces nouns with the 7th noun that follows. Dependencies: 91k_nouns
cat $< | python3 src/n_7.py > $(@)
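src/n_7.py itself is not part of this diff. A hedged sketch of the N+7 substitution the comment describes, where every recognized noun is swapped for the noun seven entries further down a word list (nouns.txt is a hypothetical stand-in for the 91k_nouns dependency):

import sys

def n_plus_7(text, nouns):
    position = {noun: i for i, noun in enumerate(nouns)}
    out = []
    for word in text.split():
        key = word.lower().strip('.,;:!?"')
        if key in position:
            # Replace the noun with the one 7 places later, wrapping around the list.
            word = nouns[(position[key] + 7) % len(nouns)]
        out.append(word)
    return ' '.join(out)

with open('nouns.txt') as f:  # hypothetical path standing in for the 91k_nouns list
    nouns = f.read().split()
sys.stdout.write(n_plus_7(sys.stdin.read(), nouns))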
-output/carlandre.txt: ocr/output.txt ## Creates visual poetry out of a text. Dependencies
+output/carlandre.txt: ocr/output.txt ## Alice: Creates visual poetry out of a text. Dependencies: pytest
cat $< | python3 src/carlandre.py > $(@)
# cat $(@) > /dev/usb/lp0
output/overunder: ocr/output.txt ## Alice: An interpreted language that translates simple weaving instructions and creates a weaving pattern on text.
python3 src/overunder.py
visualization: $(images) $(tmpfile) ## Creates data visualization from images/*.jpg. Dependencies: mplayer
@echo $(tmpfile)
@@ -114,5 +117,5 @@ tts: output/chatbot.txt ocr/output.txt ## text to speech. Dependencies: espea
ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
bash src/ttssr-loop-human-only.sh ocr/output.txt
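The loop itself lives in src/ttssr-loop-human-only.sh, which this commit does not touch. One round trip of the text to speech / speech recognition cycle might look roughly like this in Python, assuming the espeak and pocketsphinx command-line tools named in the dependencies (the exact flags the shell script uses are an assumption here):

import subprocess

def speak_and_listen(text, wav='tts.wav'):
    # Synthesize the line to a wav file with espeak ...
    subprocess.run(['espeak', '-w', wav, text], check=True)
    # ... then run speech recognition on that file and return the hypothesis.
    result = subprocess.run(
        ['pocketsphinx_continuous', '-infile', wav, '-logfn', '/dev/null'],
        capture_output=True, text=True)
    return result.stdout.strip()

with open('ocr/output.txt') as f:
    print(speak_and_listen(f.readline().strip()))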
-chatbook: ocr/output.txt
+chatbook: ocr/output.txt #chatbot based on the knowledge of the scans Dependencies: nltk_rake, irc, nltk
python3 src/chatbook.py
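Condensed from the chatbook.py changes further down in this commit: the bot pulls key phrases out of an incoming message with rake_nltk and answers with the first indexed sentence that matches. A minimal sketch of that lookup (reply_for is an illustrative name, not a function in the repo):

import json
from rake_nltk import Rake

r = Rake()

def reply_for(message, index):
    # Extract candidate key phrases from the incoming chat message ...
    r.extract_keywords_from_text(message)
    for phrase in r.get_ranked_phrases():
        # ... and answer with the first indexed sentence that mentions one of them.
        if phrase in index:
            return index[phrase][0]['sentence']
    return "I don't know anything about that"

with open('src/index.json') as f:
    print(reply_for("tell me about weaving", json.load(f)))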

File diff suppressed because one or more lines are too long

@@ -1,4 +1,3 @@
images/*-0.jpg
images/*-1.jpg
images/0029.jpg

@@ -1,13 +1,37 @@
apparatus of Greece, the logic of its
organisation becomes clear. In that respect, it
is not the utopian proposal. Plato claimed that
instead of over-dramatisation of reality,
educational system should provide a clear
description of reality. According to Plato that is
precisely what philosophy is doing. Who should
therefore rule people's hearts and minds?
Philosophers or poets? It was a power struggle
between philosophers and poets. Poetry stood
in Plato's way to propagate Platonism.
ZEROS + ONES DIGITAL WOMEN + THE NEW TECHNOCULTURE
moments of unknown, disconnected lives, "invisible voices
conducted through the tips of her fingers."
Poised as an interface between man and the world, she is
also wired to a network of digital machines: typists connected to
QWERTY alphabets, bodies shaped by the motion of the keys,
one hundred words a minute, viral speed. Thousands of opera-
tors, relays, calls, exchanges humming in virtual conjunction,
learning the same phrases, flipping the same switches,
repeating the same responses, pushing plugs into the
answering jacks, maybe two hundred, three hundred times an
hour. She has "a fingertip mastery of the ringing, listening, dial,
and other keys on her key shelf; of the row or rows of cords for
making connections; of the location and meaning of all parts of
the honeycombed formation of jacks and trunks for recording,
for switching, for toll circuits, for tandem, for information." It
becomes second nature, it grows on her. "Having done this stuff
a few hundred thousand times, you become quite good at it. In
fact you're plugging, and connecting, and disconnecting ten,
twenty, forty cords at a time." After a while these processes
become "quite satisfying in a way, rather like weaving on an
upright loom."
102

@@ -34,5 +34,5 @@ for n in args.text:
#print(index)
-with open('index.json', 'w') as outfile:
+with open('src/index.json', 'w') as outfile:
    json.dump(index, outfile)
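chatbook.py (below) reads entries from this file as index[keyword][0]['sentence'], so src/index.json presumably maps key phrases to lists of sentence records. A hypothetical illustration of that shape (the actual keys and sentences come from the OCR'ed scans):

example_index = {
    "loom": [
        {"sentence": "rather like weaving on an upright loom"},
    ],
    "philosophers": [
        {"sentence": "It was a power struggle between philosophers and poets."},
    ],
}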

@@ -4,22 +4,24 @@ import random
from nltk.tokenize import sent_tokenize, word_tokenize
import json
#from thread import start_new_thread
import os
r = Rake()
def chunks(l, n):
    for i in range(0, len(l), n):
        yield l[i:i+n]

class HelloBot(irc.bot.SingleServerIRCBot):
    def __init__(self, channel, nickname, server, port=6667, index=None):
        print("connecting to chatroom...")
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        self.channel = channel
        self.index = index

    def on_welcome(self, c, e):
        c.join(self.channel)
        print("joined chatroom")

    def on_privmsg(self, c, e):
        pass
@@ -27,10 +29,15 @@ class HelloBot(irc.bot.SingleServerIRCBot):
    def on_pubmsg(self, c, e):
        print(e.arguments, e.source)
        msg=e.arguments[0]
        print(e.source.split("!")[0][:1])
        incoming_msg = e.arguments[0]
        r.extract_keywords_from_text(msg)
        #r.get_ranked_phrases_with_scores()
        listOfKeys = r.get_ranked_phrases()
        msg_where = ""
        if e.source.split("!")[0][-3:] != "bot" or e.source.split("!")[0][:1] != "A":
            print("true")
            for keyWord in listOfKeys:
                if keyWord in self.index:
                    msg = (index.get(keyWord)[0].get('sentence'))
@@ -38,14 +45,11 @@ class HelloBot(irc.bot.SingleServerIRCBot):
                else:
                    msg = "I don't know anything about that"
                    msg_where = ""
            for chunk in chunks(msg, 400):
                print(chunk)
                c.privmsg(self.channel, chunk)
            c.privmsg(self.channel, msg_where)
        else:
            print("bot")

if __name__ == "__main__":
@@ -61,13 +65,15 @@ if __name__ == "__main__":
    args=ap.parse_args()

    # build the index of sentences organized by keywords
-    with open("index.json") as f:
+    with open("src/index.json") as f:
        try:
            index = json.load(f)
        except:
            index={}
    #print(index)
    myhost = os.uname()[1]

-    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
+    bot = HelloBot(args.channel, "A-2{}-bot".format(len(index)), args.server, args.port, index)
    bot.start()

File diff suppressed because one or more lines are too long

@@ -0,0 +1,90 @@
import linecache
import textwrap
import sys
from sys import exit

class LeavingProgram(Exception):
    pass

def parse(program):
    cmds = program.split(',')
    splitted_cmds = []
    for cmd in cmds:
        splitted = cmd.split()
        splitted_cmds.append(splitted)
    return splitted_cmds
    #return tokenize(program)

def tokenize(s):
    return s.split()

def repl():
    while True:
        try:
            val = eval(parse(input('> ')))
            if val is not None:
                print(val)
        except LeavingProgram:
            break

text = None
line_number = 0
last_index = 0

def eval(cmds):
    global text
    global line_number
    global last_index
    for cmd in cmds:
        if cmd == []:
            line_number += 1
            last_index = 0
        elif cmd[0] == 'load':
            contents = open('ocr/output.txt').read()
            text = textwrap.wrap(contents, 40, break_long_words=True)
            print('\n'.join(text))
            line_number = 0
            last_index = 0
        elif cmd[0] == 'show':
            print(text[line_number])
        elif cmd[0] == 'under':
            current_line = text[line_number]
            char_number = int(cmd[1]) - 1
            char_list = list(current_line)
            x=range(last_index, char_number + last_index + 1)
            for time in x:
                if time < len(char_list):
                    char_list[time] = u'\u21e2'
            last_index += char_number + 1
            joined = ''.join(char_list)
            text[line_number] = joined
        elif cmd[0] == 'over':
            last_index += int(cmd[1])
        elif cmd[0] == 'pattern':
            pattern = text[0:line_number + 1]
            print('\n'.join(pattern))
        elif cmd[0] == 'quit':
            print('Come back soon!')
            raise LeavingProgram()
        else:
            joined = ' '.join(cmd)
            print('Did not understand command {}'.format(joined))

if __name__ == '__main__':
    repl()
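Based on the command handling in eval() above, a session could also be driven programmatically rather than through the repl() prompt (a sketch; it assumes src/overunder.py is importable as overunder):

import overunder

overunder.eval(overunder.parse('load'))                       # wrap ocr/output.txt to 40 columns
overunder.eval(overunder.parse('under 3, over 2, under 4'))   # weave arrow glyphs into the current line
overunder.eval(overunder.parse('show'))                       # print the current line
overunder.eval(overunder.parse('pattern'))                    # print the lines handled so far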