Merge branch 'master' of git.xpub.nl:/var/www/git.xpub.nl/repos/OuNoPo-make

7 years ago · 87a07516ac
parent 62c07bd59b 2751a84fc0
commit 87a07516ac
8 changed files with 67 additions and 39 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,3 @@
 images/**
 output/**
-
+src/index.json
--- a/4
+++ b/4
@ -34,7 +34,6 @@ dirs: ## create the dirs in working dir
 	@-mkdir -p images/
 	@-mkdir -p images-tiff/
 	@-mkdir -p output/
-	@-mkdir -p output/wordtagger
 	@-mkdir -p ocr/
 	@-mkdir -p hocr/
 	@echo $(color_r)'Directories made': images/ output/
@ -72,6 +71,7 @@ hocrs: ## hocr with tesseract and then change extension to .html
 #OUTPUT GENERATION RECIPES

 output/wordtagger/index.html: ocr/output.txt ## Analyzes OCR'ed text using a Part of Speech (POS) tagger. Outputs a string of tags (e.g. nouns, verbs, adjectives, and adverbs). Dependencies: python3's nltk, jinja2
+	mkdir -p output/wordtagger
 	cp src/wordtagger/jquery.min.js output/wordtagger
 	cp src/wordtagger/script.js output/wordtagger
 	cp src/wordtagger/style.css output/wordtagger
@ -117,5 +117,5 @@ tts: output/chatbot.txt ocr/output.txt    ## text to speech. Dependencies: espea
 ttssr-human-only: ocr/output.txt ## Loop: text to speech-speech recognition. Dependencies: espeak, pocketsphinx
 	bash src/ttssr-loop-human-only.sh ocr/output.txt

-chatbook: ocr/output.txt
+chatbook: ocr/output.txt ## Chatbot based on the knowledge of the scans. Dependencies: rake_nltk, irc, nltk
 	python3 src/chatbook.py
--- a/index.json
+++ b/index.json
--- a/ocr/list.txt
+++ b/ocr/list.txt
@ -1,4 +1,3 @@

-images/*-0.jpg
-images/*-1.jpg
+images/0029.jpg

--- a/ocr/output.txt
+++ b/ocr/output.txt
@ -1,13 +1,37 @@
-apparatus of Greece. the logic of its
-organisation becomes clear. In that respect, it
-is not the utopian proposal. Plato claimed that
-instead of over~dramatisatlon of reality.
-educational system should provide a clear
-description of reality. According to Plato that is
-precisely what philosophy Is doing. Who should
-therefor rule people‘s hearts and minds?
-Philosophers or poets? It was a power struggle
-between philosophers and poets. Poetry stood
-in Plato's way to propagate Platonism.
+ 

+ 
+
+ZEROS + ONES DIGITAL WOMEN 4|» THE NEWTECHNOCULTURE
+
+moments of unknown, disconnected lives, ”invisible voices
+conducted through the tips of her ﬁngers."
+
+Poised as an interface between man and the world, she is
+also wired to a network of digital machines: typists connected to
+QWERTY alphabets, bodies shaped by the motion of the keys,
+one hundred words a minute, viral speed, Thousands oi opera
+tors, relays, calls, exchanges humming in Virtual conjunction,
+learning the same phrases, ﬂipping the same switches,
+repeating the same responses, pushing plugs into the
+answering iacks, maybe two hundred, three hundred times an
+hours She has "a ﬁngertip mastery of the ringing. listening, dial,
+and other keys on her key shelf; of the row or rows of cords for
+making connections; of the location and meaning of all parts of
+the honey combed formation of jacks and trunks for recording,
+for switching, for toll circuits, for tandem, for information-" It
+becomes second nature it grows on her, "Having done this stufl
+a few hundred thousand times, you become quite good at it. In
+fact you're plugging, and connecting, and disconnecting ten,
+twenty, forty cords at a time." After a while these processes
+become "quite satisfying in a way, rather like weaving on an
+upright loom,"
+
+102
+
+ 
+
+ 
+
+ 

--- a/src/build_database.py
+++ b/src/build_database.py
@ -34,5 +34,5 @@ for n in args.text:

 #print(index)

-with open('index.json', 'w') as outfile:
+with open('src/index.json', 'w') as outfile:
    json.dump(index, outfile)
--- a/src/chatbook.py
+++ b/src/chatbook.py
@ -4,9 +4,9 @@ import random
 from nltk.tokenize import sent_tokenize, word_tokenize
 import json
 #from thread import start_new_thread
+import os

-r= Rake()
-
+r = Rake()

 def chunks(l, n):
    for i in range(0, len(l), n):
@ -14,38 +14,42 @@ def chunks(l, n):

 class HelloBot(irc.bot.SingleServerIRCBot):
    def __init__(self, channel, nickname, server, port=6667, index=None):
+        print("connecting to chatroom...")
        irc.bot.SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        self.channel = channel
        self.index = index

    def on_welcome(self, c, e):
        c.join(self.channel)
+        print("joined chatroom")

    def on_privmsg(self, c, e):
        pass

    def on_pubmsg(self, c, e):
        print(e.arguments, e.source)
-        msg = e.arguments[0]
+        msg=e.arguments[0]
+        print(e.source.split("!")[0][:1])
+        incoming_msg = e.arguments[0]
        r.extract_keywords_from_text(msg)
-        #r.get_ranked_phrases_with_scores()
        listOfKeys = r.get_ranked_phrases()

-        for keyWord in listOfKeys:
-            if keyWord in self.index:
-                msg = (index.get(keyWord)[0].get('sentence'))
-                msg_where = "I found this in {}".format(index.get(keyWord)[0].get('filename'))
-            else:
-                msg = "I don't know anything about that"
-                msg_where = ""
-
-        for chunk in chunks(msg, 400):
-            c.privmsg(self.channel, chunk)
-
-
-        c.privmsg(self.channel, msg_where)
-
+        msg_where = ""

+        if e.source.split("!")[0][-3:] != "bot" or e.source.split("!")[0][:1] != "A":
+            print("true")
+            for keyWord in listOfKeys:
+                if keyWord in self.index:
+                    msg = (index.get(keyWord)[0].get('sentence'))
+                    msg_where = "I found this in {}".format(index.get(keyWord)[0].get('filename'))
+                else:
+                    msg = "I don't know anything about that"
+                    msg_where = ""
+            for chunk in chunks(msg, 400):
+                print(chunk)
+                c.privmsg(self.channel, chunk)
+        else:
+            print("bot")


 if __name__ == "__main__":
@ -61,13 +65,15 @@ if __name__ == "__main__":
    args=ap.parse_args()

    # build the index of sentences organized by keywords
-    with open("index.json") as f:
+    with open("src/index.json") as f:
        try:
            index = json.load(f)
        except:
            index={}

    #print(index)
+    myhost = os.uname()[1]
+

-    bot = HelloBot(args.channel, args.nickname, args.server, args.port, index)
+    bot = HelloBot(args.channel, "A-2{}-bot".format(len(index)), args.server, args.port, index)
    bot.start()
--- a/src/index.json
+++ b/src/index.json