comments and tidy
parent
8a9b39d430
commit
55f7d5c0e8
@ -0,0 +1,106 @@
|
||||
#
|
||||
# NLTK (Natural Language ToolKit) is a library for Natural Language Processing.
|
||||
# We will use it to get the Part Of Speech (POS) of the speech-to-text results.
|
||||
#
|
||||
# What does it mean?
|
||||
#
|
||||
# It works as grammar tagging: for instance, the sentence "Around the clouds"
|
||||
# would have this output:
|
||||
#
|
||||
# [('Around', 'IN'), ('the', 'DT'), ('clouds', 'NN')]
|
||||
#
|
||||
# 'IN' means 'preposition' - 'DT' means 'determiner' - 'NN' means 'noun, common, singular or mass'
|
||||
|
||||
|
||||
import time # to create delays :: for having a few seconds to check the console
|
||||
import nltk # to use NLTK
|
||||
|
||||
# Open the speech-to-text result :: downloaded from the web interface >>

# The encoding is explicit so the script reads the file the same way on every OS
with open('speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    text = speech.read()  # and make python read it :)

print(text)  # print it!

time.sleep(2)  # check it in the console!


# Delete the empty "interim" placeholder from the results, then turn every
# line break into ". " so that each line of the file ends with a dot
text = text.replace('<span class="interim"></span>', '').replace('\n', '. ')

tokens = nltk.word_tokenize(text)  # Tokenize the words :: split each word
pos = nltk.pos_tag(tokens)  # Elaborate the Part of Speech! It will create a list of (word, tag) pairs
print(pos)  # print the list!

time.sleep(2)  # check it in the console!


# To see all the POS tags, open the terminal and copy:
#
# python3
# import nltk
# nltk.help.upenn_tagset()


# start the layouting :: html + css + paged.js >>
#
# The two html files share the same first part and differ only in the CSS
# they link, so the first part is written once as a template and filled twice

head = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="./pagedjs_files/interface.css">
<script src="./pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="./styles/{css}.css">
<meta charset="utf-8"/>
<title>📡 💻📘</title>
</head>
<body>
'''

html1 = head.format(css='1')  # first part of 1.html :: links styles/1.css
html2 = head.format(css='2')  # first part of 2.html :: links styles/2.css


# Process each element of the list

spans = []  # one piece of html for each word, joined together after the loop

for word, tag in pos:  # each element of pos is a pair: the word and its POS tag

    if word == '.':  # if the word is a dot, its class will be 'dot'
        spans.append(" <span class='dot'>.</span><br> \n")

    else:  # otherwise the word goes in a span that has its POS tag as class
        spans.append(" <span class='" + tag + "'> " + word + "</span>\n")

# Join all the pieces and close the html text
html = ''.join(spans)
html += '''</body>
</html>'''

html = html.replace(' .', '.').replace(" '", "'")  # to tidy wrong " . " and " ' " position


# Save the <html> files!
#
# The pages declare charset utf-8 and contain emoji, so they must be written
# as UTF-8 whatever the default encoding of the system is

for name, first_part in (('1', html1), ('2', html2)):
    with open(f'../2_layout/{name}.html', 'w', encoding='utf-8') as index:
        index.write(first_part)
        index.write(html)
|
@ -0,0 +1,68 @@
|
||||
# Scrape images from DuckDuckGo
|
||||
#
|
||||
# With duckduckgo_images_api!
|
||||
|
||||
|
||||
from duckduckgo_images_api import search # import the library for scrape
|
||||
import time # to create delays :: for having a few seconds to check the console
|
||||
|
||||
|
||||
# The encoding is explicit so the script reads the file the same way on every OS
with open('speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    qq = speech.readlines()  # and split it in lines, it will create an array, a list

print(qq)  # print the array!

time.sleep(2)  # check qq in the console!


# declare the first part of the text of the html, we will fill it
# in the process with loops

html = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="./pagedjs_files/interface.css">
<script src="./pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="./styles/3.css">
<meta charset="utf-8"/>
<title>📡 💻📘</title>
</head>
<body>
'''


# Elaborate each line :: process every element of the array qq

# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt

for q in qq:

    print(q)  # print the q!

    time.sleep(2)  # check current q in the console!

    q = q.strip()  # remove the spaces and the "\n" around the text

    # Nothing to search for an empty line or for the "interim" placeholder
    if not q or q == '''<span class="interim"></span>''':  # This nope.
        continue

    # Scrape images with search()!
    # q is, indeed, the query for DuckDuckGo
    results = search(q)
    found = results["results"]  # the list of the images found for this q

    html += f""" <span> {q} </span>\n"""  # Now let's fill the html with the text..

    if found:  # ..and with the pic, but only if the search found at least one
        r = found[0]["image"]  # get the http link to the first image
        html += f""" <img src='{r}'>\n"""


# Close the html text
html += '''</body>
</html>'''

html = html.replace(" '", "'")

# Save the <html> file! The page declares charset utf-8 and contains emoji,
# so it is written as UTF-8 whatever the default encoding of the system is
with open('../2_layout/3.html', 'w', encoding='utf-8') as index:
    index.write(html)
|
@ -0,0 +1,132 @@
|
||||
# Bonus!
|
||||
#
|
||||
# Scrape and download images in local from DuckDuckGo
|
||||
|
||||
# with DuckDuckGoImages!
|
||||
|
||||
|
||||
|
||||
import DuckDuckGoImages as ddg # import the library for scrape
|
||||
import os # to manipulate the file system
|
||||
import shutil # same but powerfull >:D
|
||||
import time # to create delays :: for having a few seconds to check the console
|
||||
import random # to get random numbers
|
||||
|
||||
|
||||
|
||||
# Prepare the local folder :: where the images will be saved >>

if os.path.isdir('./images/'):  # check if the folder "images" exists
    shutil.rmtree('./images/')  # if yes, delete it

os.mkdir('./images/')  # and then create a fresh new one


# start the layouting :: html + css + paged.js >>

# declare the first part of the text of the html, we will fill it
# in the process with loops

html = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="./pagedjs_files/interface.css">
<script src="./pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="./styles/3.css">
<meta charset="utf-8"/>
<title>📡 💻📘</title>
</head>
<body>
'''


# Open the speech-to-text result :: downloaded from the web interface >>

# The encoding is explicit so the script reads the file the same way on every OS
with open('speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    qq = speech.readlines()  # and split it in lines, it will create an array, a list

print(qq)  # print the array!

time.sleep(2)  # check qq in the console!


# Elaborate each line :: process every element of the array qq

# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt

for q in qq:

    print(q)  # print the q!

    time.sleep(2)  # check current q in the console!

    # readlines() keeps the "\n" at the end of each line: strip it first,
    # otherwise the check below never matches a line followed by a line break
    q = q.strip()

    # Nothing to search for an empty line or for the "interim" placeholder
    # (an empty name would also point again to the "images" folder itself)
    if not q or q == '''<span class="interim"></span>''':  # This nope.
        continue

    # qBinded is the current q but without spaces,
    # because we will use qBinded to name each file downloaded
    qBinded = q.replace(' ', '')

    os.mkdir(f'./images/{qBinded}')  # create the folder with the name given by qBinded

    print(qBinded)  # print qBinded!

    time.sleep(2)  # check current qBinded in the console!


    # Scrape images with ddg.download()! :: we imported DuckDuckGoImages *as* ddg,
    # it's just compacted the name

    # q is, indeed, the query for DuckDuckGo
    # folder=(../path/to/download)
    # max_urls=(how many results attempt to scrape)
    # thumbnails=(True/False, to download thumbnails or bigger images)

    ddg.download(q, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)

    # get the contents of the folder, it will create another array;
    # next step is to rename one of the downloaded images
    # with the name of the current q
    picsList = os.listdir(f"./images/{qBinded}/")

    print("List of pics:", picsList)  # Check how many downloaded pictures!

    time.sleep(2)  # check in the console!


    if not picsList:  # if the list is empty..
        html += f'<span class="{qBinded}">{q}</span><br><br>'  # ..add now the <html> for just the text, since there are no images downloaded..
        html += "\n"
        os.rmdir(f'./images/{qBinded}/')  # ..and delete the folder created, since it is useless..
        continue  # ..from now on this q can't do anything more, let's go to the next iteration


    # Layout q and its pic!

    # let's take a random picture from the array :: random.choice() can only
    # return an element that exists, while random.randint(0, len(picsList))
    # could also return len(picsList), which is one past the last element
    pic = random.choice(picsList)

    # Rename the pic with qBinded + the .jpg extension and, in the same step,
    # move it to the main folder
    os.replace(f'./images/{qBinded}/{pic}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')  # and it's time to delete the folder of this q

    html += f""" <span class="{qBinded}">{q}</span>"""  # Now let's fill the html with text and the pic
    html += "\n"
    html += f""" <span><img src="./images/{qBinded}.jpg"></span>"""
    html += "\n"


# Close the html text
html += '''</body>
</html>'''

# Save the <html> file! The page declares charset utf-8 and contains emoji,
# so it is written as UTF-8 whatever the default encoding of the system is
with open('../2_layout/localpics.html', 'w', encoding='utf-8') as index:
    index.write(html)
|
@ -0,0 +1,25 @@
|
||||
First, you need python. You can download Python from its website:
|
||||
|
||||
https://www.python.org/
|
||||
|
||||
|
||||
|
||||
If you need to install NLTK, open the terminal and copy:
|
||||
|
||||
pip3 install nltk
|
||||
python3
|
||||
import nltk
|
||||
nltk.download('tagsets')
|
||||
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
|
||||
|
||||
|
||||
|
||||
If you need to install duckduckgo-images-api, open the terminal and type:
|
||||
|
||||
pip3 install duckduckgo-images-api
|
||||
|
||||
|
||||
|
||||
If you need to install DuckDuckGoImages, open the terminal and type:
|
||||
|
||||
pip3 install DuckDuckGoImages
|
@ -0,0 +1,149 @@
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="../pagedjs_files/interface.css">
|
||||
<script src="../pagedjs_files/paged.polyfill.js"></script>
|
||||
<link rel="stylesheet" href="./styles/1.css">
|
||||
<meta charset="utf-8"/>
|
||||
<title>📡 💻📘</title>
|
||||
</head>
|
||||
<body>
|
||||
<span class='JJ'> ok</span>
|
||||
<span class='NN'> let</span>
|
||||
<span class='POS'>'s</span>
|
||||
<span class='NN'> talk</span>
|
||||
<span class='IN'> in</span>
|
||||
<span class='NNP'> English</span>
|
||||
<span class='IN'> for</span>
|
||||
<span class='NN'> awhile</span>
|
||||
<span class='WP'> what</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='PRP'> we</span>
|
||||
<span class='VBG'> eating</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='CC'> or</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='IN'> so</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='VBG'> starting</span>
|
||||
<span class='RB'> again</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='JJ'> ok</span>
|
||||
<span class='NN'> navigate</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBP'> need</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> rest</span>
|
||||
<span class='RB'> sometimes</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> also</span>
|
||||
<span class='VBD'> implemented</span>
|
||||
<span class='IN'> that</span>
|
||||
<span class='VBG'> implementing</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> he</span>
|
||||
<span class='VBZ'>'s</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> sentence</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='PRP'> we</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='JJ'> interested</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='VB'> remember</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> so</span>
|
||||
<span class='RB'> basically</span>
|
||||
<span class='NN'> sentence</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='WRB'> where</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> go</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> couple</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> then</span>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBP'> put</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NN'> adult</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='DT'> a</span>
|
||||
<span class='RB'> bit</span>
|
||||
<span class='JJR'> easier</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> understand</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> actually</span>
|
||||
<span class='RB'> maybe</span>
|
||||
<span class='NNS'> people</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VBP'> do</span>
|
||||
<span class='RB'> n't</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> I</span>
|
||||
<span class='VBP'>'m</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VBG'> thinking</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'>'s</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NNS'> centres</span>
|
||||
<span class='VBP'> do</span>
|
||||
<span class='RB'> n't</span>
|
||||
<span class='VB'> put</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NN'> dot</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VB'> put</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> couple</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='IN'> because</span>
|
||||
<span class='RB'> then</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='VBG'> talking</span>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> booklet</span>
|
||||
<span class='CC'> but</span>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='VBG'> writing</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='IN'> so</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='JJ'> fine</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> so</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBN'> done</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> now</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='JJ'> f</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='RB'> up</span>
|
||||
<span class='dot'>.</span><br>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,149 @@
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="../pagedjs_files/interface.css">
|
||||
<script src="../pagedjs_files/paged.polyfill.js"></script>
|
||||
<link rel="stylesheet" href="./styles/2.css">
|
||||
<meta charset="utf-8"/>
|
||||
<title>📡 💻📘</title>
|
||||
</head>
|
||||
<body>
|
||||
<span class='JJ'> ok</span>
|
||||
<span class='NN'> let</span>
|
||||
<span class='POS'>'s</span>
|
||||
<span class='NN'> talk</span>
|
||||
<span class='IN'> in</span>
|
||||
<span class='NNP'> English</span>
|
||||
<span class='IN'> for</span>
|
||||
<span class='NN'> awhile</span>
|
||||
<span class='WP'> what</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='PRP'> we</span>
|
||||
<span class='VBG'> eating</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='CC'> or</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='IN'> so</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='VBG'> starting</span>
|
||||
<span class='RB'> again</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='JJ'> ok</span>
|
||||
<span class='NN'> navigate</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBP'> need</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> rest</span>
|
||||
<span class='RB'> sometimes</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> also</span>
|
||||
<span class='VBD'> implemented</span>
|
||||
<span class='IN'> that</span>
|
||||
<span class='VBG'> implementing</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> he</span>
|
||||
<span class='VBZ'>'s</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> sentence</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='PRP'> we</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='JJ'> interested</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='VB'> remember</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> so</span>
|
||||
<span class='RB'> basically</span>
|
||||
<span class='NN'> sentence</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='WRB'> where</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> go</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> couple</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> then</span>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBP'> put</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NN'> adult</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='DT'> a</span>
|
||||
<span class='RB'> bit</span>
|
||||
<span class='JJR'> easier</span>
|
||||
<span class='TO'> to</span>
|
||||
<span class='VB'> understand</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> actually</span>
|
||||
<span class='RB'> maybe</span>
|
||||
<span class='NNS'> people</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VBP'> do</span>
|
||||
<span class='RB'> n't</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> I</span>
|
||||
<span class='VBP'>'m</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VBG'> thinking</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'>'s</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NNS'> centres</span>
|
||||
<span class='VBP'> do</span>
|
||||
<span class='RB'> n't</span>
|
||||
<span class='VB'> put</span>
|
||||
<span class='DT'> the</span>
|
||||
<span class='NN'> dot</span>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='VB'> put</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> couple</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='IN'> because</span>
|
||||
<span class='RB'> then</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='VBG'> talking</span>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='DT'> a</span>
|
||||
<span class='NN'> booklet</span>
|
||||
<span class='CC'> but</span>
|
||||
<span class='PRP'> they</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='RB'> not</span>
|
||||
<span class='VBG'> writing</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='IN'> so</span>
|
||||
<span class='PRP'> it</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='RB'> just</span>
|
||||
<span class='JJ'> fine</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='RB'> so</span>
|
||||
<span class='VBP'> are</span>
|
||||
<span class='PRP'> you</span>
|
||||
<span class='VBN'> done</span>
|
||||
<span class='dot'>.</span><br>
|
||||
<span class='CC'> and</span>
|
||||
<span class='RB'> now</span>
|
||||
<span class='VBZ'> is</span>
|
||||
<span class='JJ'> f</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='NNP'> *</span>
|
||||
<span class='RB'> up</span>
|
||||
<span class='dot'>.</span><br>
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,47 @@
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="pagedjs_files/interface.css">
|
||||
<script src="pagedjs_files/paged.polyfill.js"></script>
|
||||
<link rel="stylesheet" href="styles/3.css">
|
||||
<meta charset="utf-8"/>
|
||||
<title>Booklet</title>
|
||||
</head>
|
||||
<body>
|
||||
<span>ok let's talk in English for awhile what are we eating</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIP.URkqiGHddaNVnvoUtMflQQHaF0&pid=Api'>
|
||||
<span>or not so it is starting again</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIP.w2KDA__F4lQ1NyTt5FOjIgAAAA&pid=Api'>
|
||||
<span>ok navigate</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIF.y9XMU%2b2gdAWMj7VXDE%2ft%2fQ&pid=Api'>
|
||||
<span>you need to rest sometimes</span>
|
||||
<img src='https://tse4.mm.bing.net/th?id=OIP.GbFGas-ayDWMUd_9vgedSwHaGO&pid=Api'>
|
||||
<span>also implemented that implementing</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIF.GJKQxrJERpdrEixc86Aylg&pid=Api'>
|
||||
<span>he's a sentence and we are not interested</span>
|
||||
<img src='https://tse2.mm.bing.net/th?id=OIP.B8V15Vgwb4WSIBCh_DBKAAHaEK&pid=Api'>
|
||||
<span>remember</span>
|
||||
<img src='https://tse4.mm.bing.net/th?id=OIP.FIpqGxb0o5Qhs7F6FsmLUwHaJ4&pid=Api'>
|
||||
<span>so basically sentence</span>
|
||||
<img src='https://tse4.mm.bing.net/th?id=OIP.R-Aq7zQgyXkEkqHsRpd_fQHaK5&pid=Api'>
|
||||
<span>where is to go a couple and then you put the adult</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIP.-zezO3MYsykMDFXKr5BN4gHaFT&pid=Api'>
|
||||
<span>a bit easier to understand</span>
|
||||
<img src='https://tse2.mm.bing.net/th?id=OIP.dAOU1dmcC7DcOHjdZWnsOAHaFj&pid=Api'>
|
||||
<span>actually maybe people just don't just</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIP.lN80mz6v0-Og5Hy4rBFDBgHaFj&pid=Api'>
|
||||
<span>I'm just thinking it's not the centres don't put the dot and just put a couple</span>
|
||||
<img src='https://tse2.mm.bing.net/th?id=OIP.soE-jIhLUlXXcsImI9dSAgHaDE&pid=Api'>
|
||||
<span>because then</span>
|
||||
<img src='https://tse4.mm.bing.net/th?id=OIP._isREjX53ak2ahph2VOozgHaIM&pid=Api'>
|
||||
<span>they are talking they are a booklet but they are not writing</span>
|
||||
<img src='https://tse2.mm.bing.net/th?id=OIP.-utvaIFVP4SIM9E9Am3efAHaEZ&pid=Api'>
|
||||
<span>so it is just fine</span>
|
||||
<img src='https://tse3.mm.bing.net/th?id=OIP.6d-LRDAm986tLKsRdmxAFwHaC_&pid=Api'>
|
||||
<span>so are you done</span>
|
||||
<img src='https://tse1.mm.bing.net/th?id=OIF.GUCE7OkP722hk6eZcjJckQ&pid=Api'>
|
||||
<span>and now is f***** up</span>
|
||||
<img src='https://tse4.mm.bing.net/th?id=OIP.Z_JLWGz4XfBuc0lzMmTUggHaEQ&pid=Api'>
|
||||
</body>
|
||||
</html>
|
Binary file not shown.
@ -1,17 +0,0 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="pagedjs_files/interface.css">
|
||||
<script src="pagedjs_files/paged.polyfill.js"></script>
|
||||
<link rel="stylesheet" href="styles/1.css">
|
||||
<meta charset="utf-8"/>
|
||||
<title>Booklet</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div class="newchapter">
|
||||
<h1>I WISH I COULD SHOW YOU A PICTURE OF WHAT I GOT🤮</h1>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -1,35 +0,0 @@
|
||||
import nltk
|
||||
nltk.download('punkt')
|
||||
|
||||
# Read the speech-to-text result; the encoding is explicit so the file is
# read the same way on every OS
with open('speech.txt', 'r', encoding='utf-8') as result:
    r = result.read()

# Delete the empty "interim" placeholder and end every line with a dot
r = r.replace('<span class="interim"></span>', '').replace('\n', '. ')

tokens = nltk.word_tokenize(r)  # split the text in words
pos = nltk.pos_tag(tokens)      # list of (word, POS tag) pairs

# First part of the page
html = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="pagedjs_files/interface.css">
<script src="pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="styles/1.css">
<meta charset="utf-8"/>
<title>Booklet</title>
</head>
<body>
'''

# One span for each word, with its POS tag as class; every span is closed
# with </span> (a second <span> would nest all the following words inside it)
spans = []
for x in pos:
    if x[0] == '.':
        spans.append("<span class='dot'>.</span><br> ")
    else:
        spans.append("<span class='" + x[1] + "'>" + x[0] + "</span> ")
html += ''.join(spans)

# Close the page :: "+=" appends to the page, while a plain "=" would throw
# away everything built so far
html += '''</body>
</html>'''

with open('index.html', 'w', encoding='utf-8') as index:
    index.write(html)
|
@ -1,3 +0,0 @@
|
||||
and again are we are
|
||||
let's see if it works again with these fantastic xt500
|
||||
<span class="interim"></span>
|
@ -1,87 +0,0 @@
|
||||
# Bonus!
|
||||
|
||||
# Scrape and download images in local from DuckDuckGo
|
||||
|
||||
|
||||
# First, you need python. You can download Python from its website:
|
||||
|
||||
# https://www.python.org/
|
||||
|
||||
|
||||
# Then, you need to install DuckDuckGoImages,open the terminal and digit:
|
||||
|
||||
# pip3 install DuckDuckGoImages
|
||||
|
||||
|
||||
|
||||
import DuckDuckGoImages as ddg
|
||||
import os
|
||||
import shutil
|
||||
|
||||
# Read the speech-to-text result, one query for each line; the encoding is
# explicit so the file is read the same way on every OS
with open('speech.txt', 'r', encoding='utf-8') as speech:
    qq = speech.readlines()

# First part of the page, filled in the loop below
html = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="pagedjs_files/interface.css">
<script src="pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="styles/3.css">
<meta charset="utf-8"/>
<title>Booklet</title>
</head>
<body>
'''

# Start from an empty "images" folder
if os.path.isdir('./images/'):
    shutil.rmtree('./images/')

os.mkdir('./images/')


for q in qq:
    # readlines() keeps the "\n" at the end of each line: strip it first,
    # otherwise the check below never matches a line followed by a line break
    q = q.strip()

    # Nothing to search for an empty line or for the "interim" placeholder
    if not q or q == '''<span class="interim"></span>''':
        continue

    qDDG = q.replace(' ', '+')     # the query sent to DuckDuckGo
    qBinded = q.replace(' ', '')   # the query without spaces, used as a name
    print(qDDG)

    os.mkdir(f'./images/{qBinded}')
    ddg.download(qDDG, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)

    # List the downloaded files once and reuse the list
    normalize = os.listdir(f"./images/{qBinded}/")
    print(len(normalize))

    if not normalize:
        # No picture downloaded: lay out only the text and drop the empty folder
        html += f'<span class="{qBinded}">{q}</span><br><br>'
        os.rmdir(f'./images/{qBinded}/')
        continue

    print('NORMALIZE', normalize)
    normalize = normalize[0]  # keep the first downloaded picture

    splitExtension = os.path.splitext(normalize)
    print('ESTENSIONE', splitExtension)

    # Move the picture to the main folder as <qBinded>.jpg, then delete the
    # folder of this q with the pictures left inside
    os.replace(f'./images/{qBinded}/{normalize}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')

    html += f"""<span class="{qBinded}">{q}</span>"""
    html += f"""<span><img src="./images/{qBinded}.jpg"></span>"""


html += '''</body>
</html>'''

html = html.replace(' .', '.').replace(" '", "'")

with open('picindex.html', 'w', encoding='utf-8') as index:
    index.write(html)
|
@ -1,131 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "35367849-a425-404c-bbbd-d8d7138f1838",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"\n",
|
||||
"with open('../speech.txt','r') as result:\n",
|
||||
" r = result.read()\n",
|
||||
" \n",
|
||||
"r = r.replace('<span class=\"interim\"></span>','').replace('\\n','. ')\n",
|
||||
"\n",
|
||||
"l=nltk.word_tokenize(r)\n",
|
||||
"pos = nltk.pos_tag(l)\n",
|
||||
"\n",
|
||||
"html = ''\n",
|
||||
"for x in pos:\n",
|
||||
" if x[0] == '.':\n",
|
||||
" html += \"<span class='dot'>.<span><br> \"\n",
|
||||
" else:\n",
|
||||
" html += \"<span class='\"+x[1]+\"'> \"+x[0]+\"<span>\"\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0d1737d0-cda2-4208-9585-487ef809bdff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br> \""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 127,
|
||||
"id": "cfe7aecd-bcff-4e3b-81c2-0622434cf62b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('index.html','w') as index:\n",
|
||||
" index.write(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"id": "8d2195b3-dc7a-4cdc-b599-5570766df12d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"id": "02f79b41-8238-4fa1-9530-a84b5069bce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c6e3cf8e-3493-463b-807a-89342e7c8731",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ac322fc4-db27-42a3-aef0-ac0fbe3e26fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e02a152-1c3c-4e2c-b1aa-1a1f3172810c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "264c688c-aba4-4cf5-8f6d-b9b35c3a0969",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,131 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "35367849-a425-404c-bbbd-d8d7138f1838",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import nltk\n",
|
||||
"\n",
|
||||
"with open('../speech.txt','r') as result:\n",
|
||||
" r = result.read()\n",
|
||||
" \n",
|
||||
"r = r.replace('<span class=\"interim\"></span>','').replace('\\n','. ')\n",
|
||||
"\n",
|
||||
"l=nltk.word_tokenize(r)\n",
|
||||
"pos = nltk.pos_tag(l)\n",
|
||||
"\n",
|
||||
"html = ''\n",
|
||||
"for x in pos:\n",
|
||||
" if x[0] == '.':\n",
|
||||
" html += \"<span class='dot'>.<span><br> \"\n",
|
||||
" else:\n",
|
||||
" html += \"<span class='\"+x[1]+\"'> \"+x[0]+\"<span>\"\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "0d1737d0-cda2-4208-9585-487ef809bdff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br> \""
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"html"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 127,
|
||||
"id": "cfe7aecd-bcff-4e3b-81c2-0622434cf62b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with open('index.html','w') as index:\n",
|
||||
" index.write(html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"id": "8d2195b3-dc7a-4cdc-b599-5570766df12d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"id": "02f79b41-8238-4fa1-9530-a84b5069bce0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c6e3cf8e-3493-463b-807a-89342e7c8731",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ac322fc4-db27-42a3-aef0-ac0fbe3e26fb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1e02a152-1c3c-4e2c-b1aa-1a1f3172810c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "264c688c-aba4-4cf5-8f6d-b9b35c3a0969",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1 +0,0 @@
|
||||
<span class='CC'>and<span> <span class='RB'>again<span> <span class='VBP'>are<span> <span class='PRP'>we<span> <span class='VBP'>are<span> <span class='dot'>.<span><br> <span class='NN'>let<span> <span class='POS'>'s<span> <span class='VB'>see<span> <span class='IN'>if<span> <span class='PRP'>it<span> <span class='VBZ'>works<span> <span class='RB'>again<span> <span class='IN'>with<span> <span class='DT'>these<span> <span class='JJ'>fantastic<span> <span class='NN'>xt500<span> <span class='dot'>.<span><br>
|
Loading…
Reference in New Issue