You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
144 lines
6.2 KiB
Python
144 lines
6.2 KiB
Python
# October 2021, copyleft || Kamome and Funix || Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam
|
|
|
|
|
|
# Bonus!
#
# Scrape and download images locally from DuckDuckGo
# with DuckDuckGoImages!
|
|
|
|
|
|
|
|
import DuckDuckGoImages as ddg # import the library for scrape
|
|
import os # to manipulate the file system
|
|
import shutil # same but powerfull >:D
|
|
import time # to create delays :: for having a few seconds to check the console
|
|
import random # to get random numbers
|
|
|
|
|
|
|
|
# Prepare the local folder :: where the images will be saved >>

# Every run starts from an empty "./images/" folder: whatever a previous
# run left behind is removed first, then the folder is created again.
if not os.path.isdir('./images/'):
    os.mkdir('./images/')          # nothing to clean up, just create it
else:
    shutil.rmtree('./images/')     # drop the old folder and all its contents
    os.mkdir('./images/')          # and then create a fresh new one
|
|
|
|
# Start the layouting :: html + css + paged.js >>

# Declare the first part of the html text. It links the paged.js files and
# the stylesheet, writes the first page (title + authors) and opens the
# <div class="contents"> container. That container is left open on purpose:
# the rest of the script keeps appending to `html` and closes the page at
# the end.
html = '''
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="./pagedjs_files/interface.css">
<script src="./pagedjs_files/paged.polyfill.js"></script>
<link rel="stylesheet" href="./styles/3.css">
<meta charset="utf-8"/>
<title>📡 💻📘</title>
</head>
<body>

<div class="firstP">
<h1 style="position: absolute; top: 0; left:0; color: black;">Title!</h1>

<p style="position: absolute; bottom: 0; right:0;">Authors!</p>
</div>

<div class="contents">
'''
|
|
|
|
|
|
|
|
# Open the speech-to-text result :: downloaded from the web interface >>

# The encoding is given explicitly so the text is decoded the same way on
# every platform instead of depending on the locale default.
# NOTE(review): assumes speech.txt was saved as UTF-8 -- confirm.
with open('../speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    qq = speech.readlines()  # split it in lines: a list, each item keeps its "\n"

print(qq)  # print the array!

time.sleep(2)  # a short pause to check qq in the console!
|
|
|
|
|
|
# Elaborate each line :: process every element of the array qq

# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt

for q in qq:

    print(q)  # print the q!

    time.sleep(2)  # check current q in the console!

    # The lines in qq still carry their trailing "\n", so the marker is
    # compared against the stripped text. Blank lines are skipped too: they
    # would give an empty qBinded and os.mkdir('./images/') would fail
    # because that folder already exists.
    stripped = q.strip()
    if not stripped or stripped == '''<span class="interim"></span>''':  # This nope.
        continue

    # qBinded is the current q but without spaces and without "\n" (which
    # means "return to the next line"); it is used to name the temporary
    # folder, the saved picture and the css class of the text
    qBinded = q.replace(' ', '').replace("\n", "")

    os.mkdir(f'./images/{qBinded}')  # create the folder with the name given by qBinded

    print(qBinded)  # print qBinded!

    time.sleep(2)  # check current qBinded in the console!

    # Scrape images with ddg.download()! :: we imported DuckDuckGoImages *as* ddg,
    # it's just a shorter name
    #
    # q is, indeed, the query for DuckDuckGo
    # folder=(../path/to/download)
    # max_urls=(how many results to attempt to scrape)
    # thumbnails=(True/False, to download thumbnails or bigger images)
    ddg.download(q, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)

    # get the contents of the folder, it will create another array; the next
    # step is to rename one downloaded image after the current q
    picsList = os.listdir(f"./images/{qBinded}/")

    print("List of pics:", picsList)  # Check how many downloaded pictures!

    time.sleep(2)  # check in the console!

    if not picsList:  # if the list is empty..
        html += f'<span class="{qBinded}">{q}</span><br><br>'  # ..add the html for just the text, since no images were downloaded..
        html += "\n"
        os.rmdir(f'./images/{qBinded}/')  # ..and delete the folder created, since it is useless..
        continue  # ..this q can't do anything more, let's go to the next iteration

    # Layout q and its pic!

    # Take a random picture from the array. random.choice() always returns an
    # existing element, while random.randint(0, len(picsList)) can also return
    # len(picsList) itself, which is one past the last valid index.
    pic = random.choice(picsList)

    # Rename the pic to qBinded + ".jpg" and move it to the main folder in one step
    os.replace(f'./images/{qBinded}/{pic}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')  # and it's time to delete the folder of this q

    html += f""" <span class="{qBinded}">{q}</span>"""  # Now let's fill the html with text and the pic
    html += "\n"
    html += f""" <span><img src="./images/{qBinded}.jpg"></span>"""
    html += "\n"
|
|
|
|
|
|
|
|
# Close the html text: the <div class="contents"> opened at the start of
# the page has to be closed with </div> before </body>.
html += ''' </div>
</body>
</html>'''

# Save the <html> file! The page declares charset utf-8 and contains emoji,
# so the file is written as UTF-8 explicitly instead of with the platform
# default encoding, which may not be able to encode those characters.
with open('../2_layout/localpics.html', 'w', encoding='utf-8') as index:
    index.write(html)