You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

135 lines
6.0 KiB
Python

# October 2021, copyleft || Kamome and Funix || Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam
# Bonus!
#
# Scrape images from DuckDuckGo and download them locally
# with the DuckDuckGoImages library!
import DuckDuckGoImages as ddg # import the library for scrape
import os # to manipulate the file system
import shutil # same but powerfull >:D
import time # to create delays :: for having a few seconds to check the console
import random # to get random numbers
# Prepare the local folder :: where the images will be saved >>
# Every run starts from an empty ./images/, so pictures left over from a
# previous run can never slip into the new layout.
if os.path.isdir('./images/'):     # check if the folder "images" exists
    shutil.rmtree('./images/')     # if yes, delete it with everything inside
os.mkdir('./images/')              # in both cases, create a fresh new one
# start the layouting :: html + css + paged.js >>
# `html` holds the text of the page we are building: here we write only
# the opening part (the <head> and the opening <body>), the rest gets
# appended later, piece by piece
html = "\n".join((
    "",                                  # the text begins with a line break..
    "<html>",
    "<head>",
    '<meta name="viewport" content="width=device-width, initial-scale=1">',
    '<link rel="stylesheet" href="./pagedjs_files/interface.css">',
    '<script src="./pagedjs_files/paged.polyfill.js"></script>',
    '<link rel="stylesheet" href="./styles/3.css">',
    '<meta charset="utf-8"/>',
    "<title>📡 💻📘</title>",
    "</head>",
    "<body>",
    "",                                  # ..and ends with one too
))
# Open the speech-to-text result :: downloaded from the web interface >>
# The encoding is spelled out so the text is read the same way on every
# computer instead of depending on the system default.
# NOTE(review): assumes speech.txt is saved as UTF-8, the charset the
# generated page declares -- confirm against the web interface export.
with open('../speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    qq = speech.readlines()  # and split it in lines, it will create an array, a list
                             # (every line keeps its final "\n")
print(qq)       # print the array!
time.sleep(2)   # check qq in the console!
# Elaborate each line :: process every element of the array qq
# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt
# For every q: download some pictures in a temporary folder, keep one of
# them chosen at random as ./images/<qBinded>.jpg, and append to html the
# text of q followed by its picture (or just the text when nothing was found)
for q in qq:
    print(q)        # print the q!
    time.sleep(2)   # check current q in the console!

    # readlines() leaves the "\n" at the end of each line, so the checks
    # are done on the stripped text; comparing the raw q would only match
    # a last line that has no line break
    text = q.strip()
    if text == '''<span class="interim"></span>''':  # This nope.
        continue
    if not text:
        # an empty line has nothing to search, and its empty name would
        # make os.mkdir() point at ./images/ itself, which already exists
        continue

    # qBinded is the current q but without spaces and "\n", which means
    # "return to the next line"; we use it to name the folder, the picture
    # and the css class of this q
    qBinded = q.replace(' ', '').replace("\n", "")
    os.mkdir(f'./images/{qBinded}')  # create the folder with the name given by qBinded
    print(qBinded)  # print qBinded!
    time.sleep(2)   # check current qBinded in the console!

    # Scrape images with ddg.download()! :: we imported DuckDuckGoImages *as* ddg,
    # it's just a shorter name
    # q is, indeed, the query for DuckDuckGo
    # folder=(../path/to/download)
    # max_urls=(how many results to attempt to scrape)
    # thumbnails=(True/False, to download thumbnails or bigger images)
    ddg.download(q, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)
    picsList = os.listdir(f"./images/{qBinded}/")  # get the contents of the folder, it will create another array
    # each downloaded image will have a random UUIDv4 name, so the next step
    # is to change its name to the name of the current q
    print("List of pics:", picsList)  # Check how many downloaded pictures!
    time.sleep(2)                     # check in the console!

    if not picsList:  # if the list is empty..
        html += f'<span class="{qBinded}">{q}</span><br><br>'  # ..add now the <html> for just the text, since there are no images downloaded..
        html += "\n"
        os.rmdir(f'./images/{qBinded}/')  # ..and delete the folder created, since it is useless..
        continue  # ..from now on this q can't do anything more, let's go to the next iteration

    # Layout q and its pic!
    # random.choice() always picks an element that exists; randint(0, len)
    # counts len itself as a possible result, one step past the last picture
    pic = random.choice(picsList)  # let's take a random picture from the array
    # rename the pic with qBinded + the .jpg extension and move it to the
    # main folder, all in one step
    os.replace(f'./images/{qBinded}/{pic}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')  # and it's time to delete the folder of this q
    html += f""" <span class="{qBinded}">{q}</span>"""  # Now let's fill the html with text and the pic
    html += "\n"
    html += f""" <span><img src="./images/{qBinded}.jpg"></span>"""
    html += "\n"
# Close the html text
html += '''</body>
</html>'''
# Save the <html> file!
# The page declares charset="utf-8" and its title contains emoji, so the
# file is written explicitly as UTF-8; with the system default encoding
# the write can fail where that default is not UTF-8
with open('../2_layout/localpics.html', 'w', encoding='utf-8') as index:
    index.write(html)