# October 2021, copyleft || Kamome and Funix || Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam

# Bonus!
#
# Scrape and download images locally from DuckDuckGo
# with DuckDuckGoImages!
#
# What this script does, step by step:
#   1. wipes and recreates the local ./images/ folder
#   2. reads ../speech.txt and uses every non-blank line as a search query
#   3. downloads up to 10 thumbnails per query, keeps one at random as
#      ./images/<query-without-spaces>.jpg
#   4. writes the collected layout text to ../2_layout/localpics.html

import os           # to manipulate the file system
import random       # to pick things at random
import shutil       # same as os, but more powerful >:D
import time         # to create delays :: for having a few seconds to check the console

import DuckDuckGoImages as ddg  # import the library for scraping


# Prepare the local folder :: where the images will be saved >>

if os.path.isdir('./images/'):      # check if the folder "images" exists
    shutil.rmtree('./images/')      # if yes, delete it
os.mkdir('./images/')               # and then create a fresh new one


# Start the layouting :: html + css + paged.js >>

# Declare the first part of the text of the html, we will fill it
# in the process with loops.
# NOTE(review): the string literals in this script contain no HTML tags in the
# copy under review; the markup may have been lost before review -- confirm
# against the intended template before relying on the generated page.
html = ''' 📡 💻📘

Title!

Authors!

'''


# Open the speech-to-text result :: downloaded from the web interface >>

# The encoding is explicit so the result does not depend on the locale default.
# NOTE(review): assumes speech.txt is UTF-8 -- confirm.
with open('../speech.txt', 'r', encoding='utf-8') as speech:   # let's import the text
    qq = speech.readlines()     # and split it in lines, it will create an array, a list
    print(qq)                   # print the array!
    time.sleep(2)               # check qq in the console!


# Elaborate each line :: process every element of the array qq

# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt
for q in qq:
    # readlines() keeps the trailing "\n" on every line, so a blank line is
    # "\n" and never ''. Skip anything that is only whitespace: otherwise
    # qBinded would be empty and os.mkdir('./images/') would fail because
    # that folder already exists.
    if not q.strip():           # This nope.
        continue

    print(q)                    # print the q!
    time.sleep(2)               # check current q in the console!

    # qBinded is the current q but without spaces and "\n" (which means
    # "return to the next line"); we will use qBinded to name each file downloaded
    qBinded = q.replace(' ', '').replace("\n", "")
    os.mkdir(f'./images/{qBinded}')     # create the folder with the name given by qBinded
    print(qBinded)              # print qBinded!
    time.sleep(2)               # check current qBinded in the console!

    # Scrape images with ddg.download()! :: we imported DuckDuckGoImages *as* ddg,
    # it's just a shorter name
    #   q is, indeed, the query for DuckDuckGo
    #   folder=(../path/to/download)
    #   max_urls=(how many results to attempt to scrape)
    #   thumbnails=(True/False, to download thumbnails or bigger images)
    ddg.download(q, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)

    # Get the contents of the folder, it will create another array.
    # The next step is to rename the chosen image after the current q.
    picsList = os.listdir(f"./images/{qBinded}/")
    print("List of pics:", picsList)    # Check how many downloaded pictures!
    time.sleep(2)               # check in the console!

    if not picsList:            # if the list is empty..
        html += f'{q}\n\n'      # ..add just the text, since there are no images downloaded..
        html += "\n"
        os.rmdir(f'./images/{qBinded}/')    # ..and delete the folder created, since it is useless..
        continue                # ..this q can't do anything more, let's go to the next iteration

    # Layout q and its pic!

    # Take a random picture from the array. random.choice() always returns an
    # existing element; random.randint(0, len(picsList)) can also return
    # len(picsList) itself, which is one past the last valid index.
    pic = random.choice(picsList)

    # Rename the pic to qBinded + the .jpg extension and move it to the main
    # folder in one step (os.replace overwrites an existing file of that name)
    os.replace(f'./images/{qBinded}/{pic}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')   # and it's time to delete the folder of this q

    html += f""" {q}"""         # Now let's fill the html with text and the pic
    html += "\n"
    html += f""" """
    html += "\n"


# Close the html text
html += '''
'''

# Save the file! The text contains emoji, so write it explicitly as UTF-8.
with open('../2_layout/localpics.html', 'w', encoding='utf-8') as index:
    index.write(html)