speech2design/1_pythoning/bonus_imageDownloader.py

# October 2021, copyleft || Kamome and Funix ||  Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam


# Bonus!
#
# Scrape images from DuckDuckGo and download them to a local folder

# with DuckDuckGoImages!


import DuckDuckGoImages as ddg          # import the library for scrape
import os                               # to manipulate the file system
import shutil                           # same but powerfull >:D
import time                             # to create delays :: for having a few seconds to check the console
import random                           # to get random numbers


                                        # Prepare the local folder :: where the images will be saved >>

images_root = './images/'               # every download of this run lands under this folder

if os.path.isdir(images_root):          # a previous run left its folder behind..
    shutil.rmtree(images_root)          # ..so wipe it together with everything inside

os.mkdir(images_root)                   # start again from an empty folder

                                        # start the layouting :: html + css + paged.js >>

                                        # declare the first part of the text of the html, we will fill it
                                        # in the process with loops
# Opening part of the page, kept as one literal string:
#   - <head> loads interface.css and paged.polyfill.js from ./pagedjs_files/
#     and the stylesheet ./styles/3.css (all relative paths), and sets the
#     charset to utf-8;
#   - <div class="firstP"> is a first page with placeholder "Title!" and
#     "Authors!" texts, positioned with inline styles;
#   - <div class="contents"> is left open on purpose: the loop further down
#     appends one <span> (and possibly one <img>) per line of speech, and the
#     closing tags are appended after the loop.
html = '''
<html>
<head>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="./pagedjs_files/interface.css">
    <script src="./pagedjs_files/paged.polyfill.js"></script>
    <link rel="stylesheet" href="./styles/3.css">
    <meta charset="utf-8"/>
    <title>📡 💻📘</title>
</head>
<body>

    <div class="firstP">
        <h1 style="position: absolute; top: 0; left:0; color: black;">Title!</h1>

        <p style="position: absolute; bottom: 0; right:0;">Authors!</p>
    </div>

    <div class="contents">
'''


                                        # Open the speech-to-text result :: downloaded from the web interface >>

# Read the transcript as UTF-8 explicitly: without `encoding=` Python falls
# back to the platform default, which garbles accented letters on systems
# whose default is not UTF-8 (the generated page declares charset utf-8).
with open('../speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    qq = speech.readlines()             # one list item per line; each item keeps its trailing "\n"
    print(qq)                           # print the array!


time.sleep(2)                           # a short pause to check qq in the console!


                                        # Elaborate each line :: process every element of the array qq

                                        # q is for "query", qq for "queries", because we will send requests to
                                        # DuckDuckGo searching the text of each line of speech.txt

for q in qq:
    # For every line of speech: search it on DuckDuckGo, keep one random
    # thumbnail as ./images/<qBinded>.jpg and append the text (plus the picture,
    # when one was found) to the html string.
    #
    # q still carries the trailing "\n" kept by readlines(); it is written to
    # the html and sent to DuckDuckGo unchanged.

    print(q)                            # print the q!

    time.sleep(2)                       # check current q in the console!

    text = q.strip()                    # the line without the surrounding whitespace / "\n"

    # Skip the lines that are not queries:
    #  - the empty placeholder span (compared on the stripped text, because
    #    the raw line ends with "\n" and would never be equal to the marker);
    #  - blank lines, whose empty folder name would make os.mkdir() try to
    #    re-create './images/' and fail with FileExistsError.
    if not text or text == '''<span class="interim"></span>''':
        continue

    # qBinded is the current q without spaces and without "\n": it is used as
    # folder name, file name and css class.
    qBinded = q.replace(' ', '').replace("\n", "")

    os.mkdir(f'./images/{qBinded}')     # temporary folder that receives the downloads of this q

    print(qBinded)                      # print qBinded!

    time.sleep(2)                       # check qBinded in the console!

    # Scrape the images with ddg.download() (DuckDuckGoImages imported *as* ddg):
    #   q          -> the query sent to DuckDuckGo
    #   folder     -> where the files are saved
    #   max_urls   -> how many results to attempt to scrape
    #   thumbnails -> True/False, download thumbnails or bigger images
    ddg.download(q, folder=f"./images/{qBinded}/", max_urls=10, thumbnails=True)

    picsList = os.listdir(f"./images/{qBinded}/")   # names of the files that were actually downloaded

    print("List of pics:", picsList)                # Check how many downloaded pictures!

    time.sleep(2)                                   # check in the console!

    if not picsList:                                # nothing was downloaded for this q..
        html += f'<span class="{qBinded}">{q}</span><br><br>'  # ..so add only the text to the html..
        html += "\n"
        os.rmdir(f'./images/{qBinded}/')            # ..remove the (empty) temporary folder..
        continue                                    # ..and go on with the next q

    # Layout q and its pic!
    #
    # random.choice() picks one existing item. The previous
    # random.randint(0, len(picsList)) included len(picsList) itself, an index
    # one past the end of the list, which raised IndexError from time to time.
    pic = random.choice(picsList)

    # Move the chosen picture to the main folder, named after qBinded with a
    # .jpg extension (os.replace renames and moves in a single step), then
    # delete the temporary folder together with the pictures not chosen.
    os.replace(f'./images/{qBinded}/{pic}', f'./images/{qBinded}.jpg')
    shutil.rmtree(f'./images/{qBinded}/')

    # Now let's fill the html with the text and the pic.
    # NOTE(review): a browser resolves this src relative to the html file, which
    # is saved in ../2_layout/ while the pictures are saved in ./images/ of the
    # current folder -- confirm that the images folder ends up next to the page.
    html += f"""    <span class="{qBinded}">{q}</span>"""
    html += "\n"
    html += f"""    <span><img src="./images/{qBinded}.jpg"></span>"""
    html += "\n"

                                                      # Close the html text
# Close <div class="contents"> with a real closing tag: the previous "<div>"
# opened one more element instead of closing the one left open by the header.
html += ''' </div>
    </body>
</html>'''

# Save the <html> file! The page declares charset utf-8 and contains emoji, so
# write it as UTF-8 explicitly instead of relying on the platform default
# (which can raise UnicodeEncodeError or produce a page the browser misreads).
with open('../2_layout/localpics.html', 'w', encoding='utf-8') as index:
    index.write(html)