# speech2design/1_pythoning/3_imageScraping.py

# October 2021, copyleft || Kamome and Funix ||  Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam

# Scrape images from DuckDuckGo
#
# With duckduckgo_images_api!


from duckduckgo_images_api import search # import the library for scrape
import time                             # to create delays :: for having a few seconds to check the console


# Load the transcript: every line of speech.txt becomes one search query.
# The encoding is given explicitly so the text is decoded the same way on
# every machine instead of depending on the locale's default encoding.
with open('speech.txt', 'r', encoding='utf-8') as speech:
    qq = speech.readlines()             # a list with one string per line, each still ending in "\n"
    print(qq)                           # print the array!

time.sleep(2)                           # short pause to check qq in the console!


                                        # declare the first part of the text of the html, we will fill it
                                        # in the process with loops

# Opening part of the page (everything up to and including <body>), written
# as a list of lines and glued together with "\n". The empty strings at both
# ends produce the leading and the trailing newline of the text.
html = "\n".join([
    "",
    "<html>",
    "<head>",
    '    <meta name="viewport" content="width=device-width, initial-scale=1">',
    '    <link rel="stylesheet" href="./pagedjs_files/interface.css">',
    '    <script src="./pagedjs_files/paged.polyfill.js"></script>',
    '    <link rel="stylesheet" href="./styles/3.css">',
    '    <meta charset="utf-8"/>',
    "    <title>📡 💻📘</title>",
    "</head>",
    "<body>",
    "",
])


                                        # Process each line :: handle every element of the array qq

                                        # q is for "query", qq for "queries", because we will send requests to
                                        # DuckDuckGo searching the text of each line of speech.txt

# Standard-library helper used to make the image link safe inside the
# src='...' attribute. This binds only the name `escape`; the `html` string
# that holds the page is not touched by the import.
from html import escape

for q in qq:

    print(q)                            # print the q!

    time.sleep(2)                       # check current q in the console!

    q = q.strip()                       # also removes the "\n" at the end of the line

    # Lines that carry no query are skipped: blank lines and the empty
    # <span class="interim"> placeholder. This nope.
    if not q or q == '''<span class="interim"></span>''':
        continue

                                        # Scrape images with search()!
                                        # q is, indeed, the query for DuckDuckGo
    results = search(q)

    # Take the link of the first image. A query can come back without any
    # hit, or with something that is not the expected dictionary; in that
    # case the text is kept and only the picture is left out, so one bad
    # line does not stop the whole run.
    try:
        r = results["results"][0]["image"]  # get the http link to the image
    except (TypeError, KeyError, IndexError):
        r = None
        print(f"no image found for: {q}")

    html += f"""    <span> {q} </span>\n"""  # Now let's fill the html with text and the pic
    if r is not None:
        # Escape &, quotes and angle brackets so a link containing them
        # cannot break out of the single-quoted attribute.
        html += f"""    <img src='{escape(r, quote=True)}'>\n"""


                                        # Close the html text
html += '''</body>
</html>'''

# Remove the blank in front of every single quote, everywhere in the page.
# NOTE(review): the reason for this clean-up is not visible in this script;
# kept exactly as it was.
html = html.replace(" '", "'")

# Save the <html> file! The path goes one folder up ('../') and then into
# 2_layout; the previous ',,/' was a typo and pointed to a folder that is
# presumably not there. The file is written as UTF-8 to match the
# <meta charset="utf-8"/> declared in the page and so that the emoji in the
# <title> can be written whatever the locale's default encoding is.
with open('../2_layout/3.html', 'w', encoding='utf-8') as index:
    index.write(html)