# speech2design/1_pythoning/3_imageScraping.py

# October 2021, copyleft || Kamome and Funix ||  Speech-to-Derive * The Myth of Natural Language || Roodkapje, Rotterdam

# Scrape images from DuckDuckGo
#
# With duckduckgo_images_api!


# to create delays :: for having a few seconds to check the console
import time
from html import escape  # to safely put text and links inside the html

from duckduckgo_images_api import search  # import the library for scrape


# Read the speech. The encoding is explicit so the script behaves the same
# on every machine, whatever its default locale is.
with open('../speech.txt', 'r', encoding='utf-8') as speech:  # let's import the text
    # and split it in lines, it will create an array, a list
    qq = speech.readlines()
    print(qq)                           # print the array!

time.sleep(2)                           # check qq in the console!

# declare the first part of the text of the html, we will fill it
# in the process with loops

html = '''
<!DOCTYPE html>
<head>
    <meta charset="UTF-8"> 
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <link rel="stylesheet" href="./pagedjs_files/interface.css">
    <script src="./pagedjs_files/paged.polyfill.js"></script>
    <link rel="stylesheet" href="./styles/2.css">
    <title>📡 💻📘</title>
</head>
<body>

    <div class="firstP">
        <h1 style="position: absolute; top: 0; left:0; color: black;">Title!</h1>
        
        <p style="position: absolute; bottom: 0; right:0;">Authors!</p>
    </div>

    <div class="contents">
'''

# Elaborate each line :: process every element of the array qq

# q is for "query", qq for "queries", because we will send requests to
# DuckDuckGo searching the text of each line of speech.txt

for q in qq:

    # remove the surrounding whitespace, including the final "\n",
    # which means "return to the next line"
    q = q.strip()

    # an empty line has nothing to search for: skip it
    if not q:
        continue

    print(q)                            # print the q!

    time.sleep(2)                       # check current q in the console!

    # Scrape images with search()!
    # q is, indeed, the query for DuckDuckGo
    results = search(q)

    # get the http link to the first image; if the answer has no usable
    # result, keep going without a picture instead of crashing and losing
    # all the html collected so far
    try:
        r = results["results"][0]["image"]
    except (TypeError, KeyError, IndexError):
        r = None
        print(f"no image found for: {q}")

    # Now let's fill the html with text and the pic.
    # escape() protects the page from characters like < & ' " that would
    # otherwise break the markup or the quoted src attribute
    html += f"""    <span> {escape(q)} </span>\n"""
    if r is not None:
        html += f"""    <img src='{escape(r)}'>\n"""

# Close the html text (</div> closes the "contents" div opened above)
html += ''' </div>
    </body>                                                              
</html>'''

# NOTE: the former html.replace(" '", "'") step is gone on purpose: nothing
# in the template needs it, and it deleted the space before every
# apostrophe found in the speech text.

# Save the <html> file! utf-8 matches the <meta charset> declared in the page
with open('../2_layout/3.html', 'w', encoding='utf-8') as index:
    index.write(html)
readme ++ 3 years ago			`# October 2021, copyleft \|\| Kamome and Funix \|\| Speech-to-Derive * The Myth of Natural Language \|\| Roodkapje, Rotterdam`

comments and tidy 3 years ago			`# Scrape images from DuckDuckGo`
			`#`
			`# With duckduckgo_images_api!`


fixxees 3 years ago			`from duckduckgo_images_api import search # import the library for scrape`
			`# to create delays :: for having a few seconds to check the console`
			`import time`
comments and tidy 3 years ago

fixxees 3 years ago			`with open('../speech.txt', 'r') as speech: # let's import the text`
			`# and split it in lines, it will create an array, a list`
			`qq = speech.readlines()`
comments and tidy 3 years ago			`print(qq) # print the array!`

			`time.sleep(2) # check qq in the console!`

fixxees 3 years ago			`# declare the first part of the text of the html, we will fill it`
			`# in the process with loops`
comments and tidy 3 years ago
			`html = '''`
first page 3 years ago			`<!DOCTYPE html>`
comments and tidy 3 years ago			`<head>`
first page 3 years ago			`<meta charset="UTF-8">`
comments and tidy 3 years ago			`<meta name="viewport" content="width=device-width, initial-scale=1">`
			`<link rel="stylesheet" href="./pagedjs_files/interface.css">`
			`<script src="./pagedjs_files/paged.polyfill.js"></script>`
first page 3 years ago			`<link rel="stylesheet" href="./styles/2.css">`
comments and tidy 3 years ago			`<title>📡 💻📘</title>`
			`</head>`
			`<body>`
first page 3 years ago
			`<div class="firstP">`
			`<h1 style="position: absolute; top: 0; left:0; color: black;">Title!</h1>`

first page 3 years ago			`<p style="position: absolute; bottom: 0; right:0;">Authors!</p>`
first page 3 years ago			`</div>`

			`<div class="contents">`
comments and tidy 3 years ago			`'''`

fixxees 3 years ago			`# Elaborate each line :: process every element of the array qq`
comments and tidy 3 years ago
fixxees 3 years ago			`# q is for "query", qq for "queries", because we will send requests to`
			`# DuckDuckGo searching the text of each line of speech.txt`
comments and tidy 3 years ago
			`for q in qq:`

			`print(q) # print the q!`

			`time.sleep(2) # check current q in the console!`

fixxees 3 years ago			`q = q.strip()`
comments and tidy 3 years ago
fixxees 3 years ago			`# remove "\n", which means "return to the next line"`
			`q = q.replace("\n", "")`
comments and tidy 3 years ago
fixxees 3 years ago			`# Scrape images with search()!`
			`# q is, indeed, the query for DuckDuckGo`
comments and tidy 3 years ago			`results = search(q)`
			`r = results["results"][0]["image"] # get the http link to the image`

			`html += f""" <span> {q} </span>\n""" # Now let's fill the html with text and the pic`
			`html += f""" <img src='{r}'>\n"""`

fixxees 3 years ago			`# Close the html text`
first page 3 years ago			`html += ''' <div>`
			`</body>`
comments and tidy 3 years ago			`</html>'''`

first page 3 years ago
comments and tidy 3 years ago			`html = html.replace(" '", "'")`

fixxees 3 years ago			`with open('../2_layout/3.html', 'w') as index: # Save the <html> file!`
readme ++ 3 years ago			`index.write(html)`