You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

246 lines
8.9 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "bbf97edb-4082-49fd-9946-56aa3f3d0eb4",
"metadata": {},
"outputs": [],
"source": [
"import selenium \n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"import time\n",
"import requests\n",
"import os\n",
"import io\n",
"import json\n",
"import random\n",
"import json\n",
"from selenium.webdriver.chrome.service import Service\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"\n",
"DRIVER_PATH = '../../geckodriver'\n",
"DRIVER_PATH2 = '../chromedriver'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d53ab4b1-da92-4ae2-ae11-02fa2d8ffe7c",
"metadata": {},
"outputs": [],
"source": [
"def harvesting(query:str, max_links_to_fetch:int, wd:webdriver, sleep_between_interactions:int=1):\n",
"    \"\"\"Collect full-size image URLs from a Google Images search.\n",
"\n",
"    Loads the image-search page for ``query`` in ``wd``, clicks every thumbnail\n",
"    and records the ``src`` of the last ``n3VNCb`` element when it contains\n",
"    ``http``. After each recorded link the mapping read from\n",
"    ``axios-example/pics.json`` is updated and written back to that file.\n",
"\n",
"    Args:\n",
"        query: Search terms; URL-encoded before being placed in the search URL.\n",
"        max_links_to_fetch: Stop as soon as this many distinct links are collected.\n",
"        wd: Selenium WebDriver instance used for all browser interaction.\n",
"        sleep_between_interactions: Seconds to wait after each scroll to the page end.\n",
"\n",
"    Returns:\n",
"        The set of distinct image URLs found. It can hold fewer than\n",
"        ``max_links_to_fetch`` entries when no further thumbnails load.\n",
"    \"\"\"\n",
"    # Function-scope import so this cell does not depend on an edit to the import cell.\n",
"    from urllib.parse import quote_plus\n",
"\n",
"    pics_path = 'axios-example/pics.json'\n",
"    scroll_cycles = 3\n",
"\n",
"    # Read the existing mapping and close the handle right away; the same file\n",
"    # is rewritten below after every new link.\n",
"    # NOTE(review): assumes pics.json holds a JSON object (dict) - confirm.\n",
"    with open(pics_path) as pics_file:\n",
"        pics = json.load(pics_file)\n",
"\n",
"    def scroll_to_end(wd):\n",
"        wd.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
"        time.sleep(sleep_between_interactions)\n",
"\n",
"    # build the google query\n",
"    search_url = \"https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img\"\n",
"\n",
"    # load the page (the query is encoded so spaces, '&' or '#' cannot break the URL)\n",
"    wd.get(search_url.format(q=quote_plus(query)))\n",
"\n",
"    image_urls = set()\n",
"    results_start = 0\n",
"    # Next key to write in pics.json. Kept across batches so a later batch of\n",
"    # thumbnails does not overwrite the entries stored by an earlier one.\n",
"    pic_index = 0\n",
"\n",
"    while len(image_urls) < max_links_to_fetch:\n",
"        for _ in range(scroll_cycles):\n",
"            scroll_to_end(wd)\n",
"            time.sleep(.1)\n",
"\n",
"        # get all image thumbnail results\n",
"        thumbnail_results = wd.find_elements(By.CLASS_NAME, \"Q4LuWd\")\n",
"        number_results = len(thumbnail_results)\n",
"        print(f\"Found: {number_results} search results. Extracting links from {results_start}:{number_results}\")\n",
"\n",
"        if number_results <= results_start:\n",
"            # No new thumbnails appeared since the previous pass: stop instead of looping forever.\n",
"            print(f\"Found: {len(image_urls)} image links, no more results.\")\n",
"            break\n",
"\n",
"        for img in thumbnail_results[results_start:number_results]:\n",
"            # click the thumbnail to get the actual image\n",
"            try:\n",
"                img.click()\n",
"                time.sleep(.1)\n",
"            except Exception:\n",
"                continue\n",
"\n",
"            # extract image url; skip the thumbnail when no full-size element is present\n",
"            actual_images = wd.find_elements(By.CLASS_NAME, 'n3VNCb')\n",
"            if not actual_images:\n",
"                continue\n",
"            link = actual_images[-1].get_attribute('src')\n",
"            if link and 'http' in link:\n",
"                image_urls.add(link)\n",
"                # JSON object keys are strings; writing str keys updates an existing\n",
"                # entry instead of adding a second, duplicate key on dump.\n",
"                pics[str(pic_index)] = link\n",
"                time.sleep(.5)\n",
"\n",
"                with open(pics_path, \"w\") as outfile:\n",
"                    json.dump(pics, outfile)\n",
"\n",
"                pic_index += 1\n",
"                time.sleep(.5)\n",
"\n",
"            if len(image_urls) >= max_links_to_fetch:\n",
"                print(f\"Found: {len(image_urls)} image links, done!\")\n",
"                break\n",
"        else:\n",
"            # Every thumbnail of this batch was visited without reaching the target.\n",
"            print(\"Found:\", len(image_urls), \"image links, looking for more ...\")\n",
"            time.sleep(1)\n",
"            # find_elements returns an empty list (rather than raising) when the\n",
"            # button is absent; the class name matches the selector clicked below.\n",
"            if wd.find_elements(By.CLASS_NAME, \"mye4qd\"):\n",
"                wd.execute_script(\"document.querySelector('.mye4qd').click();\")\n",
"\n",
"        # move the result startpoint further down\n",
"        results_start = number_results\n",
"\n",
"    return image_urls"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6de53389-ded8-47e2-b0aa-c132405a308f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found: 267 search results. Extracting links from 0:267\n",
"Found: 200 image links, done!\n"
]
}
],
"source": [
"if __name__ == '__main__':\n",
"    # Start Chrome, open Google, type the query and harvest image links.\n",
"    query = \"VAAST COLSON\"\n",
"    wd = webdriver.Chrome(service=Service(ChromeDriverManager().install()))\n",
"    try:\n",
"        wd.get('https://google.com')\n",
"        time.sleep(1)\n",
"\n",
"        # Click the element with id W0wltc only when it is present (presumably the\n",
"        # cookie-consent button - confirm). find_elements returns an empty list\n",
"        # instead of raising when the element is missing.\n",
"        consent_buttons = wd.find_elements(By.ID, 'W0wltc')\n",
"        if consent_buttons:\n",
"            consent_buttons[0].click()\n",
"\n",
"        time.sleep(.5)\n",
"\n",
"        search_box = wd.find_element(By.CLASS_NAME, 'gLFyf')\n",
"        search_box.send_keys(query)\n",
"\n",
"        links = harvesting(query, 1000, wd)\n",
"    finally:\n",
"        # Always close the browser, even when one of the steps above raises.\n",
"        wd.quit()"
]
},
{
"cell_type": "code",
"execution_count": 90,
"id": "e2f2b34f-ab75-41d4-82ff-e69b8c0c4481",
"metadata": {},
"outputs": [],
"source": [
" #if (nPic%2):\n",
" # wd2.execute_script(f\"document.querySelector('.scene3D').innerHTML += '<div><img class={dx} style={pos} id=pic{nPic} src={linkPic}></div>'\")\n",
" # else:\n",
" # wd2.execute_script(f\"document.querySelector('.scene3D').innerHTML += '<div><img class={sx} style={pos} id=pic{nPic} src={linkPic}></div>'\")\n",
" #if (holdPic == 50):\n",
" # for rPic in range(groundPic,groundPic+10):\n",
" # try:\n",
" # wd2.execute_script(f\"remove('pic{rPic}')\")\n",
" # except:\n",
" # print('eheh')\n",
" # groundPic = groundPic+10\n",
" # holdPic = 0"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "ba2a67d3-8958-48eb-8f98-7d7d97d61706",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"View Vaast Colson's artworks on artnet. Learn about the artist and find an in-depth biography, exhibitions, original artworks, the latest news, ...\n",
"Biography Vaast Colson (Kapellen, Belgium, 1977) Lives and works in Antwerp, Belgium. Monopolis — Antwerp. 10 September — 6 November 2005. Exhibition.\n",
"Vaast Colson (°1977, Belgium) Vaast Colson's artworks are essentially ludic models of social spaces. His projects place the spectators into a certain social ...\n",
"Stay up to date with Vaast Colson (Belgian, 1977), APT artist. Discover works for sale, auction results, market data, news and exhibitions on MutualArt.\n",
"Browse the latest artworks, exhibitions, shows by Vaast Colson. On Artland you can find art for sale, browse 3D gallery exhibitions and much more.\n",
"Vaast Colson MAES & MATTHYS GALLERY Antwerp. 29. Nov 22. Feb 2009. Un-Scene WIELS Contemporary Art Centre, Brussels. 19. Oct 04. Jan 2009.\n",
"Vaast Colson and Kati Heck “Show me yours & I'll show you mine” at Museum of Contemporary Art Antwerp. 15.05.2016. Reading Time 2. Share.\n",
"MOSTRE ED EVENTI CHE RIGUARDANO Vaast Colson. TUTTIIN CORSOCONCLUSIFUTURI. Sanguine. Luc Tuymans on Baroque. Dal 17 ottobre 2018 al 25 febbraio 2019\n",
"Vaast Colson (Kapellen, 1977) is een Belgisch kunstschilder. Hij studeerde schilderkunst aan de Koninklijke Academie voor Schone Kunsten van Antwerpen.\n",
"Vaast Colson about his drawing activities, on the occasion of The Big Draw in Antwerp. www.thebigdraw.be Sine Van Menxel's solo exhibition, Fencing a...\n"
]
}
],
"source": [
"import json\n",
"\n",
"# Print the entries stored under the keys '0' .. '9' of meta.json.\n",
"# NOTE(review): '/meta.json' is an absolute path at the filesystem root - confirm it is intended.\n",
"# The with-block closes the file handle as soon as the JSON has been parsed.\n",
"with open('/meta.json') as dictionary:\n",
"    l = json.load(dictionary)\n",
"for x in range(10):\n",
"    print(l[str(x)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5d60578-9ce2-491b-be89-4a4dc3cc1203",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}