|
|
@ -7,7 +7,9 @@
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"from jinja2 import Template \n",
|
|
|
|
"from jinja2 import Template \n",
|
|
|
|
"from urllib.parse import urlparse"
|
|
|
|
"from urllib.parse import urlparse\n",
|
|
|
|
|
|
|
|
"import requests\n",
|
|
|
|
|
|
|
|
"from bs4 import BeautifulSoup"
|
|
|
|
]
|
|
|
|
]
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
{
|
|
|
@ -16,9 +18,6 @@
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"import requests\n",
|
|
|
|
|
|
|
|
"from bs4 import BeautifulSoup\n",
|
|
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
|
|
"url = \"https://archive.org/details/cd-roms?and[]=mediatype%3A%22image%22\"\n",
|
|
|
|
"url = \"https://archive.org/details/cd-roms?and[]=mediatype%3A%22image%22\"\n",
|
|
|
|
"response = requests.get(url)\n",
|
|
|
|
"response = requests.get(url)\n",
|
|
|
|
"html = response.content\n",
|
|
|
|
"html = response.content\n",
|
|
|
@ -31,8 +30,8 @@
|
|
|
|
"metadata": {},
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"source": [
|
|
|
|
"titles = scraped.find_all(\"img\", src=True)\n",
|
|
|
|
"titles = soup.find_all(\"img\", src=True)\n",
|
|
|
|
"titles2 = scraped.find_all(\"img\", source=True)\n",
|
|
|
|
"titles2 = soup.find_all(\"img\", source=True)\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"allimages = titles + titles2\n"
|
|
|
|
"allimages = titles + titles2\n"
|
|
|
|
]
|
|
|
|
]
|
|
|
|