From a9717263367258ab1862572c8c6120cca7e82042 Mon Sep 17 00:00:00 2001 From: manetta Date: Thu, 11 Feb 2021 15:56:31 +0100 Subject: [PATCH] changing scraped to soup, debugging --- downloading-party/pirate-downloading-to-pdf.ipynb | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/downloading-party/pirate-downloading-to-pdf.ipynb b/downloading-party/pirate-downloading-to-pdf.ipynb index 1af7155..8adc2ac 100644 --- a/downloading-party/pirate-downloading-to-pdf.ipynb +++ b/downloading-party/pirate-downloading-to-pdf.ipynb @@ -7,7 +7,9 @@ "outputs": [], "source": [ "from jinja2 import Template \n", - "from urllib.parse import urlparse" + "from urllib.parse import urlparse\n", + "import requests\n", + "from bs4 import BeautifulSoup" ] }, { @@ -16,9 +18,6 @@ "metadata": {}, "outputs": [], "source": [ - "import requests\n", - "from bs4 import BeautifulSoup\n", - "\n", "url = \"https://archive.org/details/cd-roms?and[]=mediatype%3A%22image%22\"\n", "response = requests.get(url)\n", "html = response.content\n", @@ -31,8 +30,8 @@ "metadata": {}, "outputs": [], "source": [ - "titles = scraped.find_all(\"img\", src=True)\n", - "titles2 = scraped.find_all(\"img\", source=True)\n", + "titles = soup.find_all(\"img\", src=True)\n", + "titles2 = soup.find_all(\"img\", source=True)\n", "\n", "allimages = titles + titles2\n" ]