wow!
parent
1450807cc0
commit
a358e4234b
@ -1,555 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Mediawiki API Download Images"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import urllib\n",
|
|
||||||
"import json\n",
|
|
||||||
"from IPython.display import JSON # iPython JSON renderer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Let's first test it with one image.\n",
|
|
||||||
"# For example: File:Debo 009 05 01.jpg\n",
|
|
||||||
"\n",
|
|
||||||
"filename = 'Debo 009 05 01.jpg'\n",
|
|
||||||
"filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
|
|
||||||
"filename = filename.replace('.jpg', '') # and let's remove the file extension"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"application/json": {
|
|
||||||
"batchcomplete": "",
|
|
||||||
"continue": {
|
|
||||||
"aicontinue": "Deck_6.jpg",
|
|
||||||
"continue": "-||"
|
|
||||||
},
|
|
||||||
"query": {
|
|
||||||
"allimages": [
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33518",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Debo_009_05_01.jpg",
|
|
||||||
"name": "Debo_009_05_01.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2021-01-21T14:54:44Z",
|
|
||||||
"title": "File:Debo 009 05 01.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=14589",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Debord-societysml.gif",
|
|
||||||
"name": "Debord-societysml.gif",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2014-11-30T00:19:20Z",
|
|
||||||
"title": "File:Debord-societysml.gif",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/b/ba/Debord-societysml.gif"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4462",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWU.pdf",
|
|
||||||
"name": "Dec_6_AWU.pdf",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2011-12-06T15:23:11Z",
|
|
||||||
"title": "File:Dec 6 AWU.pdf",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/7/70/Dec_6_AWU.pdf"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4463",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWUII.pdf",
|
|
||||||
"name": "Dec_6_AWUII.pdf",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2011-12-06T16:34:43Z",
|
|
||||||
"title": "File:Dec 6 AWUII.pdf",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/f/fd/Dec_6_AWUII.pdf"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=2090",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:December.gif",
|
|
||||||
"name": "December.gif",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2010-12-14T21:07:54Z",
|
|
||||||
"title": "File:December.gif",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/3/3f/December.gif"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33093",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Deck_1.jpg",
|
|
||||||
"name": "Deck_1.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2020-11-23T14:31:00Z",
|
|
||||||
"title": "File:Deck 1.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/7/74/Deck_1.jpg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33095",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Deck_2.jpg",
|
|
||||||
"name": "Deck_2.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2020-11-23T14:31:00Z",
|
|
||||||
"title": "File:Deck 2.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/0/08/Deck_2.jpg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33084",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Deck_3.jpg",
|
|
||||||
"name": "Deck_3.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2020-11-23T14:30:52Z",
|
|
||||||
"title": "File:Deck 3.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/f/f5/Deck_3.jpg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33088",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Deck_4.jpg",
|
|
||||||
"name": "Deck_4.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2020-11-23T14:30:52Z",
|
|
||||||
"title": "File:Deck 4.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/2/24/Deck_4.jpg"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"descriptionshorturl": "https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33085",
|
|
||||||
"descriptionurl": "https://pzwiki.wdka.nl/mediadesign/File:Deck_5.jpg",
|
|
||||||
"name": "Deck_5.jpg",
|
|
||||||
"ns": 6,
|
|
||||||
"timestamp": "2020-11-23T14:30:52Z",
|
|
||||||
"title": "File:Deck 5.jpg",
|
|
||||||
"url": "https://pzwiki.wdka.nl/mw-mediadesign/images/9/93/Deck_5.jpg"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"text/plain": [
|
|
||||||
"<IPython.core.display.JSON object>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {
|
|
||||||
"application/json": {
|
|
||||||
"expanded": false,
|
|
||||||
"root": "root"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# We cannot ask the API for the URL of a specific image (:///), but we can still find it using the \"aifrom=\" parameter.\n",
|
|
||||||
"# Note: ai=allimages\n",
|
|
||||||
"url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
|
|
||||||
"response = urllib.request.urlopen(url).read()\n",
|
|
||||||
"data = json.loads(response)\n",
|
|
||||||
"JSON(data)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Select the first result [0], let's assume that that is always the right image that we need :)\n",
|
|
||||||
"image = data['query']['allimages'][0]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"{'name': 'Debo_009_05_01.jpg', 'timestamp': '2021-01-21T14:54:44Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debo_009_05_01.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33518', 'ns': 6, 'title': 'File:Debo 009 05 01.jpg'}\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"print(image)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"print(image['url'])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Now we can use this URL to download the images!"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 9,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"image_url = image['url']\n",
|
|
||||||
"image_filename = image['name']\n",
|
|
||||||
"image_response = urllib.request.urlopen(image_url).read() # We use urllib for this again, this is basically our tool to download things from the web !"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 27,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Download all the images of our page"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 37,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"[{'name': 'Debo_009_05_01.jpg', 'timestamp': '2021-01-21T14:54:44Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debo_009_05_01.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33518', 'ns': 6, 'title': 'File:Debo 009 05 01.jpg'}, {'name': 'Debord-societysml.gif', 'timestamp': '2014-11-30T00:19:20Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/b/ba/Debord-societysml.gif', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debord-societysml.gif', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=14589', 'ns': 6, 'title': 'File:Debord-societysml.gif'}, {'name': 'Dec_6_AWU.pdf', 'timestamp': '2011-12-06T15:23:11Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/7/70/Dec_6_AWU.pdf', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWU.pdf', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4462', 'ns': 6, 'title': 'File:Dec 6 AWU.pdf'}, {'name': 'Dec_6_AWUII.pdf', 'timestamp': '2011-12-06T16:34:43Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/f/fd/Dec_6_AWUII.pdf', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWUII.pdf', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4463', 'ns': 6, 'title': 'File:Dec 6 AWUII.pdf'}, {'name': 'December.gif', 'timestamp': '2010-12-14T21:07:54Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/3/3f/December.gif', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:December.gif', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=2090', 'ns': 6, 'title': 'File:December.gif'}, {'name': 'Deck_1.jpg', 'timestamp': '2020-11-23T14:31:00Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/7/74/Deck_1.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_1.jpg', 'descriptionshorturl': 
'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33093', 'ns': 6, 'title': 'File:Deck 1.jpg'}, {'name': 'Deck_2.jpg', 'timestamp': '2020-11-23T14:31:00Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/0/08/Deck_2.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_2.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33095', 'ns': 6, 'title': 'File:Deck 2.jpg'}, {'name': 'Deck_3.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/f/f5/Deck_3.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_3.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33084', 'ns': 6, 'title': 'File:Deck 3.jpg'}, {'name': 'Deck_4.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/2/24/Deck_4.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_4.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33088', 'ns': 6, 'title': 'File:Deck 4.jpg'}, {'name': 'Deck_5.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/9/93/Deck_5.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_5.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33085', 'ns': 6, 'title': 'File:Deck 5.jpg'}]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# We have our variable \"images\"\n",
|
|
||||||
"images = data['query']['allimages']\n",
|
|
||||||
"\n",
|
|
||||||
"print(images)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 45,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Debo_009_05_01.jpg\n",
|
|
||||||
"Debord-societysml.gif\n",
|
|
||||||
"Dec_6_AWU.pdf\n",
|
|
||||||
"Dec_6_AWUII.pdf\n",
|
|
||||||
"December.gif\n",
|
|
||||||
"Deck_1.jpg\n",
|
|
||||||
"Deck_2.jpg\n",
|
|
||||||
"Deck_3.jpg\n",
|
|
||||||
"Deck_4.jpg\n",
|
|
||||||
"Deck_5.jpg\n",
|
|
||||||
"['Debo_009_05_01.jpg', 'Debord-societysml.gif', 'Dec_6_AWU.pdf', 'Dec_6_AWUII.pdf', 'December.gif', 'Deck_1.jpg', 'Deck_2.jpg', 'Deck_3.jpg', 'Deck_4.jpg', 'Deck_5.jpg']\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"images1 = data['query']['allimages']\n",
|
|
||||||
"images=[]\n",
|
|
||||||
"\n",
|
|
||||||
"for item in images1:\n",
|
|
||||||
" filename = item['name']\n",
|
|
||||||
" \n",
|
|
||||||
" print(filename)\n",
|
|
||||||
" \n",
|
|
||||||
" images.append(filename)\n",
|
|
||||||
" \n",
|
|
||||||
"print(images)\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 46,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Downloading: Debo_009_05_01.jpg\n",
|
|
||||||
"Downloading: Debord-societysml.gif\n",
|
|
||||||
"Downloading: Dec_6_AWU.pdf\n",
|
|
||||||
"Downloading: Dec_6_AWUII.pdf\n",
|
|
||||||
"Downloading: December.gif\n",
|
|
||||||
"Downloading: Deck_1.jpg\n",
|
|
||||||
"Downloading: Deck_2.jpg\n",
|
|
||||||
"Downloading: Deck_3.jpg\n",
|
|
||||||
"Downloading: Deck_4.jpg\n",
|
|
||||||
"Downloading: Deck_5.jpg\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Let's loop through this list and download each image!\n",
|
|
||||||
"for filename in images:\n",
|
|
||||||
" print('Downloading:', filename)\n",
|
|
||||||
" \n",
|
|
||||||
" filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
|
|
||||||
" filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n",
|
|
||||||
" \n",
|
|
||||||
" # first we search for the full URL of the image\n",
|
|
||||||
" url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
|
|
||||||
" response = urllib.request.urlopen(url).read()\n",
|
|
||||||
" data = json.loads(response)\n",
|
|
||||||
" image = data['query']['allimages'][0]\n",
|
|
||||||
" \n",
|
|
||||||
" # then we download the image\n",
|
|
||||||
" image_url = image['url']\n",
|
|
||||||
" image_filename = image['name']\n",
|
|
||||||
" image_response = urllib.request.urlopen(image_url).read()\n",
|
|
||||||
" \n",
|
|
||||||
" # and we save it as a file\n",
|
|
||||||
" out = open(image_filename, 'wb') \n",
|
|
||||||
" out.write(image_response)\n",
|
|
||||||
" out.close()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"filename"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 50,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Debo_009_05_01.jpg\n",
|
|
||||||
"Debord-societysml.gif\n",
|
|
||||||
"Dec_6_AWU.pdf\n",
|
|
||||||
"Dec_6_AWUII.pdf\n",
|
|
||||||
"December.gif\n",
|
|
||||||
"Deck_1.jpg\n",
|
|
||||||
"Deck_2.jpg\n",
|
|
||||||
"Deck_3.jpg\n",
|
|
||||||
"Deck_4.jpg\n",
|
|
||||||
"Deck_5.jpg\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"html = ''\n",
|
|
||||||
"\n",
|
|
||||||
"for imagelink in images:\n",
|
|
||||||
" print(imagelink)\n",
|
|
||||||
" \n",
|
|
||||||
" # let's use the \"safe\" pagenames for the filenames \n",
|
|
||||||
" # by replacing the ' ' with '_'\n",
|
|
||||||
" filename = imagelink.replace(' ', '_')\n",
|
|
||||||
" \n",
|
|
||||||
" if '.pdf' in filename:\n",
|
|
||||||
" a=f'<iframe src=\"{filename}\"></iframe>'\n",
|
|
||||||
" else:\n",
|
|
||||||
" a = f'<img src=\"{ filename }\">'\n",
|
|
||||||
"\n",
|
|
||||||
" html += a\n",
|
|
||||||
" html += '\\n'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 51,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<img src=\"Debo_009_05_01.jpg\">\n",
|
|
||||||
"<img src=\"Debord-societysml.gif\">\n",
|
|
||||||
"<iframe src=\"Dec_6_AWU.pdf\"></iframe>\n",
|
|
||||||
"<iframe src=\"Dec_6_AWUII.pdf\"></iframe>\n",
|
|
||||||
"<img src=\"December.gif\">\n",
|
|
||||||
"<img src=\"Deck_1.jpg\">\n",
|
|
||||||
"<img src=\"Deck_2.jpg\">\n",
|
|
||||||
"<img src=\"Deck_3.jpg\">\n",
|
|
||||||
"<img src=\"Deck_4.jpg\">\n",
|
|
||||||
"<img src=\"Deck_5.jpg\">\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"print(html)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 52,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"output = open('image/imageimage.html', 'w')\n",
|
|
||||||
"output.write(html)\n",
|
|
||||||
"output.close()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
@ -0,0 +1,217 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"import os\n",
|
||||||
|
"import sys\n",
|
||||||
|
"import urllib\n",
|
||||||
|
"import urllib.parse\n",
|
||||||
|
"import urllib.request\n",
|
||||||
|
"\n",
|
||||||
|
"from IPython.display import JSON # iPython JSON renderer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Download all the images from one Wikipedia page :)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Title of the English Wikipedia article whose images we want,\n",
|
||||||
|
"# i.e. the last part of https://en.wikipedia.org/wiki/Sculpture\n",
|
||||||
|
"wikipediapage = 'Sculpture'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Ask the MediaWiki 'parse' API which images are used on the page.\n",
|
||||||
|
"# urlencode() escapes spaces, '&', '?' and non-ASCII characters in the page title,\n",
|
||||||
|
"# so titles other than a single plain word also work.\n",
|
||||||
|
"query = urllib.parse.urlencode({\n",
|
||||||
|
"    'action': 'parse',\n",
|
||||||
|
"    'prop': 'images',\n",
|
||||||
|
"    'page': wikipediapage,\n",
|
||||||
|
"    'format': 'json',\n",
|
||||||
|
"})\n",
|
||||||
|
"url = f'https://en.wikipedia.org/w/api.php?{query}'\n",
|
||||||
|
"\n",
|
||||||
|
"# 'with' closes the HTTP connection as soon as the body has been read\n",
|
||||||
|
"with urllib.request.urlopen(url) as reply:\n",
|
||||||
|
"    response = reply.read()\n",
|
||||||
|
"\n",
|
||||||
|
"data = json.loads(response)\n",
|
||||||
|
"JSON(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Keep only the list of image file names from the API response.\n",
|
||||||
|
"images = data['parse']['images']\n",
|
||||||
|
"print(images)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#ctrl + ? => remove all"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Download every image used on the page from Wikimedia Commons into wikiimage/.\n",
|
||||||
|
"download_dir = 'wikiimage'\n",
|
||||||
|
"os.makedirs(download_dir, exist_ok=True)  # open() below fails when the folder is missing\n",
|
||||||
|
"\n",
|
||||||
|
"for filename in images:\n",
|
||||||
|
"    print('Downloading:', filename)\n",
|
||||||
|
"    try:\n",
|
||||||
|
"        # the API lists file names with '_' instead of ' '\n",
|
||||||
|
"        wanted_name = filename.replace(' ', '_')\n",
|
||||||
|
"\n",
|
||||||
|
"        # first we search for the full URL of the image;\n",
|
||||||
|
"        # urlencode() escapes '&', '?', spaces and non-ASCII characters in the name\n",
|
||||||
|
"        query = urllib.parse.urlencode({\n",
|
||||||
|
"            'action': 'query',\n",
|
||||||
|
"            'list': 'allimages',\n",
|
||||||
|
"            'aifrom': wanted_name,\n",
|
||||||
|
"            'ailimit': 1,\n",
|
||||||
|
"            'format': 'json',\n",
|
||||||
|
"        })\n",
|
||||||
|
"        api_url = f'https://commons.wikimedia.org/w/api.php?{query}'\n",
|
||||||
|
"        with urllib.request.urlopen(api_url) as reply:\n",
|
||||||
|
"            listing = json.loads(reply.read())\n",
|
||||||
|
"        matches = listing['query']['allimages']\n",
|
||||||
|
"\n",
|
||||||
|
"        # 'aifrom' only says where the alphabetical listing starts: when the file\n",
|
||||||
|
"        # is not on Commons the first result is some other image, so skip it\n",
|
||||||
|
"        if not matches or matches[0]['name'] != wanted_name:\n",
|
||||||
|
"            print('Skipped:', filename, '(not found on Wikimedia Commons)')\n",
|
||||||
|
"            continue\n",
|
||||||
|
"        image = matches[0]\n",
|
||||||
|
"\n",
|
||||||
|
"        # then we download the image\n",
|
||||||
|
"        with urllib.request.urlopen(image['url']) as reply:\n",
|
||||||
|
"            image_response = reply.read()\n",
|
||||||
|
"\n",
|
||||||
|
"        # and we save it as a file\n",
|
||||||
|
"        with open(os.path.join(download_dir, image['name']), 'wb') as out:\n",
|
||||||
|
"            out.write(image_response)\n",
|
||||||
|
"\n",
|
||||||
|
"    except Exception as error:\n",
|
||||||
|
"        # report the file that failed and carry on with the next one\n",
|
||||||
|
"        print('Skipped:', filename)\n",
|
||||||
|
"        print('With the error:', repr(error))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"scrolled": true
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Build one HTML line per file: PDFs go in an <iframe>, everything else in an <img>.\n",
|
||||||
|
"html_lines = []\n",
|
||||||
|
"\n",
|
||||||
|
"for imagelink in images:\n",
|
||||||
|
"    print(imagelink)\n",
|
||||||
|
"\n",
|
||||||
|
"    # let's use the 'safe' pagenames for the filenames\n",
|
||||||
|
"    # by replacing the ' ' with '_'\n",
|
||||||
|
"    filename = imagelink.replace(' ', '_')\n",
|
||||||
|
"\n",
|
||||||
|
"    # percent-encode the name so characters such as quotes, '?', '#' or '%'\n",
|
||||||
|
"    # cannot break the src attribute or point the browser at another file\n",
|
||||||
|
"    src = urllib.parse.quote(filename)\n",
|
||||||
|
"\n",
|
||||||
|
"    # only a real .pdf extension (in any letter case) is shown in an iframe\n",
|
||||||
|
"    if filename.lower().endswith('.pdf'):\n",
|
||||||
|
"        a = f'<iframe src=\"{src}\"></iframe>'\n",
|
||||||
|
"    else:\n",
|
||||||
|
"        a = f'<img src=\"{src}\">'\n",
|
||||||
|
"\n",
|
||||||
|
"    html_lines.append(a + '\\n')\n",
|
||||||
|
"\n",
|
||||||
|
"# join once at the end instead of growing the string inside the loop\n",
|
||||||
|
"html = ''.join(html_lines)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Save the generated HTML next to the downloaded files.\n",
|
||||||
|
"# 'with' closes the file even when write() fails; an explicit UTF-8 encoding\n",
|
||||||
|
"# keeps the result independent of the system locale.\n",
|
||||||
|
"with open('wikiimage/imageimage.html', 'w', encoding='utf-8') as output:\n",
|
||||||
|
"    output.write(html)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#git pull\n",
|
||||||
|
"#git status\n",
|
||||||
|
"#git add FILENAME\n",
|
||||||
|
"#git commit -m \"write a msg\"\n",
|
||||||
|
"#git push"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
Loading…
Reference in New Issue