{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download Images from one Wikipage\n",
    "\n",
    "(using the Mediawiki API)\n",
    "\n",
    "This notebook asks the wiki which image files are used on one page and then saves each of them into the current working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import sys\n",
    "# the submodules have to be imported explicitly: a plain \"import urllib\"\n",
    "# does not guarantee that urllib.request is available\n",
    "import urllib.parse\n",
    "import urllib.request\n",
    "\n",
    "from IPython.display import JSON # iPython JSON renderer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign' # no slash at the end!\n",
    "page = 'Category:Situationist_Times'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def api_get(wiki, params):\n",
    "    \"\"\"Send one GET request to the wiki's api.php and return the decoded JSON reply.\n",
    "\n",
    "    wiki   -- base URL of the wiki, without a trailing slash\n",
    "    params -- dict of API parameters; 'format=json' is added automatically\n",
    "\n",
    "    Raises RuntimeError when the reply contains a top-level 'error' entry.\n",
    "    \"\"\"\n",
    "    # urlencode escapes spaces, '&', non-ASCII characters, ... in the values\n",
    "    query = urllib.parse.urlencode({**params, 'format': 'json'})\n",
    "    with urllib.request.urlopen(f'{ wiki }/api.php?{ query }') as response:\n",
    "        reply = json.loads(response.read())\n",
    "    if 'error' in reply:\n",
    "        raise RuntimeError('The API reported an error: ' + str(reply['error']))\n",
    "    return reply"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### API request > list of image filenames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = api_get(wiki, {'action': 'parse', 'prop': 'images', 'page': page})\n",
    "JSON(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "images = data['parse']['images']\n",
    "# images = ['FILENAME.jpg', 'FILENAME2.jpg']\n",
    "\n",
    "# We have our variable \"images\"\n",
    "print(images)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Downloading the image files\n",
    "\n",
    "Each file is looked up under its full name (including the extension). Files that the wiki does not list under exactly that name are skipped instead of being replaced by a similarly named file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's loop through this list and download each image!\n",
    "for filename in images:\n",
    "    print('Downloading:', filename)\n",
    "\n",
    "    wanted = filename.replace(' ', '_') # let's replace spaces again with _\n",
    "\n",
    "    # first we look up the image under its full filename;\n",
    "    # 'aifrom' starts the alphabetical listing at that name, so when the file\n",
    "    # exists it is the first (and, with ailimit=1, the only) result\n",
    "    lookup = api_get(wiki, {'action': 'query', 'list': 'allimages', 'aifrom': wanted, 'ailimit': 1})\n",
    "    results = lookup.get('query', {}).get('allimages', [])\n",
    "\n",
    "    # when the file does not exist the listing simply starts at the next name,\n",
    "    # so we compare names instead of assuming the first result is the right one\n",
    "    if not results or results[0]['name'].replace(' ', '_') != wanted:\n",
    "        print('  Skipped (no file with this exact name on the wiki):', filename, file=sys.stderr)\n",
    "        continue\n",
    "    image = results[0]\n",
    "\n",
    "    # then we download the image\n",
    "    image_url = image['url']\n",
    "    image_filename = image['name']\n",
    "    with urllib.request.urlopen(image_url) as image_response:\n",
    "        image_bytes = image_response.read()\n",
    "\n",
    "    # and we save it as a file (the with-block closes it even if writing fails)\n",
    "    with open(image_filename, 'wb') as out:\n",
    "        out.write(image_bytes)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}