From dc63b4e9f7c3b6320ff94162edbb61e8d86f728f Mon Sep 17 00:00:00 2001 From: manetta Date: Tue, 2 Feb 2021 09:52:54 +0100 Subject: [PATCH] updating the mediawiki api notebooks --- ...image_from_wikipedia_page-checkpoint.ipynb | 0 .../mediawiki-api-checkpoint.ipynb | 0 ...awiki-api-download-images-checkpoint.ipynb | 156 ++++ .../mediawiki-api-dérive-checkpoint.ipynb | 0 .../mediawiki-api-part-2-checkpoint.ipynb | 1 + ...wnload_all_image_from_wikipedia_page.ipynb | 217 ++++++ .../mediawiki-api-download-images.ipynb | 140 +--- mediawiki-api/mediawiki-api-dérive.ipynb | 464 +++++++++++ mediawiki-api/mediawiki-api-part-2.ipynb | 438 +++++++++++ mediawiki-api/mediawiki-api.ipynb | 731 ++++++++++++++++++ 10 files changed, 2030 insertions(+), 117 deletions(-) rename download_all_image_from_wikipedia_page.ipynb => mediawiki-api/.ipynb_checkpoints/download_all_image_from_wikipedia_page-checkpoint.ipynb (100%) rename mediawiki-api.ipynb => mediawiki-api/.ipynb_checkpoints/mediawiki-api-checkpoint.ipynb (100%) create mode 100644 mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb rename mediawiki-api-dérive.ipynb => mediawiki-api/.ipynb_checkpoints/mediawiki-api-dérive-checkpoint.ipynb (100%) rename mediawiki-api-part-2.ipynb => mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb (99%) create mode 100644 mediawiki-api/download_all_image_from_wikipedia_page.ipynb rename mediawiki-api-download-images.ipynb => mediawiki-api/mediawiki-api-download-images.ipynb (51%) create mode 100644 mediawiki-api/mediawiki-api-dérive.ipynb create mode 100644 mediawiki-api/mediawiki-api-part-2.ipynb create mode 100644 mediawiki-api/mediawiki-api.ipynb diff --git a/download_all_image_from_wikipedia_page.ipynb b/mediawiki-api/.ipynb_checkpoints/download_all_image_from_wikipedia_page-checkpoint.ipynb similarity index 100% rename from download_all_image_from_wikipedia_page.ipynb rename to 
mediawiki-api/.ipynb_checkpoints/download_all_image_from_wikipedia_page-checkpoint.ipynb diff --git a/mediawiki-api.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-checkpoint.ipynb similarity index 100% rename from mediawiki-api.ipynb rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-checkpoint.ipynb diff --git a/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb new file mode 100644 index 0000000..9ea8eca --- /dev/null +++ b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb @@ -0,0 +1,156 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download Images from one Wikipage\n", + "\n", + "(using the Mediawiki API)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "import json\n", + "from IPython.display import JSON # iPython JSON renderer\n", + "import sys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### API request > list of image filenames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign' # no slash at the end!\n", + "page = 'Category:Situationist_Times'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = f'{ wiki }/api.php?action=parse&prop=images&page={ page }&format=json'\n", + "response = urllib.request.urlopen(url).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "images = data['parse']['images']\n", + "# images = ['FILENAME.jpg', 'FILENAME2.jpg']\n", + "\n", + "# We have our variable \"images\"\n", + "print(images)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Downloading the image files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's loop through this list and download each image!\n", + "for filename in images:\n", + " print('Downloading:', filename)\n", + " \n", + " filename = filename.replace(' ', '_') # let's replace spaces again with _\n", + " filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n", + " \n", + " # first we search for the full filename of the image\n", + " url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n", + " response = urllib.request.urlopen(url).read()\n", + " data = json.loads(response)\n", + " \n", + " # we select the first search result\n", + " # (assuming that this is the image we are looking for)\n", + " image = data['query']['allimages'][0]\n", + " \n", + " # then we download the image\n", + " image_url = image['url']\n", + " image_filename = image['name']\n", + " image_response = urllib.request.urlopen(image_url).read()\n", + " \n", + " # and we save it as a file\n", + " out = open(image_filename, 'wb') \n", + " out.write(image_response)\n", + " out.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + 
"version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mediawiki-api-dérive.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-dérive-checkpoint.ipynb similarity index 100% rename from mediawiki-api-dérive.ipynb rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-dérive-checkpoint.ipynb diff --git a/mediawiki-api-part-2.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb similarity index 99% rename from mediawiki-api-part-2.ipynb rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb index 2d840ad..a338f36 100644 --- a/mediawiki-api-part-2.ipynb +++ b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb @@ -13,6 +13,7 @@ "source": [ "This notebook:\n", "\n", + "* continues with exploring the connections between `Hypertext` & `Dérive`\n", "* uses the `query` & `parse` actions of the `MediaWiki API`, which we can use to work with wiki pages as (versioned and hypertextual) technotexts\n", "\n", "## Epicpedia\n", diff --git a/mediawiki-api/download_all_image_from_wikipedia_page.ipynb b/mediawiki-api/download_all_image_from_wikipedia_page.ipynb new file mode 100644 index 0000000..6b3c883 --- /dev/null +++ b/mediawiki-api/download_all_image_from_wikipedia_page.ipynb @@ -0,0 +1,217 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "import json\n", + "from IPython.display import JSON # iPython JSON renderer\n", + "import sys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download all the images from one wikipedia page :)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wikipediapage = 'Sculpture'\n", + "\n", + 
"#https://en.wikipedia.org/wiki/Sculpture" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = f'https://en.wikipedia.org/w/api.php?action=parse&prop=images&page={ wikipediapage }&format=json'\n", + "response = urllib.request.urlopen(url).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# We have our variable \"images\"\n", + "images = data['parse']['images']\n", + "\n", + "print(images)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#ctrl + ? => remove all" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Let's loop through this list and download each image!\n", + "for filename in images:\n", + " try:\n", + " print('Downloading:', filename)\n", + "\n", + " filename = filename.replace(' ', '_') # let's replace spaces again with _\n", + " filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n", + "\n", + " # first we search for the full URL of the image\n", + " url = f'https://commons.wikimedia.org/w/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n", + " response = urllib.request.urlopen(url).read()\n", + " data = json.loads(response)\n", + " image = data['query']['allimages'][0]\n", + "\n", + " # then we 
download the image\n", + " image_url = image['url']\n", + " image_filename = image['name']\n", + " image_response = urllib.request.urlopen(image_url).read()\n", + "\n", + " # and we save it as a file\n", + " out = open(\"wikiimage/\"+image_filename, 'wb') \n", + " out.write(image_response)\n", + " out.close()\n", + " \n", + " except:\n", + " error = sys.exc_info()[0]\n", + " print('Skipped:', image)\n", + " print('With the error:', error)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "html = ''\n", + "\n", + "for imagelink in images:\n", + " print(imagelink)\n", + " \n", + " # let's use the \"safe\" pagenames for the filenames \n", + " # by replacing the ' ' with '_'\n", + " filename = imagelink.replace(' ', '_')\n", + " \n", + " if '.pdf' in filename:\n", + " a=f''\n", + " else:\n", + " a = f''\n", + "\n", + " html += a\n", + " html += '\\n'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output = open('wikiimage/imageimage.html', 'w')\n", + "output.write(html)\n", + "output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#git pull\n", + "#git status\n", + "#git add FILENAME\n", + "#git commit -m \"write a msg\"\n", + "#git push" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + 
} + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mediawiki-api-download-images.ipynb b/mediawiki-api/mediawiki-api-download-images.ipynb similarity index 51% rename from mediawiki-api-download-images.ipynb rename to mediawiki-api/mediawiki-api-download-images.ipynb index 97568e4..9ea8eca 100644 --- a/mediawiki-api-download-images.ipynb +++ b/mediawiki-api/mediawiki-api-download-images.ipynb @@ -4,105 +4,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Mediawiki API Download Images" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Let's first test it with one image.\n", - "# For example: File:Debo 009 05 01.jpg\n", + "# Download Images from one Wikipage\n", "\n", - "filename = 'Debo 009 05 01.jpg'\n", - "filename = filename.replace(' ', '_') # let's replace spaces again with _\n", - "filename = filename.replace('.jpg', '') # and let's remove the file extension" + "(using the Mediawiki API)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# We cannot ask the API for the URL of a specific image (:///), but we can still find it using the \"aifrom=\" parameter.\n", - "# Note: ai=allimages\n", - "url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n", - "response = urllib.request.urlopen(url).read()\n", - "data = json.loads(response)\n", - "JSON(data)" + "import urllib\n", + "import json\n", + "from IPython.display import JSON # iPython JSON renderer\n", + "import sys" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, 
- "outputs": [], - "source": [ - "# Select the first result [0], let's assume that that is always the right image that we need :)\n", - "image = data['query']['allimages'][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image['url'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we can use this URL to download the images!" + "### API request > list of image filenames" ] }, { @@ -111,25 +34,20 @@ "metadata": {}, "outputs": [], "source": [ - "image_url = image['url']\n", - "image_filename = image['name']\n", - "image_response = urllib.request.urlopen(image_url).read() # We use urllib for this again, this is basically our tool to download things from the web !" 
+ "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign' # no slash at the end!\n", + "page = 'Category:Situationist_Times'" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "print(image_response)" + "url = f'{ wiki }/api.php?action=parse&prop=images&page={ page }&format=json'\n", + "response = urllib.request.urlopen(url).read()\n", + "data = json.loads(response)\n", + "JSON(data)" ] }, { @@ -138,33 +56,18 @@ "metadata": {}, "outputs": [], "source": [ - "out = open(image_filename, 'wb') # 'wb' stands for 'write bytes', we basically ask this file to accept data in byte format\n", - "out.write(image_response)\n", - "out.close()" + "images = data['parse']['images']\n", + "# images = ['FILENAME.jpg', 'FILENAME2.jpg']\n", + "\n", + "# We have our variable \"images\"\n", + "print(images)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Download all the images of our page" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# We have our variable \"images\"\n", - "print(images)" + "### Downloading the image files" ] }, { @@ -180,10 +83,13 @@ " filename = filename.replace(' ', '_') # let's replace spaces again with _\n", " filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n", " \n", - " # first we search for the full URL of the image\n", + " # first we search for the full filename of the image\n", " url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n", " response = urllib.request.urlopen(url).read()\n", " data = json.loads(response)\n", + " \n", + " # 
we select the first search result\n", + " # (assuming that this is the image we are looking for)\n", " image = data['query']['allimages'][0]\n", " \n", " # then we download the image\n", diff --git a/mediawiki-api/mediawiki-api-dérive.ipynb b/mediawiki-api/mediawiki-api-dérive.ipynb new file mode 100644 index 0000000..89bfe57 --- /dev/null +++ b/mediawiki-api/mediawiki-api-dérive.ipynb @@ -0,0 +1,464 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MediaWiki API (Dérive)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook:\n", + "\n", + "* continues with exploring the connections between `Hypertext` & `Dérive`\n", + "* saves (parts of) wiki pages as html files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "import json\n", + "from IPython.display import JSON # iPython JSON renderer\n", + "import sys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parse" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use another wiki this time: the English Wikipedia.\n", + "\n", + "You can pick any page, i took the Hypertext page for this notebook as an example: https://en.wikipedia.org/wiki/Hypertext" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# parse the wiki page Hypertext\n", + "request = 'https://en.wikipedia.org/w/api.php?action=parse&page=Hypertext&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wiki links 
dérive\n", + "\n", + "Select the wiki links from the `data` response:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "links = data['parse']['links']\n", + "JSON(links)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's save the links as a list of pagenames, to make it look like this:\n", + "\n", + "`['hyperdocuments', 'hyperwords', 'hyperworld']`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# How is \"links\" structured now?\n", + "print(links)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It helps to copy paste a small part of the output first:\n", + "\n", + "`[{'ns': 0, 'exists': '', '*': 'Metatext'}, {'ns': 0, '*': 'De man met de hoed'}]`\n", + "\n", + "and to write it differently with indentation:\n", + "\n", + "```\n", + "links = [\n", + " { \n", + " 'ns' : 0,\n", + " 'exists' : '',\n", + " '*' : 'Metatext'\n", + " }, \n", + " {\n", + " 'ns' : 0,\n", + " 'exists' : '',\n", + " '*' : 'De man met de hoed'\n", + " } \n", + "]\n", + "```\n", + "\n", + "We can now loop through \"links\" and add all the pagenames to a new list called \"wikilinks\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "wikilinks = []\n", + "\n", + "for link in links:\n", + " \n", + " print('link:', link)\n", + " \n", + " for key, value in link.items():\n", + " print('----- key:', key)\n", + " print('----- value:', value)\n", + " print('-----')\n", + " \n", + " pagename = link['*']\n", + " print('===== pagename:', pagename)\n", + " \n", + " wikilinks.append(pagename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "wikilinks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving the links in a HTML page\n", + "\n", + "Let's convert the list of pagenames into HTML link elements (``):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "html = ''\n", + "\n", + "for wikilink in wikilinks:\n", + " print(wikilink)\n", + " \n", + " # let's use the \"safe\" pagenames for the filenames \n", + " # by replacing the ' ' with '_'\n", + " filename = wikilink.replace(' ', '_')\n", + " \n", + " a = f'{ wikilink }'\n", + " html += a\n", + " html += '\\n'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's save this page in a separate folder, i called it \"mediawiki-api-dérive\"\n", + "# We can make this folder here using a terminal command, but you can also do it in the interface on the left\n", + "! 
mkdir mediawiki-api-dérive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output = open('mediawiki-api-dérive/Hypertext.html', 'w')\n", + "output.write(html)\n", + "output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recursive parsing\n", + "\n", + "We can now repeat the steps for each wikilink that we collected!\n", + "\n", + "We can make an API request for each wikilink, \\\n", + "ask for all the links on the page \\\n", + "and save it as an HTML page." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# First we save the Hypertext page again:\n", + "\n", + "startpage = 'Hypertext'\n", + "\n", + "# parse the first wiki page\n", + "request = f'https://en.wikipedia.org/w/api.php?action=parse&page={ startpage }&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)\n", + "\n", + "# select the links\n", + "links = data['parse']['links']\n", + "\n", + "# turn it into a list of pagenames\n", + "wikilinks = []\n", + "for link in links:\n", + " pagename = link['*']\n", + " wikilinks.append(pagename)\n", + "\n", + "# turn the wikilinks into a set of links\n", + "html = ''\n", + "for wikilink in wikilinks:\n", + " filename = wikilink.replace(' ', '_')\n", + " a = f'{ wikilink }'\n", + " html += a\n", + " html += '\\n'\n", + "\n", + "# save it as a HTML page\n", + "startpage = startpage.replace(' ', '_') # let's again stay safe on the filename side\n", + "output = open(f'mediawiki-api-dérive/{ startpage }.html', 'w')\n", + "output.write(html)\n", + "output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "# Then we loop through the list of wikilinks\n", + "# and repeat the steps for each page\n", + " \n", + "for wikilink in wikilinks:\n", + " \n", + " # let's copy the current wikilink pagename, to avoid confusion later\n", + " currentwikilink = wikilink \n", + " print('Now requesting:', currentwikilink)\n", + " \n", + " # parse this wiki page\n", + " wikilink = wikilink.replace(' ', '_')\n", + " request = f'https://en.wikipedia.org/w/api.php?action=parse&page={ wikilink }&format=json'\n", + " \n", + " # --> we insert a \"try and error\" condition, \n", + " # to catch errors in case a page does not exist \n", + " try: \n", + " \n", + " # continue the parse request\n", + " response = urllib.request.urlopen(request).read()\n", + " data = json.loads(response)\n", + " JSON(data)\n", + "\n", + " # select the links\n", + " links = data['parse']['links']\n", + "\n", + " # turn it into a list of pagenames\n", + " wikilinks = []\n", + " for link in links:\n", + " pagename = link['*']\n", + " wikilinks.append(pagename)\n", + "\n", + " # turn the wikilinks into a set of links\n", + " html = ''\n", + " for wikilink in wikilinks:\n", + " filename = wikilink.replace(' ', '_')\n", + " a = f'{ wikilink }'\n", + " html += a\n", + " html += '\\n'\n", + "\n", + " # save it as a HTML page\n", + " currentwikilink = currentwikilink.replace(' ', '_') # let's again stay safe on the filename side\n", + " output = open(f'mediawiki-api-dérive/{ currentwikilink }.html', 'w')\n", + " output.write(html)\n", + " output.close()\n", + " \n", + " except:\n", + " error = sys.exc_info()[0]\n", + " print('Skipped:', wikilink)\n", + " print('With the error:', error)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's next?\n", + "\n", + "?\n", + "\n", + "You could add more loops to the recursive parsing, 
adding more layers ...\n", + "\n", + "You could request all images of a page (instead of links) ...\n", + "\n", + "or something else the API offers ... (contributors, text, etc)\n", + "\n", + "or ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mediawiki-api/mediawiki-api-part-2.ipynb b/mediawiki-api/mediawiki-api-part-2.ipynb new file mode 100644 index 0000000..6b4879d --- /dev/null +++ b/mediawiki-api/mediawiki-api-part-2.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MediaWiki API (part 2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook:\n", + "\n", + "* uses the `query` & `parse` actions of the `MediaWiki API`, which we can use to work with wiki pages as (versioned and hypertextual) technotexts\n", + "\n", + "## Epicpedia\n", + "\n", + "Reference: Epicpedia (2008), Annemieke van der Hoek \\\n", + "(from: https://diversions.constantvzw.org/wiki/index.php?title=Eventual_Consistency#Towards_diffractive_technotexts)\n", + "\n", + "> In 
Epicpedia (2008), Annemieke van der Hoek creates a work that makes use of the underlying history that lies beneath the surface of each Wikipedia article.[20] Inspired by the work of Berthold Brecht and the notion of Epic Theater, Epicpedia presents Wikipedia articles as screenplays, where each edit becomes an utterance performed by a cast of characters (both major and minor) that takes place over a span of time, typically many years. The work uses the API of wikipedia to retrieve for a given article the sequence of revisions, their corresponding user handles, the summary message (that allows editors to describe the nature of their edit), and the timestamp to then produce a differential reading. \n", + "\n", + "![](https://diversions.constantvzw.org/wiki/images/b/b0/Epicpedia_EpicTheater02.png)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "import json\n", + "from IPython.display import JSON # iPython JSON renderer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Query & Parse" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will work again with the `Dérive` page on the wiki: https://pzwiki.wdka.nl/mediadesign/D%C3%A9rive (i moved it here, to make the URL a bit simpler)\n", + "\n", + "And use the `API help page` on the PZI wiki as our main reference: https://pzwiki.wdka.nl/mw-mediadesign/api.php" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# query the wiki page Dérive\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "# parse the wiki page Dérive\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Links, contributors, edit history" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can ask the API for different kinds of material/information about the page.\n", + "\n", + "Such as:\n", + "\n", + "* a list of wiki links\n", + "* a list of external links\n", + "* a list of images\n", + "* a list of edits\n", + "* a list of contributors\n", + "* page information\n", + "* reverse links (What links here?)\n", + "* ...\n", + "\n", + "We can use the query action again, to ask for these things:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# wiki links: prop=links\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=links&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# external links: prop=extlinks\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=extlinks&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + 
"application/json": { + "batchcomplete": "", + "query": { + "pages": { + "33524": { + "images": [ + { + "ns": 6, + "title": "File:Debo 009 05 01.jpg" + }, + { + "ns": 6, + "title": "File:Sex-majik-2004.gif" + } + ], + "ns": 0, + "pageid": 33524, + "title": "Dérive" + } + } + } + }, + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": { + "application/json": { + "expanded": false, + "root": "root" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "# images: prop=images\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=images&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# edit history: prop=revisions\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=revisions&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# contributors: prop=contributors\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=contributors&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# page information: prop=info\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=info&titles=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
reverse links (What links here?): prop=linkshere + lhlimit=100 (max. nr of results)\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=linkshere&lhlimit=100&titles=Prototyping&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use the `data` responses in Python (and save data in variables)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For example with the action=parse request\n", + "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=D%C3%A9rive&format=json'\n", + "response = urllib.request.urlopen(request).read()\n", + "data = json.loads(response)\n", + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "text = data['parse']['text']['*']\n", + "print(text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "title = data['parse']['title']\n", + "print(title)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "images = data['parse']['images']\n", + "print(images)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use these variables to generate HTML pages " + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "# open a HTML file to write to \n", + "output = open('myfilename.html', 'w')" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, +
"outputs": [ + { + "data": { + "text/plain": [ + "2813" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# write to this HTML file you just opened\n", + "output.write(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "# close the file again (Jupyter needs this to actually write a file)\n", + "output.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use these variables to generate HTML pages (using the template language Jinja)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Jinja (template language): https://jinja.palletsprojects.com/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mediawiki-api/mediawiki-api.ipynb b/mediawiki-api/mediawiki-api.ipynb new file mode 100644 index 0000000..94043eb --- /dev/null +++ b/mediawiki-api/mediawiki-api.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Today's iPython errors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# to hide the warning for today\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MediaWiki API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's start with an API request example, using the PZI wiki:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# When you visit .... https://pzwiki.wdka.nl/mw-mediadesign/ ........ the URL magically turns into ........ https://pzwiki.wdka.nl/mediadesign/Main_Page\n", + "# This is probably something configured on the server (which is the XPUB XVM server, the wiki is installed there)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# How to access the API?\n", + "\n", + "# Visit in the browser: " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://pzwiki.wdka.nl/mw-mediadesign/api.php (This is the main access point of the API of the PZI wiki.)\n", + "\n", + "https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=Main%20page&format=json (This is an example of an API request.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# What's in this URL?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# api.php " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ?"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ?action=query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# &" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# &titles=Main%20page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# &format=json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Documentation page of the MediaWiki API: https://pzwiki.wdka.nl/mw-mediadesign/api.php" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dérive in the API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Wander around in the documentation page, edit the URL, make a couple requests!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Try to use the actions: \"query\" and \"parse\". \n", + "# We will focus on these two today."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# (paste your requests on the pad)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use the API in a Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Using urllib & json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import urllib\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "url = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=Main%20page&format=json'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "request = urllib.request.urlopen(url).read()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = json.loads(request)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Display JSON in Notebooks nicely, using iPython" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import JSON" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "JSON(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Try different *query* and *parse* actions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's write the URL in two parts:\n", + "# - main domain\n", + "# - API request" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query = f'{ wiki }/api.php?action=query&titles=Category:Situationist_Times&format=json'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parse = f'{ wiki }/api.php?action=parse&page=Category:Situationist_Times&format=json'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "querylinks = f'{ wiki }/api.php?action=query&prop=links&titles=Main%20Page'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Documentation page for query: https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=help&modules=query\n", + "\n", + 
"Documentation page for parse: https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=help&modules=parse" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# make the request here in the notebook\n", + "request = urllib.request.urlopen(url).read()\n", + "data = json.loads(request)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save HTML as files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# try to use .open() and .write() to open and write the HTML to a file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## $ cp to /var/www/html/PrototypingTimes/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's publish the HTML files that you just created" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "# We can use terminal commands here (\"bash commands\" to be more precise), by using the \"!\" as the first character in a cell.\n", + "# For example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! figlet hello" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# So, to copy files and folders over to the \"PrototypingTimes\" folder, we can use $ cp (from copy).\n", + "# The folder \"PrototypingTimes\" sits on the Sandbot server on this path: /var/www/html/PrototypingTimes/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# /var/www/html/PrototypingTimes/ == https://hub.xpub.nl/sandbot/PrototypingTimes/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# So to copy a file there, you can use this command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! cp YOURFUNNYFILENAME.html /var/www/html/PrototypingTimes/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# And in case you want to copy over folders, you can use $ cp -r (-r for recursive)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
cp -r YOURFOLDERNAME /var/www/html/PrototypingTimes/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "### Let's also publish this notebook as .html file?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# First, we can convert it to a .html file, using jupyter command line tools:\n", + "# (https://nbconvert.readthedocs.io/en/latest/usage.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! jupyter nbconvert YOURNOTEBOOK.ipynb --to html " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# And then, copy it to the /var/www/html/PrototypingTimes/ folder with $ cp (as we just did above)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}