diff --git a/download_all_image_from_wikipedia_page.ipynb b/mediawiki-api/.ipynb_checkpoints/download_all_image_from_wikipedia_page-checkpoint.ipynb
similarity index 100%
rename from download_all_image_from_wikipedia_page.ipynb
rename to mediawiki-api/.ipynb_checkpoints/download_all_image_from_wikipedia_page-checkpoint.ipynb
diff --git a/mediawiki-api.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-checkpoint.ipynb
similarity index 100%
rename from mediawiki-api.ipynb
rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-checkpoint.ipynb
diff --git a/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb
new file mode 100644
index 0000000..9ea8eca
--- /dev/null
+++ b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-download-images-checkpoint.ipynb
@@ -0,0 +1,156 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Download Images from one Wikipage\n",
+ "\n",
+ "(using the Mediawiki API)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import urllib\n",
+ "import json\n",
+ "from IPython.display import JSON # iPython JSON renderer\n",
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### API request > list of image filenames"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign' # no slash at the end!\n",
+ "page = 'Category:Situationist_Times'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = f'{ wiki }/api.php?action=parse&prop=images&page={ page }&format=json'\n",
+ "response = urllib.request.urlopen(url).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "images = data['parse']['images']\n",
+ "# images = ['FILENAME.jpg', 'FILENAME2.jpg']\n",
+ "\n",
+ "# We have our variable \"images\"\n",
+ "print(images)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Downloading the image files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's loop through this list and download each image!\n",
+ "for filename in images:\n",
+ " print('Downloading:', filename)\n",
+ " \n",
+ " filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
+ " filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n",
+ " \n",
+ " # first we search for the full filename of the image\n",
+ " url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
+ " response = urllib.request.urlopen(url).read()\n",
+ " data = json.loads(response)\n",
+ " \n",
+ " # we select the first search result\n",
+ " # (assuming that this is the image we are looking for)\n",
+ " image = data['query']['allimages'][0]\n",
+ " \n",
+ " # then we download the image\n",
+ " image_url = image['url']\n",
+ " image_filename = image['name']\n",
+ " image_response = urllib.request.urlopen(image_url).read()\n",
+ " \n",
+ " # and we save it as a file\n",
+ " out = open(image_filename, 'wb') \n",
+ " out.write(image_response)\n",
+ " out.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/mediawiki-api-dérive.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-dérive-checkpoint.ipynb
similarity index 100%
rename from mediawiki-api-dérive.ipynb
rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-dérive-checkpoint.ipynb
diff --git a/mediawiki-api-part-2.ipynb b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb
similarity index 99%
rename from mediawiki-api-part-2.ipynb
rename to mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb
index 2d840ad..a338f36 100644
--- a/mediawiki-api-part-2.ipynb
+++ b/mediawiki-api/.ipynb_checkpoints/mediawiki-api-part-2-checkpoint.ipynb
@@ -13,6 +13,7 @@
"source": [
"This notebook:\n",
"\n",
+ "* continues with exploring the connections between `Hypertext` & `Dérive`\n",
"* uses the `query` & `parse` actions of the `MediaWiki API`, which we can use to work with wiki pages as (versioned and hypertextual) technotexts\n",
"\n",
"## Epicpedia\n",
diff --git a/mediawiki-api/download_all_image_from_wikipedia_page.ipynb b/mediawiki-api/download_all_image_from_wikipedia_page.ipynb
new file mode 100644
index 0000000..6b3c883
--- /dev/null
+++ b/mediawiki-api/download_all_image_from_wikipedia_page.ipynb
@@ -0,0 +1,217 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import urllib\n",
+ "import json\n",
+ "from IPython.display import JSON # iPython JSON renderer\n",
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Download all the images from one wikipedia page :)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wikipediapage = 'Sculpture'\n",
+ "\n",
+ "#https://en.wikipedia.org/wiki/Sculpture"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = f'https://en.wikipedia.org/w/api.php?action=parse&prop=images&page={ wikipediapage }&format=json'\n",
+ "response = urllib.request.urlopen(url).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# We have our variable \"images\"\n",
+ "images = data['parse']['images']\n",
+ "\n",
+ "print(images)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+    "# ctrl + / => comment/uncomment all selected lines"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Let's loop through this list and download each image!\n",
+ "for filename in images:\n",
+ " try:\n",
+ " print('Downloading:', filename)\n",
+ "\n",
+ " filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
+ " filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n",
+ "\n",
+ " # first we search for the full URL of the image\n",
+ " url = f'https://commons.wikimedia.org/w/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
+ " response = urllib.request.urlopen(url).read()\n",
+ " data = json.loads(response)\n",
+ " image = data['query']['allimages'][0]\n",
+ "\n",
+ " # then we download the image\n",
+ " image_url = image['url']\n",
+ " image_filename = image['name']\n",
+ " image_response = urllib.request.urlopen(image_url).read()\n",
+ "\n",
+ " # and we save it as a file\n",
+ " out = open(\"wikiimage/\"+image_filename, 'wb') \n",
+ " out.write(image_response)\n",
+ " out.close()\n",
+ " \n",
+ " except:\n",
+ " error = sys.exc_info()[0]\n",
+ " print('Skipped:', image)\n",
+ " print('With the error:', error)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "html = ''\n",
+ "\n",
+ "for imagelink in images:\n",
+ " print(imagelink)\n",
+ " \n",
+ " # let's use the \"safe\" pagenames for the filenames \n",
+ " # by replacing the ' ' with '_'\n",
+ " filename = imagelink.replace(' ', '_')\n",
+ " \n",
+ " if '.pdf' in filename:\n",
+    "        a=f'<a href=\"{ filename }\">{ filename }</a>'\n",
+ " else:\n",
+    "        a = f'<img src=\"{ filename }\">'\n",
+ "\n",
+ " html += a\n",
+ " html += '\\n'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output = open('wikiimage/imageimage.html', 'w')\n",
+ "output.write(html)\n",
+ "output.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#git pull\n",
+ "#git status\n",
+ "#git add FILENAME\n",
+ "#git commit -m \"write a msg\"\n",
+ "#git push"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/mediawiki-api-download-images.ipynb b/mediawiki-api/mediawiki-api-download-images.ipynb
similarity index 51%
rename from mediawiki-api-download-images.ipynb
rename to mediawiki-api/mediawiki-api-download-images.ipynb
index 97568e4..9ea8eca 100644
--- a/mediawiki-api-download-images.ipynb
+++ b/mediawiki-api/mediawiki-api-download-images.ipynb
@@ -4,105 +4,28 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Mediawiki API Download Images"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Let's first test it with one image.\n",
- "# For example: File:Debo 009 05 01.jpg\n",
+ "# Download Images from one Wikipage\n",
"\n",
- "filename = 'Debo 009 05 01.jpg'\n",
- "filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
- "filename = filename.replace('.jpg', '') # and let's remove the file extension"
+ "(using the Mediawiki API)"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# We cannot ask the API for the URL of a specific image (:///), but we can still find it using the \"aifrom=\" parameter.\n",
- "# Note: ai=allimages\n",
- "url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
- "response = urllib.request.urlopen(url).read()\n",
- "data = json.loads(response)\n",
- "JSON(data)"
+ "import urllib\n",
+ "import json\n",
+ "from IPython.display import JSON # iPython JSON renderer\n",
+ "import sys"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Select the first result [0], let's assume that that is always the right image that we need :)\n",
- "image = data['query']['allimages'][0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(image)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(image['url'])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "Now we can use this URL to download the images!"
+ "### API request > list of image filenames"
]
},
{
@@ -111,25 +34,20 @@
"metadata": {},
"outputs": [],
"source": [
- "image_url = image['url']\n",
- "image_filename = image['name']\n",
- "image_response = urllib.request.urlopen(image_url).read() # We use urllib for this again, this is basically our tool to download things from the web !"
+ "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign' # no slash at the end!\n",
+ "page = 'Category:Situationist_Times'"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "print(image_response)"
+ "url = f'{ wiki }/api.php?action=parse&prop=images&page={ page }&format=json'\n",
+ "response = urllib.request.urlopen(url).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
]
},
{
@@ -138,33 +56,18 @@
"metadata": {},
"outputs": [],
"source": [
- "out = open(image_filename, 'wb') # 'wb' stands for 'write bytes', we basically ask this file to accept data in byte format\n",
- "out.write(image_response)\n",
- "out.close()"
+ "images = data['parse']['images']\n",
+ "# images = ['FILENAME.jpg', 'FILENAME2.jpg']\n",
+ "\n",
+ "# We have our variable \"images\"\n",
+ "print(images)"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Download all the images of our page"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# We have our variable \"images\"\n",
- "print(images)"
+ "### Downloading the image files"
]
},
{
@@ -180,10 +83,13 @@
" filename = filename.replace(' ', '_') # let's replace spaces again with _\n",
" filename = filename.replace('.jpg', '').replace('.gif', '').replace('.png','').replace('.jpeg','').replace('.JPG','').replace('.JPEG','') # and let's remove the file extension\n",
" \n",
- " # first we search for the full URL of the image\n",
+ " # first we search for the full filename of the image\n",
" url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ filename }&format=json'\n",
" response = urllib.request.urlopen(url).read()\n",
" data = json.loads(response)\n",
+ " \n",
+ " # we select the first search result\n",
+ " # (assuming that this is the image we are looking for)\n",
" image = data['query']['allimages'][0]\n",
" \n",
" # then we download the image\n",
diff --git a/mediawiki-api/mediawiki-api-dérive.ipynb b/mediawiki-api/mediawiki-api-dérive.ipynb
new file mode 100644
index 0000000..89bfe57
--- /dev/null
+++ b/mediawiki-api/mediawiki-api-dérive.ipynb
@@ -0,0 +1,464 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# MediaWiki API (Dérive)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook:\n",
+ "\n",
+ "* continues with exploring the connections between `Hypertext` & `Dérive`\n",
+ "* saves (parts of) wiki pages as html files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import urllib\n",
+ "import json\n",
+ "from IPython.display import JSON # iPython JSON renderer\n",
+ "import sys"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Parse"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's use another wiki this time: the English Wikipedia.\n",
+ "\n",
+ "You can pick any page, i took the Hypertext page for this notebook as an example: https://en.wikipedia.org/wiki/Hypertext"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# parse the wiki page Hypertext\n",
+ "request = 'https://en.wikipedia.org/w/api.php?action=parse&page=Hypertext&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Wiki links dérive\n",
+ "\n",
+ "Select the wiki links from the `data` response:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "links = data['parse']['links']\n",
+ "JSON(links)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's save the links as a list of pagenames, to make it look like this:\n",
+ "\n",
+ "`['hyperdocuments', 'hyperwords', 'hyperworld']`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# How is \"links\" structured now?\n",
+ "print(links)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It helps to copy paste a small part of the output first:\n",
+ "\n",
+ "`[{'ns': 0, 'exists': '', '*': 'Metatext'}, {'ns': 0, '*': 'De man met de hoed'}]`\n",
+ "\n",
+ "and to write it differently with indentation:\n",
+ "\n",
+ "```\n",
+ "links = [\n",
+ " { \n",
+ " 'ns' : 0,\n",
+ " 'exists' : '',\n",
+ " '*', 'Metatext'\n",
+ " }, \n",
+ " {\n",
+ " 'ns' : 0,\n",
+ " 'exists' : '',\n",
+ " '*' : 'De man met de hoed'\n",
+ " } \n",
+ "]\n",
+ "```\n",
+ "\n",
+ "We can now loop through \"links\" and add all the pagenames to a new list called \"wikilinks\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "wikilinks = []\n",
+ "\n",
+ "for link in links:\n",
+ " \n",
+ " print('link:', link)\n",
+ " \n",
+ " for key, value in link.items():\n",
+ " print('----- key:', key)\n",
+ " print('----- value:', value)\n",
+ " print('-----')\n",
+ " \n",
+ " pagename = link['*']\n",
+ " print('===== pagename:', pagename)\n",
+ " \n",
+ " wikilinks.append(pagename)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "wikilinks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Saving the links in a HTML page\n",
+ "\n",
+    "Let's convert the list of pagenames into HTML link elements (`<a href=\"...\"></a>`):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "html = ''\n",
+ "\n",
+ "for wikilink in wikilinks:\n",
+ " print(wikilink)\n",
+ " \n",
+ " # let's use the \"safe\" pagenames for the filenames \n",
+ " # by replacing the ' ' with '_'\n",
+ " filename = wikilink.replace(' ', '_')\n",
+ " \n",
+    "    a = f'<a href=\"{ filename }.html\">{ wikilink }</a>'\n",
+ " html += a\n",
+ " html += '\\n'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "print(html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's save this page in a separate folder, i called it \"mediawiki-api-dérive\"\n",
+ "# We can make this folder here using a terminal command, but you can also do it in the interface on the left\n",
+ "! mkdir mediawiki-api-dérive"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "output = open('mediawiki-api-dérive/Hypertext.html', 'w')\n",
+ "output.write(html)\n",
+ "output.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Recursive parsing\n",
+ "\n",
+ "We can now repeat the steps for each wikilink that we collected!\n",
+ "\n",
+ "We can make an API request for each wikilink, \\\n",
+ "ask for all the links on the page \\\n",
+ "and save it as an HTML page."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First we save the Hypertext page again:\n",
+ "\n",
+ "startpage = 'Hypertext'\n",
+ "\n",
+ "# parse the first wiki page\n",
+ "request = f'https://en.wikipedia.org/w/api.php?action=parse&page={ startpage }&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)\n",
+ "\n",
+ "# select the links\n",
+ "links = data['parse']['links']\n",
+ "\n",
+ "# turn it into a list of pagenames\n",
+ "wikilinks = []\n",
+ "for link in links:\n",
+ " pagename = link['*']\n",
+ " wikilinks.append(pagename)\n",
+ "\n",
+ "# turn the wikilinks into a set of links\n",
+ "html = ''\n",
+ "for wikilink in wikilinks:\n",
+ " filename = wikilink.replace(' ', '_')\n",
+    "    a = f'<a href=\"{ filename }.html\">{ wikilink }</a>'\n",
+ " html += a\n",
+ " html += '\\n'\n",
+ "\n",
+ "# save it as a HTML page\n",
+ "startpage = startpage.replace(' ', '_') # let's again stay safe on the filename side\n",
+ "output = open(f'mediawiki-api-dérive/{ startpage }.html', 'w')\n",
+ "output.write(html)\n",
+ "output.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Then we loop through the list of wikilinks\n",
+ "# and repeat the steps for each page\n",
+ " \n",
+ "for wikilink in wikilinks:\n",
+ " \n",
+ " # let's copy the current wikilink pagename, to avoid confusion later\n",
+ " currentwikilink = wikilink \n",
+ " print('Now requesting:', currentwikilink)\n",
+ " \n",
+ " # parse this wiki page\n",
+ " wikilink = wikilink.replace(' ', '_')\n",
+ " request = f'https://en.wikipedia.org/w/api.php?action=parse&page={ wikilink }&format=json'\n",
+ " \n",
+ " # --> we insert a \"try and error\" condition, \n",
+ " # to catch errors in case a page does not exist \n",
+ " try: \n",
+ " \n",
+ " # continue the parse request\n",
+ " response = urllib.request.urlopen(request).read()\n",
+ " data = json.loads(response)\n",
+ " JSON(data)\n",
+ "\n",
+ " # select the links\n",
+ " links = data['parse']['links']\n",
+ "\n",
+ " # turn it into a list of pagenames\n",
+ " wikilinks = []\n",
+ " for link in links:\n",
+ " pagename = link['*']\n",
+ " wikilinks.append(pagename)\n",
+ "\n",
+ " # turn the wikilinks into a set of links\n",
+ " html = ''\n",
+ " for wikilink in wikilinks:\n",
+ " filename = wikilink.replace(' ', '_')\n",
+    "        a = f'<a href=\"{ filename }.html\">{ wikilink }</a>'\n",
+ " html += a\n",
+ " html += '\\n'\n",
+ "\n",
+ " # save it as a HTML page\n",
+ " currentwikilink = currentwikilink.replace(' ', '_') # let's again stay safe on the filename side\n",
+ " output = open(f'mediawiki-api-dérive/{ currentwikilink }.html', 'w')\n",
+ " output.write(html)\n",
+ " output.close()\n",
+ " \n",
+ " except:\n",
+ " error = sys.exc_info()[0]\n",
+ " print('Skipped:', wikilink)\n",
+ " print('With the error:', error)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## What's next?\n",
+ "\n",
+ "?\n",
+ "\n",
+ "You could add more loops to the recursive parsing, adding more layers ...\n",
+ "\n",
+ "You could request all images of a page (instead of links) ...\n",
+ "\n",
+ "or something else the API offers ... (contributors, text, etc)\n",
+ "\n",
+ "or ..."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/mediawiki-api/mediawiki-api-part-2.ipynb b/mediawiki-api/mediawiki-api-part-2.ipynb
new file mode 100644
index 0000000..6b4879d
--- /dev/null
+++ b/mediawiki-api/mediawiki-api-part-2.ipynb
@@ -0,0 +1,438 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# MediaWiki API (part 2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook:\n",
+ "\n",
+ "* uses the `query` & `parse` actions of the `MediaWiki API`, which we can use to work with wiki pages as (versioned and hypertextual) technotexts\n",
+ "\n",
+ "## Epicpedia\n",
+ "\n",
+ "Reference: Epicpedia (2008), Annemieke van der Hoek \\\n",
+ "(from: https://diversions.constantvzw.org/wiki/index.php?title=Eventual_Consistency#Towards_diffractive_technotexts)\n",
+ "\n",
+ "> In Epicpedia (2008), Annemieke van der Hoek creates a work that makes use of the underlying history that lies beneath the surface of each Wikipedia article.[20] Inspired by the work of Berthold Brecht and the notion of Epic Theater, Epicpedia presents Wikipedia articles as screenplays, where each edit becomes an utterance performed by a cast of characters (both major and minor) that takes place over a span of time, typically many years. The work uses the API of wikipedia to retrieve for a given article the sequence of revisions, their corresponding user handles, the summary message (that allows editors to describe the nature of their edit), and the timestamp to then produce a differential reading. \n",
+ "\n",
+ "![](https://diversions.constantvzw.org/wiki/images/b/b0/Epicpedia_EpicTheater02.png)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import urllib\n",
+ "import json\n",
+ "from IPython.display import JSON # iPython JSON renderer"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Query & Parse"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We will work again with the `Dérive` page on the wiki: https://pzwiki.wdka.nl/mediadesign/D%C3%A9rive (i moved it here, to make the URL a bit simpler)\n",
+ "\n",
+ "And use the `API help page` on the PZI wiki as our main reference: https://pzwiki.wdka.nl/mw-mediadesign/api.php"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# query the wiki page Dérive\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# parse the wiki page Dérive\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Links, contributors, edit history"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can ask the API for different kind of material/information about the page.\n",
+ "\n",
+ "Such as:\n",
+ "\n",
+ "* a list of wiki links\n",
+ "* a list of external links\n",
+ "* a list of images\n",
+ "* a list of edits\n",
+ "* a list of contributors\n",
+ "* page information\n",
+ "* reverse links (What links here?)\n",
+ "* ...\n",
+ "\n",
+ "We can use the query action again, to ask for these things:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# wiki links: prop=links\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=links&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# external links: prop=extlinks\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=extlinks&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/json": {
+ "batchcomplete": "",
+ "query": {
+ "pages": {
+ "33524": {
+ "images": [
+ {
+ "ns": 6,
+ "title": "File:Debo 009 05 01.jpg"
+ },
+ {
+ "ns": 6,
+ "title": "File:Sex-majik-2004.gif"
+ }
+ ],
+ "ns": 0,
+ "pageid": 33524,
+ "title": "Dérive"
+ }
+ }
+ }
+ },
+ "text/plain": [
+        "<IPython.core.display.JSON object>"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {
+ "application/json": {
+ "expanded": false,
+ "root": "root"
+ }
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# images: prop=images\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=images&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# edit history: prop=revisions\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=revisions&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# contributors: prop=contributors\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=contributors&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# page information: prop=info\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=info&titles=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# reverse links (What links here?): prop=linkshere + lhlimit=100 (max. nr of results)\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&prop=linkshere&lhlimit=100&titles=Prototyping&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use the `data` responses in Python (and save data in variables)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For example with the action=parse request\n",
+ "request = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=D%C3%A9rive&format=json'\n",
+ "response = urllib.request.urlopen(request).read()\n",
+ "data = json.loads(response)\n",
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "text = data['parse']['text']['*']\n",
+ "print(text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "title = data['parse']['title']\n",
+ "print(title)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "images = data['parse']['images']\n",
+ "print(images)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use these variables to generate HTML pages "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# open an HTML file to write to \n",
+ "output = open('myfilename.html', 'w')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2813"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# write to this HTML file you just opened\n",
+ "output.write(text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# close the file again (Jupyter needs this to actually write a file)\n",
+ "output.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use these variables to generate HTML pages (using the template language Jinja)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Jinja (template language): https://jinja.palletsprojects.com/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/mediawiki-api/mediawiki-api.ipynb b/mediawiki-api/mediawiki-api.ipynb
new file mode 100644
index 0000000..94043eb
--- /dev/null
+++ b/mediawiki-api/mediawiki-api.ipynb
@@ -0,0 +1,731 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Today's iPython errors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# to hide the warning for today\n",
+ "import warnings\n",
+ "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# MediaWiki API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Let's start with an API request example, using the PZI wiki:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# When you visit .... https://pzwiki.wdka.nl/mw-mediadesign/ ........ the URL magically turns into ........ https://pzwiki.wdka.nl/mediadesign/Main_Page\n",
+ "# This is probably something configured on the server (which is the XPUB XVM server, the wiki is installed there)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# How to access the API?\n",
+ "\n",
+ "# Visit in the browser: "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "https://pzwiki.wdka.nl/mw-mediadesign/api.php (This is the main access point of the API of the PZI wiki.)\n",
+ "\n",
+ "https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=Main%20page&format=json (This is an example of an API request.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# What's in this URL?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# api.php "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ?action=query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# &"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# &titles=Main%20page"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# &format=json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Documentation page of the MediaWiki API: https://pzwiki.wdka.nl/mw-mediadesign/api.php"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dérive in the API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Wander around in the documentation page, edit the URL, make a couple requests!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Try to use the actions: \"query\" and \"parse\". \n",
+ "# We will focus on these two today."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# (paste your requests on the pad)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use the API in a Notebook"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using urllib & json"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import urllib\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = 'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&titles=Main%20page&format=json'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "request = urllib.request.urlopen(url).read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = json.loads(request)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Display JSON in Notebooks nicely, using iPython"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from IPython.display import JSON"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "JSON(data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Try different *query* and *parse* actions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's write the URL in two parts:\n",
+ "# - main domain\n",
+ "# - API request"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "wiki = 'https://pzwiki.wdka.nl/mw-mediadesign'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query = f'{ wiki }/api.php?action=query&titles=Category:Situationist_Times&format=json'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "parse = f'{ wiki }/api.php?action=parse&page=Category:Situationist_Times&format=json'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "querylinks = f'{ wiki }/api.php?action=query&prop=links&titles=Main%20Page'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Documentation page for query: https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=help&modules=query\n",
+ "\n",
+ "Documentation page for parse: https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=help&modules=parse"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# make the request here in the notebook\n",
+ "request = urllib.request.urlopen(url).read()\n",
+ "data = json.loads(request)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Save HTML as files"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# try to use .open() and .write() to open and write the HTML to a file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## $ cp to /var/www/html/PrototypingTimes/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Let's publish the HTML files that you just created"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We can use terminal commands here (\"bash commands\" to be more precise), by using the \"!\" as the first character in a cell.\n",
+ "# For example:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! figlet hello"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# So, to copy files and folders over to the \"PrototypingTimes\" folder, we can use $ cp (from copy).\n",
+ "# The folder \"PrototypingTimes\" sits on the Sandbot server on this path: /var/www/html/PrototypingTimes/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# /var/www/html/PrototypingTimes/ == https://hub.xpub.nl/sandbot/PrototypingTimes/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# So to copy a file there, you can use this command:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! cp YOURFUNNYFILENAME.html /var/www/html/PrototypingTimes/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And in case you want to copy over folders, you can use $ cp -r (-r for recursive)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! cp -r YOURFOLDERNAME /var/www/html/PrototypingTimes/"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "### Let's also publish this notebook as an .html file?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First, we can convert it to a .html file, using jupyter command line tools:\n",
+ "# (https://nbconvert.readthedocs.io/en/latest/usage.html)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "! jupyter nbconvert YOURNOTEBOOK.ipynb --to html "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# And then, copy it to the /var/www/html/PrototypingTimes/ folder with $ cp (as we just did above)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}