You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
prototyping-times/Eunaversion_mediawiki-api-d...

18 KiB

Mediawiki API Download Images

In [3]:
import json
import os
import urllib
import urllib.parse
import urllib.request

from IPython.display import JSON # iPython JSON renderer
In [4]:
# We start small and try everything out with a single image first.
# For example: File:Debo 009 05 01.jpg
#
# In one go: swap the spaces for _ and cut off the .jpg file extension,
# so that we end up with the bare name that we can search for.
filename = 'Debo 009 05 01.jpg'.replace(' ', '_').replace('.jpg', '')
In [ ]:
 
In [5]:
# We look up the URL of our image through the list of all images on the wiki, using the "aifrom=" parameter:
# the list that comes back starts at the name that we give.
# Note: ai=allimages
#
# urlencode() builds the part after the "?" for us and escapes every character
# that is not allowed inside a URL (spaces, &, #, ...).
query = urllib.parse.urlencode({
    'action': 'query',
    'list': 'allimages',
    'aifrom': filename,
    'format': 'json',
})
url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?{ query }'

# "with" closes the connection again when we are done, also when reading goes wrong
with urllib.request.urlopen(url) as connection:
    response = connection.read()

data = json.loads(response)
JSON(data)
Out[5]:
<IPython.core.display.JSON object>
In [ ]:
 
In [6]:
# Select the first result [0], let's assume that that is always the right image that we need :)
# Careful: this is only an assumption. The result is a list of files that starts at the name we asked for,
# so [0] is "our" image only when a file with that name really exists on the wiki.
# An empty list would raise an IndexError here.
image = data['query']['allimages'][0]
In [ ]:
 
In [7]:
# Show the complete record of this image: a dict with (among others) its name, url and timestamp
print(image)
{'name': 'Debo_009_05_01.jpg', 'timestamp': '2021-01-21T14:54:44Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debo_009_05_01.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33518', 'ns': 6, 'title': 'File:Debo 009 05 01.jpg'}
In [8]:
# The 'url' field holds the direct link to the image file
print(image['url'])
https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg
In [ ]:
 

Now we can use this URL to download the image!

In [9]:
image_url = image['url']
image_filename = image['name']

# We use urllib for this again, this is basically our tool to download things from the web !
# "with" makes sure that the connection is closed once all the bytes of the image are read.
with urllib.request.urlopen(image_url) as connection:
    image_response = connection.read()
In [ ]:
 
In [ ]:
 
In [27]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 

Download all the images of our page

In [37]:
# We have our variable "images":
# the list of results from the API response in "data", with one record (a dict) per file
images = data['query']['allimages']

print(images)
[{'name': 'Debo_009_05_01.jpg', 'timestamp': '2021-01-21T14:54:44Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/c/c8/Debo_009_05_01.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debo_009_05_01.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33518', 'ns': 6, 'title': 'File:Debo 009 05 01.jpg'}, {'name': 'Debord-societysml.gif', 'timestamp': '2014-11-30T00:19:20Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/b/ba/Debord-societysml.gif', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Debord-societysml.gif', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=14589', 'ns': 6, 'title': 'File:Debord-societysml.gif'}, {'name': 'Dec_6_AWU.pdf', 'timestamp': '2011-12-06T15:23:11Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/7/70/Dec_6_AWU.pdf', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWU.pdf', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4462', 'ns': 6, 'title': 'File:Dec 6 AWU.pdf'}, {'name': 'Dec_6_AWUII.pdf', 'timestamp': '2011-12-06T16:34:43Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/f/fd/Dec_6_AWUII.pdf', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Dec_6_AWUII.pdf', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=4463', 'ns': 6, 'title': 'File:Dec 6 AWUII.pdf'}, {'name': 'December.gif', 'timestamp': '2010-12-14T21:07:54Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/3/3f/December.gif', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:December.gif', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=2090', 'ns': 6, 'title': 'File:December.gif'}, {'name': 'Deck_1.jpg', 'timestamp': '2020-11-23T14:31:00Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/7/74/Deck_1.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_1.jpg', 'descriptionshorturl': 
'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33093', 'ns': 6, 'title': 'File:Deck 1.jpg'}, {'name': 'Deck_2.jpg', 'timestamp': '2020-11-23T14:31:00Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/0/08/Deck_2.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_2.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33095', 'ns': 6, 'title': 'File:Deck 2.jpg'}, {'name': 'Deck_3.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/f/f5/Deck_3.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_3.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33084', 'ns': 6, 'title': 'File:Deck 3.jpg'}, {'name': 'Deck_4.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/2/24/Deck_4.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_4.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33088', 'ns': 6, 'title': 'File:Deck 4.jpg'}, {'name': 'Deck_5.jpg', 'timestamp': '2020-11-23T14:30:52Z', 'url': 'https://pzwiki.wdka.nl/mw-mediadesign/images/9/93/Deck_5.jpg', 'descriptionurl': 'https://pzwiki.wdka.nl/mediadesign/File:Deck_5.jpg', 'descriptionshorturl': 'https://pzwiki.wdka.nl/mw-mediadesign/index.php?curid=33085', 'ns': 6, 'title': 'File:Deck 5.jpg'}]
In [45]:
images1 = data['query']['allimages']

# From every record we only keep the file name (the 'name' field)
images = [item['name'] for item in images1]

# Show the names one per line ...
for filename in images:
    print(filename)

# ... and once more as the complete list
print(images)
Debo_009_05_01.jpg
Debord-societysml.gif
Dec_6_AWU.pdf
Dec_6_AWUII.pdf
December.gif
Deck_1.jpg
Deck_2.jpg
Deck_3.jpg
Deck_4.jpg
Deck_5.jpg
['Debo_009_05_01.jpg', 'Debord-societysml.gif', 'Dec_6_AWU.pdf', 'Dec_6_AWUII.pdf', 'December.gif', 'Deck_1.jpg', 'Deck_2.jpg', 'Deck_3.jpg', 'Deck_4.jpg', 'Deck_5.jpg']
In [ ]:
 
In [ ]:
 
In [46]:
# Let's loop through this list and download each image!
for filename in images:
    print('Downloading:', filename)

    filename = filename.replace(' ', '_') # let's replace spaces again with _

    # first we search for the full URL of the image
    # We keep the file extension here: "aifrom" starts the list at the name we give,
    # so with the complete name the first result is our file (when it exists).
    # Without the extension another file with the same start (Deck_1.gif before Deck_1.jpg)
    # could be returned first.
    query = urllib.parse.urlencode({
        'action': 'query',
        'list': 'allimages',
        'aifrom': filename,
        'ailimit': 1, # we only use the first result, so we only ask for one
        'format': 'json',
    })
    url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?{ query }'
    with urllib.request.urlopen(url) as connection:
        response = connection.read()

    # we store the results under a new name, so that the variable "data"
    # from the cells above is not overwritten while we loop
    results = json.loads(response)['query']['allimages']
    if not results:
        print('Not found on the wiki, skipping:', filename)
        continue
    image = results[0]
    if image['name'] != filename:
        # the wiki answered with another file than the one we asked for
        print('Warning: the wiki returned a different file:', image['name'])

    # then we download the image
    image_url = image['url']
    image_filename = image['name']
    with urllib.request.urlopen(image_url) as connection:
        image_response = connection.read()

    # and we save it as a file
    # ("with" closes the file for us, also when writing goes wrong)
    with open(image_filename, 'wb') as out:
        out.write(image_response)
Downloading: Debo_009_05_01.jpg
Downloading: Debord-societysml.gif
Downloading: Dec_6_AWU.pdf
Downloading: Dec_6_AWUII.pdf
Downloading: December.gif
Downloading: Deck_1.jpg
Downloading: Deck_2.jpg
Downloading: Deck_3.jpg
Downloading: Deck_4.jpg
Downloading: Deck_5.jpg
In [ ]:
# Check which value "filename" has at this point
filename
In [50]:
# Build one HTML tag per file, each on its own line
html = ''

for imagelink in images:
    print(imagelink)

    # let's use the "safe" pagenames for the filenames
    # by replacing the ' ' with '_'
    filename = imagelink.replace(' ', '_')

    # percent-encode the name, so that characters like " & # ? in a file name
    # cannot break the src="..." attribute or the link itself
    src = urllib.parse.quote(filename)

    # a PDF cannot be shown with <img>, so it goes into an <iframe>
    # we only look at the end of the name, and ignore upper/lower case (.pdf, .PDF)
    if filename.lower().endswith('.pdf'):
        a = f'<iframe src="{ src }"></iframe>'
    else:
        a = f'<img src="{ src }">'

    html += a
    html += '\n'
Debo_009_05_01.jpg
Debord-societysml.gif
Dec_6_AWU.pdf
Dec_6_AWUII.pdf
December.gif
Deck_1.jpg
Deck_2.jpg
Deck_3.jpg
Deck_4.jpg
Deck_5.jpg
In [51]:
# Check the generated HTML: there should be one tag per line
print(html)
<img src="Debo_009_05_01.jpg">
<img src="Debord-societysml.gif">
<iframe src="Dec_6_AWU.pdf"></iframe>
<iframe src="Dec_6_AWUII.pdf"></iframe>
<img src="December.gif">
<img src="Deck_1.jpg">
<img src="Deck_2.jpg">
<img src="Deck_3.jpg">
<img src="Deck_4.jpg">
<img src="Deck_5.jpg">

In [52]:
# make sure that the folder exists, otherwise open() stops with a FileNotFoundError
os.makedirs('image', exist_ok=True)

# NOTE(review): the tags in "html" only contain the bare file names, so a browser will look
# for the images next to this HTML file (inside image/) - check that the images are stored there.

# "with" closes the file for us, also when writing goes wrong
with open('image/imageimage.html', 'w') as output:
    output.write(html)
In [ ]: