You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3.7 KiB

Download Images from one Wikipage

(using the Mediawiki API)

In [ ]:
import json
import sys
import urllib
import urllib.parse
import urllib.request

from IPython.display import JSON # iPython JSON renderer

API request > list of image filenames

In [ ]:
# Base address of the MediaWiki installation.
# Leave off the trailing slash: the request cells append '/api.php' to it.
wiki = 'https://pzwiki.wdka.nl/mw-mediadesign'

# Title of the wiki page whose images we want to list.
page = 'Category:Situationist_Times'
In [ ]:
# Ask the API's "parse" action for the images used on the page.
# urlencode() escapes the page title, so titles that contain spaces, '&' or
# non-ASCII characters still produce a valid request URL.
query = urllib.parse.urlencode({
    'action': 'parse',
    'prop': 'images',
    'page': page,
    'format': 'json',
})
url = f'{ wiki }/api.php?{ query }'

# The "with" block closes the HTTP connection as soon as the body is read.
with urllib.request.urlopen(url) as connection:
    response = connection.read()

# Decode the JSON answer and show it with the interactive JSON renderer.
data = json.loads(response)
JSON(data)
In [ ]:
# The API reports problems (for example an unknown page title) as an 'error'
# object inside the JSON body, so check for it first. Without this check the
# failure only shows up as a bare KeyError on 'parse'.
if 'error' in data:
    raise RuntimeError(f"MediaWiki API error: { data['error'].get('info', data['error']) }")

# The filenames are nested under 'parse' > 'images'.
# images = ['FILENAME.jpg', 'FILENAME2.jpg']
images = data['parse']['images']

# We have our variable "images"
print(images)

Downloading the image files

In [ ]:
# Let's loop through this list and download each image!

# File extensions that are cut off the end of a filename before searching for it.
IMAGE_EXTENSIONS = ('.jpeg', '.JPEG', '.jpg', '.JPG', '.gif', '.png')

for filename in images:
    print('Downloading:', filename)

    # let's replace spaces again with _
    # (a separate variable keeps the original entry from "images" untouched)
    search_name = filename.replace(' ', '_')

    # and let's remove the file extension, but only at the very end of the name,
    # so a name that merely contains '.jpg' or '.png' in the middle stays intact
    for extension in IMAGE_EXTENSIONS:
        if search_name.endswith(extension):
            search_name = search_name[:-len(extension)]
            break

    # first we search for the full filename of the image
    # (the query is URL-encoded and sent to the wiki configured in "wiki")
    query = urllib.parse.urlencode({
        'action': 'query',
        'list': 'allimages',
        'aifrom': search_name,
        'format': 'json',
    })
    search_url = f'{ wiki }/api.php?{ query }'
    with urllib.request.urlopen(search_url) as connection:
        # stored under its own name so the "data" of the page request survives
        search_result = json.loads(connection.read())

    # we select the first search result
    # (assuming that this is the image we are looking for)
    matches = search_result['query']['allimages']
    if not matches:
        # nothing to download; report it and carry on with the next image
        print('No file found for:', filename)
        continue
    image = matches[0]

    # then we download the image
    image_url = image['url']
    image_filename = image['name']
    with urllib.request.urlopen(image_url) as connection:
        image_response = connection.read()

    # and we save it as a file
    # ("with" closes the file even if writing fails)
    with open(image_filename, 'wb') as out:
        out.write(image_response)
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: