You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
3.7 KiB
3.7 KiB
Download Images from One Wiki Page¶
(using the MediaWiki API)
In [ ]:
import json
import sys
import urllib
import urllib.parse
import urllib.request

from IPython.display import JSON  # iPython JSON renderer
API request > list of image filenames¶
In [ ]:
# Base URL of the wiki; "/api.php" is appended to it when building requests,
# so there must be no slash at the end!
wiki = 'https://pzwiki.wdka.nl/mw-mediadesign'

# Title of the wiki page whose images we want to list and download.
page = 'Category:Situationist_Times'
In [ ]:
# Ask the wiki's "parse" API module for the images used on `page`.
# urlencode() escapes the page title, so titles containing spaces or other
# reserved characters still produce a valid URL.
query = urllib.parse.urlencode({
    'action': 'parse',
    'prop': 'images',
    'page': page,
    'format': 'json',
})
url = f'{ wiki }/api.php?{ query }'

# Read the raw JSON bytes and close the connection as soon as we are done.
with urllib.request.urlopen(url) as reply:
    response = reply.read()

# `data` is the decoded API answer; the next cells read from it.
data = json.loads(response)

# Last expression of the cell: render the answer with the notebook JSON viewer.
JSON(data)
In [ ]:
# Pull the list of image filenames out of the API answer.
# Expected form: images = ['FILENAME.jpg', 'FILENAME2.jpg']
images = data['parse']['images']

# We have our variable "images" -- print it to check what we got.
print(images)
Downloading the image files¶
In [ ]:
# Let's loop through this list and download each image into the current
# working directory!
for filename in images:
    print('Downloading:', filename)

    # let's replace spaces again with _ so the name can be compared with the
    # "name" field of the search result below
    # NOTE(review): assumes the API returns names with underscores -- confirm
    filename = filename.replace(' ', '_')

    # first we search for the record of this image; the listing starts at
    # `aifrom`, so we pass the complete filename (extension included) and ask
    # for a single result. urlencode() escapes any special characters.
    query = urllib.parse.urlencode({
        'action': 'query',
        'list': 'allimages',
        'aifrom': filename,
        'ailimit': 1,
        'format': 'json',
    })
    url = f'{ wiki }/api.php?{ query }'
    with urllib.request.urlopen(url) as reply:
        # separate name, so the parse result in `data` is not overwritten
        result = json.loads(reply.read())

    # the first search result is only usable when it really is the image we
    # asked for; otherwise the file is not available here and we skip it
    # instead of saving a different image or crashing on an empty list
    matches = result.get('query', {}).get('allimages', [])
    if not matches or matches[0]['name'] != filename:
        print('Skipping (not found on this wiki):', filename)
        continue
    image = matches[0]

    # then we download the image
    image_url = image['url']
    image_filename = image['name']
    with urllib.request.urlopen(image_url) as reply:
        image_response = reply.read()

    # and we save it as a file (closed automatically, even on errors)
    with open(image_filename, 'wb') as out:
        out.write(image_response)
In [ ]:
In [ ]:
In [ ]:
In [ ]: