You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
5.7 KiB
5.7 KiB
MediaWiki API: Download Images
In [ ]:
In [ ]:
# Let's first test it with one image.
# For example: File:Debo 009 05 01.jpg
import os

filename = 'Debo 009 05 01.jpg'
filename = filename.replace(' ', '_')  # let's replace spaces again with _
# and let's remove the file extension; os.path.splitext() only cuts off the
# part after the last dot, so it works for .jpg, .png, .JPG, ... alike and
# never touches a ".jpg" that happens to sit in the middle of a name
filename = os.path.splitext(filename)[0]
In [ ]:
In [ ]:
# The "allimages" list can give us the URL of an image: the "aifrom="
# parameter makes the listing start at the given file name.
# Note: ai=allimages
import urllib.parse

# Escape the name first, so characters such as &, # or é cannot break the URL
safe_filename = urllib.parse.quote(filename)
url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ safe_filename }&format=json'
# "with" closes the connection again as soon as we have read the answer
with urllib.request.urlopen(url) as connection:
    response = connection.read()
data = json.loads(response)
JSON(data)
In [ ]:
In [ ]:
# Take the first result [0]; let's assume that it is always the image we are looking for :)
matches = data['query']['allimages']
image = matches[0]
In [ ]:
In [ ]:
# Show the whole entry that we selected from the API results
print(image)
In [ ]:
# Show only the 'url' field of that entry
print(image['url'])
In [ ]:
Now we can use this URL to download the image!
In [ ]:
image_url = image['url']
image_filename = image['name']
# We use urllib for this again, this is basically our tool to download things from the web!
# "with" makes sure the connection is closed again once the image has been read
with urllib.request.urlopen(image_url) as connection:
    image_response = connection.read()
In [ ]:
In [ ]:
# Look at the raw bytes that we just downloaded
print(image_response)
In [ ]:
# 'wb' stands for 'write bytes', we basically ask this file to accept data in byte format.
# "with" closes the file for us, even if something goes wrong while writing
with open(image_filename, 'wb') as out:
    out.write(image_response)
In [ ]:
Download all the images on our page
In [ ]:
# We still have our variable "images", let's look at it again
print(images)
In [ ]:
# Let's loop through this list and download each image!
import os
import urllib.parse

for filename in images:
    print('Downloading:', filename)
    filename = filename.replace(' ', '_')  # let's replace spaces again with _
    # and let's remove the file extension, whatever it is (.jpg, .PNG, .gif, ...);
    # os.path.splitext() only cuts off the part after the last dot
    filename = os.path.splitext(filename)[0]

    # first we search for the full URL of the image
    # (quote() escapes characters such as &, # or é that would break the URL)
    safe_filename = urllib.parse.quote(filename)
    url = f'https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=query&list=allimages&aifrom={ safe_filename }&format=json'
    with urllib.request.urlopen(url) as connection:
        response = connection.read()
    data = json.loads(response)
    results = data['query']['allimages']
    if not results:
        # the API returned no image for this name: skip it instead of
        # stopping the whole loop with an IndexError
        print('Not found, skipping:', filename)
        continue
    image = results[0]  # again we assume that the first result is the right one

    # then we download the image
    image_url = image['url']
    image_filename = image['name']
    with urllib.request.urlopen(image_url) as connection:
        image_response = connection.read()

    # and we save it as a file ("with" closes the file for us)
    with open(image_filename, 'wb') as out:
        out.write(image_response)
In [ ]:
In [ ]:
In [ ]:
In [ ]: