script to download images from wiki into html
commit
ffe6af0140
@ -0,0 +1,98 @@
|
|||||||
|
from urllib import request
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import mwclient
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Ask the MediaWiki API for the parsed version of the notebook page.
# The `with` block closes the HTTP connection as soon as the body is read.
with request.urlopen('https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=User:FLEM/Notebook&format=json') as page_response:
    data = page_response.read()
print(data)

# The API answers with JSON; decode it into nested dicts and lists.
json_data = json.loads(data)
print(json_data)

# Title of the parsed page.
title = json_data['parse']['title']
print(title)

# Names of the image files the page uses (see the example output below).
images = json_data['parse']['images']
print(images)
# Example output:
#['Inside_overview_(1).gif', 'Inside_overview_(2).gif', 'Inside_pocket.gif', 'Opened.jpeg', 'Outside_cover.jpeg', 'Paper_selection.jpeg', 'Side_view.jpeg', 'Inuse.jpg', 'Multicolumnednotebook_.jpg', 'Multicolumnednotebook_(1).jpg', 'Multicolumnednotebook_(2).jpg', 'Archive_01.jpg', 'Concertina2.jpg', 'Concertina1.jpg', 'Metalring5.jpg', 'Metalring6.jpg', 'Penholder.jpg', 'Paperfoldedupdate.jpg', 'Notesfrompellebyme3.jpg', 'M&Ms_results.jpg', 'Notebookworkflow_(8).jpg', 'Longletternotebook.jpg', 'Longletternotebooktext.jpg', 'Prototype.gif', 'First_notebook_kit.jpg', 'Annotation_scans_(1).jpg']

# Rendered HTML of the page; the API stores it under the '*' key.
text = json_data['parse']['text']['*']
print(text)
|
||||||
|
|
||||||
|
|
||||||
|
# Here we download the images used on the page, one API lookup per image.
URL = "https://pzwiki.wdka.nl/mw-mediadesign/api.php"

# One session for every request, so the connection to the wiki is reused.
S = requests.Session()

for image in images:
    # list=allimages enumerates the files of the WHOLE wiki in alphabetical
    # order; "aifrom" only sets where the listing starts. With "ailimit": "1"
    # we get the first file at or after `image`, which is the wanted file
    # only if it exists on this wiki, so the answer is verified below.
    PARAMS = {
        "action": "query",
        "format": "json",
        "list": "allimages",
        "aifrom": image,  # using variable from the list above
        "ailimit": "1",  # how many entries to return from the listing
    }

    R = S.get(url=URL, params=PARAMS)
    DATA = R.json()
    IMAGES = DATA["query"]["allimages"]

    # A separate loop variable: reusing `image` here would overwrite the
    # name that is still needed for the file name below.
    for found in IMAGES:
        imageurl = found["url"]
        # Prefer the "name" field; fall back to the title without its
        # "File:" prefix when the entry has no "name".
        found_name = found.get("name", found["title"].replace('File:', ''))

        # Compare with spaces and underscores treated alike, because titles
        # use spaces where the names in `images` use underscores.
        if found_name.replace(' ', '_') != image.replace(' ', '_'):
            print(f"skipping {image}: the wiki returned {found_name} instead")
            continue

        # Save under the exact name from `images`: that is the name the
        # generated HTML puts into <img src="...">, so the two must match.
        filename = image
        print(imageurl)
        print(filename)

        response = S.get(imageurl)
        # Status 200 means "OK". Anything else is an error page, which must
        # not be written to disk as if it were the image.
        if response.status_code != 200:
            print(f"skipping {image}: HTTP status {response.status_code}")
            continue

        # `f` is just the name chosen for the open file object (any name,
        # such as `fd`, works); 'wb' writes the raw bytes of the image.
        with open(filename, 'wb') as f:
            f.write(response.content)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Rebuild the page HTML line by line: every line that mentions one of the
# page's images is replaced by a minimal <img> wrapper pointing at the
# local file; all other lines are kept unchanged.
html = text.split("\n")
new_lines = []
for line in html:
    # replace html img element
    matches = [image for image in images if image in line]
    if matches:
        # Several names can match when one is contained in another
        # (e.g. "2.jpg" inside "Concertina2.jpg"); the longest match is the
        # most specific one, and choosing it keeps a later, shorter name
        # from overwriting a replacement that was already correct.
        image = max(matches, key=len)
        line = f"""<div class="image"><img src="{image}"></div>"""
        print(line)
    new_lines.append(line)

# Join once at the end instead of growing a string inside the loop; every
# line, including the last, is terminated by a newline.
new_html = "".join(line + "\n" for line in new_lines)
print(new_html)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Full HTML document for the web-to-print output. The rewritten page
# content (`new_html`) is interpolated into <body>. The head loads the
# paged.js polyfill script and the stylesheets referenced below.
template=f'''<!DOCTYPE html>

<html>
<head>
<link rel="stylesheet" type="text/css" href="style.css">
<meta charset="utf-8">
<title>web-to-print hello</title>
<script src="paged.polyfill.js"></script>
<!-- load the paged.js interface stylesheet -->
<link href="interface.css" rel="stylesheet" type="text/css">
<!-- load your own stylesheet -->
<link rel="stylesheet" type="text/css" href="stylesheet.css">
</head>
<body>
{new_html}
</body>

</html>
'''
|
||||||
|
|
||||||
|
# Write the finished document to disk. The `with` block closes the file
# even if writing fails. The encoding is given explicitly so the bytes on
# disk match the <meta charset="utf-8"> declared in the template, whatever
# the platform's default encoding is.
with open('notebook_json.html', mode='w', encoding='utf-8') as wikicreate:
    wikicreate.write(template)
|
||||||
|
|
Loading…
Reference in New Issue