script to download images from wiki into html

2 years ago · ffe6af0140
commit ffe6af0140
2 changed files with 98 additions and 0 deletions
--- a/README.md
+++ b/README.md
--- a/scripttryout2.py
+++ b/scripttryout2.py
@ -0,0 +1,98 @@
+from urllib import request
+import json
+import requests
+import mwclient 
+import os
+
+# Ask the wiki's parse API for the rendered notebook page, as JSON.
+page_url='https://pzwiki.wdka.nl/mw-mediadesign/api.php?action=parse&page=User:FLEM/Notebook&format=json'
+# "with" closes the HTTP response as soon as the body has been read.
+with request.urlopen(page_url) as page_response:
+    data=page_response.read()
+print(data)
+
+
+# Turn the raw JSON bytes into nested Python dictionaries and lists.
+json_data=json.loads(data)
+print(json_data)
+
+# Everything this script uses sits under the top-level "parse" key.
+title=json_data['parse']['title']
+print(title)
+
+# File names of the images used on the page, for example:
+images=json_data['parse']['images']
+print(images)
+#['Inside_overview_(1).gif', 'Inside_overview_(2).gif', 'Inside_pocket.gif', 'Opened.jpeg', 'Outside_cover.jpeg', 'Paper_selection.jpeg', 'Side_view.jpeg', 'Inuse.jpg', 'Multicolumnednotebook_.jpg', 'Multicolumnednotebook_(1).jpg', 'Multicolumnednotebook_(2).jpg', 'Archive_01.jpg', 'Concertina2.jpg', 'Concertina1.jpg', 'Metalring5.jpg', 'Metalring6.jpg', 'Penholder.jpg', 'Paperfoldedupdate.jpg', 'Notesfrompellebyme3.jpg', 'M&Ms_results.jpg', 'Notebookworkflow_(8).jpg', 'Longletternotebook.jpg', 'Longletternotebooktext.jpg', 'Prototype.gif', 'First_notebook_kit.jpg', 'Annotation_scans_(1).jpg']
+
+# The page body as an HTML string; the API stores it under the "*" key.
+text=json_data['parse']['text']['*']
+print(text)
+
+
+# Download every image used on the page into the current folder.
+# One Session is shared by all requests so the connection can be reused.
+S = requests.Session()
+URL = "https://pzwiki.wdka.nl/mw-mediadesign/api.php"
+
+for image in images:
+
+    # Look this one file up by its title. The previous list=allimages query
+    # walks the whole wiki alphabetically starting at "aifrom", so it could
+    # hand back a different file when the requested name was not found.
+    PARAMS = {
+        "action": "query",
+        "format": "json",
+        "titles": "File:" + image, #using variable from the list above
+        "prop": "imageinfo",
+        "iiprop": "url" #only the download url is needed
+    }
+
+    R = S.get(url=URL, params=PARAMS)
+    DATA = R.json()
+
+    # .get() with defaults: an unexpected answer skips the image, no crash.
+    PAGES = DATA.get("query", {}).get("pages", {})
+
+    for page in PAGES.values():
+        # A file the wiki does not know has no "imageinfo" entry.
+        for info in page.get("imageinfo", []):
+            imageurl = info["url"]
+            # Save under the exact name from the images list: that is the
+            # name the <img src="..."> written further down points at.
+            filename = image
+            print(imageurl)
+            print(filename)
+
+            response = S.get(imageurl)
+            # Status code 200 means the server answered "OK"; anything else
+            # means the body is not the image, so do not save it.
+            if response.status_code != 200:
+                print("could not download", imageurl, response.status_code)
+                continue
+            # "f" is only a variable name for the open file ("fd" would work
+            # the same); "with" closes the file when the block ends.
+            with open(filename, 'wb') as f:
+                f.write(response.content)
+
+
+
+# Walk the page HTML line by line; any line that mentions one of the image
+# file names is swapped for a plain <img> element pointing at the local file.
+html=text.split("\n")
+rewritten=[]
+for row in html:
+    for name in images:
+        if name not in row:
+            continue
+        # the whole line is replaced, not only the original img element
+        row=f"""<div class="image"><img src="{name}"></div>"""
+        print(row)
+    rewritten.append(row+"\n")
+new_html="".join(rewritten)
+print(new_html)
+
+
+
+
+# Wrap the rewritten page body in a complete HTML document that loads
+# paged.js and the stylesheets used for web-to-print.
+template=f'''<!DOCTYPE html>
+
+<html>
+    <head>
+        <link rel="stylesheet" type="text/css" href="style.css">
+    <meta charset="utf-8">
+    <title>web-to-print hello</title>
+    <script src="paged.polyfill.js"></script>
+    <!-- load the paged.js interface stylesheet -->
+    <link href="interface.css" rel="stylesheet" type="text/css">
+    <!-- load your own stylesheet -->
+    <link rel="stylesheet" type="text/css" href="stylesheet.css">
+</head>
+<body>
+{new_html}
+</body>
+
+</html>
+'''
+
+# Write as UTF-8 so the file matches the <meta charset="utf-8"> it declares,
+# whatever the computer's default encoding is. "with" closes the file even if
+# writing fails.
+with open('notebook_json.html', mode='w', encoding='utf-8') as wikicreate:
+    wikicreate.write(template)
+