diff --git a/.gitignore b/.gitignore
index ee6496f..6a5eccb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,8 @@
-login.txt
images/
+static_html/
+login.txt
+imgs_info.py
+images.json
# venv dirs & files
.idea/
@@ -8,4 +11,4 @@ lib/
lib64
pyvenv.cfg
share/
-images.json
+__pycache__/
diff --git a/README.md b/README.md
index aea5422..32dd762 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,14 @@
* [mwclient](https://mwclient.readthedocs.io/en/latest/index.html) Python library
* Install:
* `pip3 install mwclient`
+* [jinja2](https://jinja.palletsprojects.com/en/2.11.x/) Python library
+ * Install:
+ * `pip3 install jinja2`
+* [pandoc](https://pandoc.org/)
+ * Install:
+ * Debian/Ubuntu: `sudo apt install pandoc`
+ * Mac: `brew install pandoc`
+
## login.txt
`login.txt` is a secret file (ignored by git) where you place your itch wiki username and password, on separate lines.
@@ -22,4 +30,14 @@ mypassword
## Run
-* `python3 download_imgs.py`
\ No newline at end of file
+* `python3 download_imgs.py`
+ * Downloads all images from wiki to `images/` directory
+ * and stores each image's metadata to `images.json`
+* `python3 images2html.py`
+ * cycles through the items in `images.json`
+ * queries the wiki for each image's `File:` page
+ * if it contains essential metadata: Title, Page, Total Pages
+ * retrieves its text content
+ * generates an HTML page from it
+ * saves the page to the `static_html/` directory
+
\ No newline at end of file
diff --git a/functions.py b/functions.py
index eb9e9b7..c99623f 100644
--- a/functions.py
+++ b/functions.py
@@ -1,4 +1,19 @@
-import os, json
+import os, json, re
+import subprocess
+
+def pandoc(content, format_in, format_out):
+    """Convert ``content`` from one markup format to another using pandoc.
+
+    The text is sent to the ``pandoc`` executable on stdin and the converted
+    document is read back from its stdout.
+
+    :param content: text to convert (str)
+    :param format_in: pandoc reader name passed to ``-f`` (e.g. 'mediawiki')
+    :param format_out: pandoc writer name passed to ``-t`` (e.g. 'html')
+    :return: converted text, decoded as UTF-8
+    :raises subprocess.CalledProcessError: if pandoc exits with a non-zero status
+    :raises FileNotFoundError: if the pandoc executable is not installed
+    """
+    # No shell and no string interpolation: wiki text is full of apostrophes
+    # ('''bold''', ''italic'') which would end a shell-quoted string early,
+    # and `echo` may mangle backslashes. stdin carries the content verbatim.
+    output = subprocess.check_output(
+        ['pandoc', '-f', format_in, '-t', format_out],
+        input=content.encode('utf8'))
+    return output.decode('utf8')
+
+
+def page_props(wikicontent):
+    """Extract ``|name=value`` pairs from a page's wikitext.
+
+    Each match starts at a ``|``, takes the word characters up to the first
+    ``=`` as the name and the rest of that line as the value (not trimmed).
+
+    :param wikicontent: raw wikitext of a page
+    :return: dict mapping property names to their string values
+    """
+    pattern = re.compile(r'\|(\w*?)\=(.*)', flags=re.M)
+    # findall yields (name, value) tuples; dict() keeps the last value when a
+    # name occurs more than once
+    return dict(pattern.findall(wikicontent))
+
def update_json(imgsjson_fn, img_dict, img_fn):
diff --git a/images2html.py b/images2html.py
new file mode 100644
index 0000000..d195687
--- /dev/null
+++ b/images2html.py
@@ -0,0 +1,76 @@
+import os, json
+from mwclient import Site
+from pprint import pprint
+from jinja2 import Template
+from functions import pandoc, page_props
+
+# Script setup: wiki connection, paths relative to this script, image
+# metadata from images.json, output directory, and wiki login.
+site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
+wd = os.path.dirname(os.path.abspath(__file__))  # directory containing this script
+imgdir = os.path.join(wd, 'images')
+imgsjson_fn = os.path.join(wd, 'images.json')
+with open(imgsjson_fn, 'r') as imgsjson_file:
+    images_info = json.load(imgsjson_file)
+
+static_html = os.path.join(wd, 'static_html')
+os.makedirs(static_html, exist_ok=True)  # create static_html/ dir
+
+# login.txt: username on the first line, password on the second.
+# splitlines()[:2] tolerates a trailing newline (or extra lines) at the end of
+# the file, which a plain split('\n') into two names does not.
+with open(os.path.join(wd, 'login.txt'), 'r') as login:
+    user, pwd = login.read().splitlines()[:2]
+site.login(username=user, password=pwd)  # login to wiki
+
+
+# Jinja2 source for a single generated page. Its placeholders (e.g. title,
+# content) are filled in by page_template.render(...) for each image page.
+page_html_template = '''
+
+
+
+
+
+ {{title}}
+
+
+ {{ title }}
+
+
+
+
+
+ {{ content }}
+
+
+
+
+'''
+page_template = Template(page_html_template)
+
+
+# For every image listed in images.json: fetch its wiki page, read the
+# |name=value properties from the wikitext and, when the page carries the
+# metadata needed to name the output file, render it to static_html/.
+for img_info in images_info.values():
+    print(img_info)
+    page = site.pages[img_info['name']]
+    pagetext = page.text()
+    pageproperties = page_props(wikicontent=pagetext)
+    print(pageproperties)
+
+    title = pageproperties.get('Title')
+    part = pageproperties.get('Part')
+    # Title and Part both go into the file name; a page without Part used to
+    # crash on None.zfill(), so such pages are skipped instead.
+    if not title or part is None:
+        continue
+
+    pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html')
+    page_html = page_template.render(title=title,
+                                     date=pageproperties.get('Date'),
+                                     imgsrc=os.path.join(imgdir, img_info.get('filename')),
+                                     content=pagetext_html,
+                                     part=part,
+                                     partof=pageproperties.get('Partof'))
+    # Property values are captured untrimmed, so strip stray whitespace before
+    # building the file name: <TitleWithoutSpaces>_<zero-padded part>.html
+    htmlpage_fn = "{}_{}.html".format(
+        title.strip().replace(" ", ""),
+        part.strip().zfill(3)
+    )
+    print(htmlpage_fn)
+    # pandoc output is decoded as UTF-8; write it back with the same encoding
+    # rather than the platform default.
+    with open(os.path.join(static_html, htmlpage_fn), 'w', encoding='utf-8') as htmlfile:
+        htmlfile.write(page_html)
diff --git a/imgs_info.py b/imgs_info.py
new file mode 100644
index 0000000..2ce3b9c
--- /dev/null
+++ b/imgs_info.py
@@ -0,0 +1,70 @@
+import os, json
+from mwclient import Site
+from pprint import pprint
+from functions import update_json
+
+# Script setup: wiki connection, paths relative to this script, wiki login,
+# and image metadata from images.json.
+site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
+wd = os.path.dirname(os.path.abspath(__file__))  # directory containing this script
+
+imgdir = os.path.join(wd, 'images')
+os.makedirs(imgdir, exist_ok=True)  # create images/ dir
+
+imgsjson_fn = os.path.join(wd, 'images.json')
+
+# login.txt: username on the first line, password on the second.
+# splitlines()[:2] tolerates a trailing newline (or extra lines) at the end of
+# the file, which a plain split('\n') into two names does not.
+with open(os.path.join(wd, 'login.txt'), 'r') as login:
+    user, pwd = login.read().splitlines()[:2]
+site.login(username=user, password=pwd)  # login to wiki
+
+with open(imgsjson_fn, 'r') as imgsjson_file:
+    images_info = json.load(imgsjson_file)
+
+# Exploratory probe of one hard-coded images.json entry: prints the entry,
+# the page object mwclient returns for it, the page text and where the image
+# is used.
+img_info = images_info["File:CCF 003017.jpg"]
+
+print(img_info)
+page_name = img_info['name']
+page = site.pages[page_name]
+print(page)
+pprint(page.__dict__)
+print(dir(page))
+text = page.text()
+used_in = list(page.imageusage())
+print(text, used_in)
+# response = site.api(action='browsebysubject', subject=page_name)
+# for q in response['query']['data']:
+# print(q['property'], q['dataitem'])
+# print(q)
+# print('result:', response['query']['data'][0]['item'])
+# print('keys', response['query'].keys())
+
+# Run an ask query through mwclient and dump whatever comes back.
+# NOTE(review): '[[Date::+]]|?Date' looks like a Semantic MediaWiki query for
+# pages that have a Date value, printing that Date - confirm.
+response = site.ask(query='[[Date::+]]|?Date', title=page_name)
+print(response)
+# import pdb; pdb.set_trace()
+for r in response:
+ print('response', r)
+ for title, data in r.items():
+ print(data)
+ # import pdb; pdb.set_trace()
+ print(type(data))
+ # only values whose exact type is neither str nor int are walked further
+ if type(data) not in [str, int]:
+ for k, item in data.items():
+ # NOTE(review): indexes each value with its own key - confirm the
+ # result structure actually supports item[k]
+ print('item', k, item[k])
+ for subitem in data.items():
+ print('subitem', subitem, )
+ # for k, v in data['printouts']:
+ # print(k,v)
+# action=smwbrowse&browse=page¶ms={ "limit": 10, "offset": 0, "search": "Main", "fullText": true, "fullURL": true }
+# for page in site.pages(page_name):
+# print(page)
+
+
+#
+# for img_key, img_info in images_info.items():
+# if n < 4:
+# print(img_info)
+# page_name = img_info['name']
+# page = site.pages(page_name)
+# print(page)
+# print('\n')
\ No newline at end of file
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..74ec965
--- /dev/null
+++ b/static/style.css
@@ -0,0 +1,3 @@
+/* Base text size for the document body. */
+body{font-size: 12pt;}
+
+/* Images inside the div with id "img" take the full width of that div. */
+div#img img {width: 100%;}
\ No newline at end of file