wip: generate an index via ask

andre
Castro0o 5 years ago
parent 74eb3393d3
commit 94f892de33

@ -1,5 +1,7 @@
import os, json, re import os, json, re
import subprocess import subprocess
from datetime import datetime
def pandoc(content, format_in, format_out): def pandoc(content, format_in, format_out):
pandoc_cmd = "echo '{}' | pandoc -f {} -t {}".format( pandoc_cmd = "echo '{}' | pandoc -f {} -t {}".format(
@ -15,6 +17,29 @@ def page_props(wikicontent):
return found_dict return found_dict
def unpack_response(response):
    """Flatten one semantic-query answer into a plain ``{name: value}`` dict.

    ``response`` must provide a ``'fulltext'`` entry (stored under the
    ``'page'`` key of the result) and a ``'printouts'`` mapping of property
    name -> list of values.

    Each value is reduced as follows:

    * a non-dict value is stored unchanged;
    * a dict with a ``'fulltext'`` key is reduced to that entry;
    * a dict with a ``'timestamp'`` key is converted with
      ``datetime.fromtimestamp`` (naive, local time);
    * any other dict is reduced to its first value, or ``None`` when the
      dict is empty.

    When a property lists several values only the last one is kept.
    Properties whose value list is empty are left out of the result.

    Raises ``KeyError`` if ``'fulltext'`` or ``'printouts'`` is missing.
    """
    unpacked = {'page': response['fulltext']}
    for prop, prop_values in response['printouts'].items():
        for prop_val in prop_values:
            if not isinstance(prop_val, dict):
                unpacked[prop] = prop_val
            elif 'fulltext' in prop_val:
                unpacked[prop] = prop_val.get('fulltext')
            elif 'timestamp' in prop_val:
                unpacked[prop] = datetime.fromtimestamp(
                    int(prop_val.get('timestamp')))
            else:
                # An empty dict has no first value; fall back to None
                # instead of raising IndexError.
                unpacked[prop] = next(iter(prop_val.values()), None)
    return unpacked
def update_json(imgsjson_fn, img_dict, img_fn): def update_json(imgsjson_fn, img_dict, img_fn):
# write img_dict to json file # write img_dict to json file

@ -0,0 +1,95 @@
import os, json
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import pandoc, page_props, unpack_response
# Build an index of the wiki's image pages: ask the wiki for every File page
# that has a Title and a Date, then print each page's local image path
# together with its wikitext rendered to HTML.
site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
wd = os.path.dirname(os.path.abspath(__file__))  # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file:
    images_info = json.load(imgsjson_file)

static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ dir

# login.txt holds the user name on its first line and the password on the
# second one.
with open(os.path.join(wd, 'login.txt'), 'r') as login:
    loginlines = login.read()
# splitlines() tolerates a trailing newline, which split('\n') would turn
# into a third (empty) item and break the two-name unpacking.
user, pwd = loginlines.splitlines()[:2]
site.login(username=user, password=pwd)  # login to wiki

query = ('[[File:+]][[Title::+]][[Date::+]]|?Title|?Date|?Part|sort=Date,Title,Part|order=asc,asc,asc')
print('Query:', query)
for answer in site.ask(query):
    # print(answer, answer.keys())
    printout_dict = unpack_response(answer)
    pprint(printout_dict)
    # images.json entries are looked up by the page name returned by the query
    img_info = images_info[printout_dict['page']]
    # Index the page list with the title string itself, not a list
    # wrapping it.
    page = site.pages[printout_dict['page']]
    pagetext = page.text()
    pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html')
    img_local = os.path.join(imgdir, img_info.get('filename'))
    # pprint(img_info)
    print(img_local, pagetext_html, '\n')
#
#
#
# page_html_template = '''
# <!DOCTYPE html>
# <html lang="en">
# <head>
# <meta charset="utf-8">
# <link rel="stylesheet" href="../static/style.css" />
# <title>{{title}}</title>
# </head>
# <body>
# <h1>{{ title }}</h1>
# <p><time datetime="{{date}}">{{date}}</time></p>
# <div id="img">
# <img src="{{ imgsrc }}" />
# </div>
# <div id="content">
# {{ content }}
# </div>
# <footer>
# Part {{part}} of {{partof}}
# </footer>
# </body>
# </html>
# '''
# page_template = Template(page_html_template)
#
#
# for img_info in images_info.values():
# print(img_info)
# page_name = img_info['name']
# page = site.pages[page_name]
# # print(page)
# # pprint(page.__dict__)
# # print(dir(page))
# pagetext = page.text()
# pageproperties = page_props(wikicontent=pagetext)
# print(pageproperties)
#
# if pageproperties.get('Title'):
# pagetext_html = pandoc(content=pagetext, format_in='mediawiki', format_out='html')
# # print('pagetext', pagetext)
# # print('pagetext_html', pagetext_html)
# page_html = page_template.render(title=pageproperties.get('Title'),
# date=pageproperties.get('Date'),
# imgsrc=os.path.join(imgdir, img_info.get('filename')),
# content=pagetext_html,
# part=pageproperties.get('Part'),
# partof=pageproperties.get('Partof'))
# htmlpage_fn = "{}_{}.html".format(
# pageproperties.get('Title').replace(" ", ""),
# pageproperties.get('Part').zfill(3)
# )
# print(htmlpage_fn)
# with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
# htmlfile.write(page_html)
Loading…
Cancel
Save