def parse_work(site, title, content):
    '''Parse a ``{{Graduation work ...}}`` wiki template into a dictionary.

    site    -- passed through unchanged to mw_img_url() when a Thumbnail
               field is present (presumably an mwclient Site; confirm
               against the caller).
    title   -- stored as-is under the 'Title' key.
    content -- raw wiki text of the page.

    Returns a dict that always holds the keys Title, Creator, Date, Website,
    Thumbnail, Bio, Description and Extra.  Fields that are absent from the
    template keep an empty unicode string.  Any other key found in the
    template is added to the dict as well.  Text following the closing
    ``}}`` is stored under 'Extra'.

    If the page does not start with the template, or the template is
    malformed (no newline after its name, or no closing ``}}``), the
    defaults are returned instead of raising.
    '''
    # urllib.unquote moved to urllib.parse in Python 3; resolve it locally
    # so the function works on either interpreter.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    workdict = {
        'Title': title,
        'Creator': u'',
        'Date': u'',
        'Website': u'',
        'Thumbnail': u'',
        'Bio': u'',
        'Description': u'',
        'Extra': u'',
    }

    # Only pages that *begin* with the template are parsed.
    if not content or not re.match(r'\{\{Graduation work', content):
        return workdict

    found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content,
                      re.DOTALL)
    if found is None:
        # Marker present but the template body could not be delimited.
        return workdict

    template, extra = found.groups()
    if extra:
        workdict['Extra'] = extra

    # Each "|key=value" pair ends at the first newline after the "=".
    # The Extra value is not captured here; it was taken from the text
    # after the closing braces above.
    for key, val in re.findall(r'\|(.*?)=(.*?\n)', template, re.DOTALL):
        # Strip the key so "| Creator = x" fills 'Creator' rather than
        # creating a separate ' Creator ' entry.
        key = key.strip()
        val = val.replace('\n', '')
        if 'Creator' in key:
            val = val.replace(u', ', u'')
        elif 'Thumbnail' in key:
            # mw_img_url is defined elsewhere in this module; presumably it
            # resolves a wiki file name to its image URL.
            val = mw_img_url(site, val)
        elif 'Website' in key:
            val = unquote(val)
        workdict[key] = val
    return workdict