diff --git a/mmdc_modules.py b/mmdc_modules.py index 83c88c3..d96e79b 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -10,7 +10,11 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" # API MODULES def api_request(action, pagename): #get page: content, metadata, images, imageifnp - url = endpoint + action.format(pagename) + print 'API REQUEST' + print pagename + print 'TEST', action.format(pagename) + url = endpoint + (action.format(pagename)) + print 'API REQUEST', url request = urllib2.urlopen(url) jsonp = json.loads(request.read() ) json_dic= (jsonp.get('query').get('pages')) @@ -29,10 +33,10 @@ def api_page(pageid, query): response = api_request('action=query&pageids={}&prop=images', pageid) elif query == 'file': response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid) - pprint.pprint( response ) elif query == 'imageinfo': pagename = pageid # in imageinfo titles are used instead of id - response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidht dermines with of thumbnail + print 'IMAGEINFO', pagename + response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidht dermines with of thumbnail return response def api_file_url(filename): # get full urls @@ -44,13 +48,12 @@ def api_file_url(filename): # get full urls return None def api_thumb_url(filename): - '''get thumbnail url of image''' - page_content_dict = api_page(filename, 'imageinfo') - if 'imageinfo' in page_content_dict.keys(): - thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl')) - return thumburl - - + print '''get thumbnail url of image''' + thumburl = api_page(filename, 'imageinfo') + thumburl = ((thumburl.get('imageinfo'))[0].get('thumburl')) + print thumburl + return thumburl + # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300 # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content @@ -93,22 +96,29 @@ def parse_work_page(title, content): work_dict[key]=val return work_dict, extra + -# Alternative to parse_work_page - W/out dictionary +# Alternative to parse_work_page def parse_work(title, content): + workdict = {'Title':title, 'Creator':'', 'Date':'', 'Website':'', 'Thumbnail':'', 'Bio':'', 'Description':'', 'Extra':''} + if re.match('\{\{\Graduation work', content): template, extra = (re.findall('\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0] + workdict['Extra'] = extra#.encode('utf-8') # template's key/value pair keyval = re.findall('\|(.*?)\=(.*?\n)', template, re.DOTALL) for pair in keyval: key = pair[0] - val = pair[1] - if 'Description' in key: - description = val - - print keyval - - return extra, description + val = (pair[1]).replace('\n', '') + if 'Creator' in key: + val = val.replace(', ', '') + elif 'Thumbnail' in key: + print 'calling API' + val = api_thumb_url(val) + print 'THUMB', val + workdict[key]=val + pprint.pprint(workdict) + return workdict @@ -171,3 +181,7 @@ def img_fullurl(parent): # fileurl = api_request(src, endpoint)# find url of file + + + + diff --git a/prototype_page.py b/prototype_page.py index 6f493e0..d25c8d3 100755 --- a/prototype_page.py +++ b/prototype_page.py @@ -4,28 +4,59 @@ ########### # Testing downloading and converting mw page content to html ########### + +# OVER VIEW: +# * creating one single html page +# request all the pages +# build index +# build all pages + +import pprint from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work_page, parse_work template = open("web/page-template.html", "r") template = template.read() -template = template.format(title="This is My Title", creator='', date='', website='', thumbnail='', bio='',description='', extra='' ) + # download -pageid='15965' -article = api_page(pageid, 'content') -#print article -#print '----------- article -----' +pageid='16025'#'15965'#Qq #'15986'Jozeph +work = 'Mina'#'User:Joak/graduation/catalog1' + +workpage_mw = api_page(pageid, 'content') +# parsing workpage_mw +workdict = parse_work(work, workpage_mw) +for key in workdict.keys(): + if key in ['Extra', 'Description', 'Bio']: + workdict[key] = pandoc2html(workdict[key].encode('utf-8')) +# print key +# print workdict[key] +# print '--------------------' + +template = template.format(title=workdict['Title'], creator=workdict['Creator'], date=workdict['Date'], website=workdict['Website'], thumbnail=workdict['Thumbnail'], bio=workdict['Bio'],description=workdict['Description'], extra=workdict['Extra'] ) +work_filename = 'web/{}-{}-{}.html'.format(workdict['Date'], (workdict['Creator'].encode('ascii', 'ignore')).replace(' ','_'), pageid) +work_file = open(work_filename, "w") +work_file.write(template) +work_file.close() +#template = template.read() + +#print(template) + + + +#for section in [extra, description, bio]: +# section = pandoc2html(section.encode('utf-8')) +# print section +# print '------------' + +#print template +#for key in workdict.keys(): +# print key, workdict[key].encode('utf-8') + -# parsing article -extra, description = parse_work('Qq', article) -# placing mw content inside dict makes it non convertable. Why? -#print extra #work_dict['Extra'] -html_extra = pandoc2html(extra.encode('utf-8')) -html_description = pandoc2html(description.encode('utf-8')) -print '----------- html -----' -print html_description +#print '----------- html -----' +#print html_description #print html_extra -print template +#print template