diff --git a/mmdc_modules.py b/mmdc_modules.py index 5cbe30f..85b353d 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -4,6 +4,7 @@ import urllib2, json, pprint, re import xml.etree.ElementTree as ET import subprocess, shlex, urllib +from mwclient import Site sid = '1234' useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" @@ -22,18 +23,18 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif page_content = json_dic.get(page_id) return page_content -def api_page(pageid, query): +def api_page(title, query): if query == 'content': - api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid) + api_response = api_request('action=query&titles={}&prop=revisions&rvprop=content', title) response = ((api_response.get('revisions'))[0])['*'] elif query == 'metadata': - response = api_request('action=query&pageids={}&prop=info', pageid) + response = api_request('action=query&titles={}&prop=info', title) elif query == 'articleimgs': - response = api_request('action=query&pageids={}&prop=images', pageid) + response = api_request('action=query&titles={}&prop=images', title) elif query == 'file': - response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid) + response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',title) elif query == 'imageinfo': - pagename = pageid # in imageinfo titles are used instead of id + pagename = title # in imageinfo titles are used instead of id print 'IMAGEINFO', pagename response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidht dermines with of thumbnail return response @@ -43,6 +44,24 @@ def api_page(pageid, query): ################ # * MUST BE REPLACE BY SMARTER CODE (USING PY MD LIB) ############################## +def mw_cats(args): + site = Site(args.host, path=args.path) + last_names = None + for cats in args.category: + for ci, cname in enumerate(cats): + cat = site.Categories[cname] + pages = list(cat.members()) + # for p in pages: + # pages_by_name[p.name] = p + if last_names == None: + results = pages + else: + results = [p for p in pages if p.name in last_names] + last_names = set([p.name for p in pages]) + results = list(results) + return [p.name for p in results] + + def api_pagecategories(pageid): '''Find all the categories, and their parent category of a page ''' query = 'action=query&pageids={}&prop=categories'.format(pageid) diff --git a/prototype_page.py b/prototype_page.py index 81352cf..66e06e7 100755 --- a/prototype_page.py +++ b/prototype_page.py @@ -17,8 +17,15 @@ import xml.etree.ElementTree as ET import html5lib, re, pprint -from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file - +from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file, mw_cats +from argparse import ArgumentParser + +p = ArgumentParser() +p.add_argument("--host", default="pzwiki.wdka.nl") +p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /") +p.add_argument("--category", "-c", nargs="*", default=[], action="append", help="category to query, use -c foo -c bar to intersect multiple categories") +args = p.parse_args() +print args ######## # QUERY API ######## @@ -29,12 +36,16 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" ######## # CREATE INDEX ######## -memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}] +memberpages=mw_cats(args) +print 'memberpages', memberpages + +#memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}] #memberpages = [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}] #memberpages = [{u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}] #memberpages = [{u'ns': 0, u'pageid': 16007, u'title': u'U ntitled'}] #memberpages = [{u'ns': 0, u'pageid': 15965, u'title': u'Qq'}] -print 'memberpages', memberpages +## output: memberpages [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}] + ######## # Templates @@ -53,14 +64,14 @@ index_container = index_tree.find(".//div[@class='isotope']") #maybe id is impor for member in memberpages: #print member # download mw work page - pageid=member['pageid'] - pagetitle=(member['title'].encode('utf-8')) - workpage_mw = api_page(pageid, 'content') +# pageid=member['pageid'] +# pagetitle=(member['title'].encode('utf-8')) + workpage_mw = api_page(member, 'content') # parse workpage_mw workpage_mw = replace_gallery(workpage_mw) workpage_mw = replace_video(workpage_mw) - workdict = parse_work(pagetitle, workpage_mw) # create dictionary workpage_mw template + workdict = parse_work(member, workpage_mw) # create dictionary workpage_mw template for key in workdict.keys(): # convert Extra, Description, Bio to HTML if key in ['Extra', 'Description', 'Bio'] and workdict[key]: @@ -96,7 +107,7 @@ for member in memberpages: workpage_html = ET.tostring(tree) creator = workdict['Creator'].decode('ascii', 'ignore') creator = creator.replace(' ','_') - work_filename = 'web/{}-{}-{}.html'.format(workdict['Date'], creator, pageid) + work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator) work_file = open(work_filename, "w") work_file.write(workpage_html) work_file.close() diff --git a/web/2015-Artyom-16005.html b/web/2015-Artyom.html similarity index 100% rename from web/2015-Artyom-16005.html rename to web/2015-Artyom.html diff --git a/web/2015-Henk-Jelle_de_Groot-16007.html b/web/2015-Henk-Jelle_de_Groot.html similarity index 100% rename from web/2015-Henk-Jelle_de_Groot-16007.html rename to web/2015-Henk-Jelle_de_Groot.html diff --git a/web/2015-Lucia_Dossin-16025.html b/web/2015-Lucia_Dossin.html similarity index 100% rename from web/2015-Lucia_Dossin-16025.html rename to web/2015-Lucia_Dossin.html diff --git a/web/2015-Luther_Blisset-15965.html b/web/2015-Luther_Blisset.html similarity index 100% rename from web/2015-Luther_Blisset-15965.html rename to web/2015-Luther_Blisset.html diff --git a/web/2015-Max_Dovey-15999.html b/web/2015-Max_Dovey.html similarity index 100% rename from web/2015-Max_Dovey-15999.html rename to web/2015-Max_Dovey.html diff --git a/web/index.html b/web/index.html index ac3dc43..1e34050 100644 --- a/web/index.html +++ b/web/index.html @@ -102,7 +102,7 @@ <div class="item video flash"><img src="img/project_roel2.png"></div> <div class="item narrative"><img src="img/project_andre.jpeg"></div--> -
+