incorporated mw categories

master
Castro0o 10 years ago
parent c69c2380e0
commit 1b1933aece

@ -4,6 +4,7 @@
import urllib2, json, pprint, re import urllib2, json, pprint, re
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import subprocess, shlex, urllib import subprocess, shlex, urllib
from mwclient import Site
sid = '1234' sid = '1234'
useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101"
endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
@ -22,18 +23,18 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
page_content = json_dic.get(page_id) page_content = json_dic.get(page_id)
return page_content return page_content
def api_page(pageid, query): def api_page(title, query):
if query == 'content': if query == 'content':
api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid) api_response = api_request('action=query&titles={}&prop=revisions&rvprop=content', title)
response = ((api_response.get('revisions'))[0])['*'] response = ((api_response.get('revisions'))[0])['*']
elif query == 'metadata': elif query == 'metadata':
response = api_request('action=query&pageids={}&prop=info', pageid) response = api_request('action=query&titles={}&prop=info', title)
elif query == 'articleimgs': elif query == 'articleimgs':
response = api_request('action=query&pageids={}&prop=images', pageid) response = api_request('action=query&titles={}&prop=images', title)
elif query == 'file': elif query == 'file':
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid) response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',title)
elif query == 'imageinfo': elif query == 'imageinfo':
pagename = pageid # in imageinfo titles are used instead of id pagename = title # in imageinfo titles are used instead of id
print 'IMAGEINFO', pagename print 'IMAGEINFO', pagename
response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidht dermines with of thumbnail response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidht dermines with of thumbnail
return response return response
@ -43,6 +44,24 @@ def api_page(pageid, query):
################ ################
# * MUST BE REPLACE BY SMARTER CODE (USING PY MD LIB) # * MUST BE REPLACE BY SMARTER CODE (USING PY MD LIB)
############################## ##############################
def mw_cats(args):
    '''Return the titles of the pages that are members of every requested category.

    args is the parsed command-line namespace and must provide:
      args.host     -- wiki hostname, handed to mwclient's Site
      args.path     -- wiki path on that host, handed to Site as path=
      args.category -- list of lists of category names (one inner list per
                       -c option); all the names are intersected together

    Returns a list of page names, in the order in which the last category
    queried lists them. Returns an empty list when no category was given.
    '''
    site = Site(args.host, path=args.path)
    results = []
    common_names = None  # names present in every category seen so far
    for cats in args.category:
        for cname in cats:
            pages = list(site.Categories[cname].members())
            if common_names is not None:
                # Filter against the running intersection, not merely the
                # previous category, so three or more categories intersect
                # correctly.
                pages = [p for p in pages if p.name in common_names]
            results = pages
            common_names = set(p.name for p in pages)
    return [p.name for p in results]
def api_pagecategories(pageid): def api_pagecategories(pageid):
'''Find all the categories, and their parent category of a page ''' '''Find all the categories, and their parent category of a page '''
query = 'action=query&pageids={}&prop=categories'.format(pageid) query = 'action=query&pageids={}&prop=categories'.format(pageid)

@ -17,8 +17,15 @@
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import html5lib, re, pprint import html5lib, re, pprint
from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file, mw_cats
from argparse import ArgumentParser
# Command-line options selecting the wiki and the categories to query.
p = ArgumentParser()
# Wiki hostname; mw_cats hands it to mwclient's Site as the host.
p.add_argument("--host", default="pzwiki.wdka.nl")
# Wiki path on that host; mw_cats hands it to Site as path=.
p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /")
# nargs="*" combined with action="append" makes args.category a list of lists:
# one inner list of category names for every -c given on the command line.
p.add_argument("--category", "-c", nargs="*", default=[], action="append", help="category to query, use -c foo -c bar to intersect multiple categories")
args = p.parse_args()
print args  # echo the parsed options (Python 2 print statement)
######## ########
# QUERY API # QUERY API
######## ########
@ -29,12 +36,16 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
######## ########
# CREATE INDEX # CREATE INDEX
######## ########
memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}] memberpages=mw_cats(args)
print 'memberpages', memberpages
#memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}]
#memberpages = [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}] #memberpages = [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}]
#memberpages = [{u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}] #memberpages = [{u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}]
#memberpages = [{u'ns': 0, u'pageid': 16007, u'title': u'U ntitled'}] #memberpages = [{u'ns': 0, u'pageid': 16007, u'title': u'U ntitled'}]
#memberpages = [{u'ns': 0, u'pageid': 15965, u'title': u'Qq'}] #memberpages = [{u'ns': 0, u'pageid': 15965, u'title': u'Qq'}]
print 'memberpages', memberpages ## output: memberpages [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}]
######## ########
# Templates # Templates
@ -53,14 +64,14 @@ index_container = index_tree.find(".//div[@class='isotope']") #maybe id is impor
for member in memberpages: for member in memberpages:
#print member #print member
# download mw work page # download mw work page
pageid=member['pageid'] # pageid=member['pageid']
pagetitle=(member['title'].encode('utf-8')) # pagetitle=(member['title'].encode('utf-8'))
workpage_mw = api_page(pageid, 'content') workpage_mw = api_page(member, 'content')
# parse workpage_mw # parse workpage_mw
workpage_mw = replace_gallery(workpage_mw) workpage_mw = replace_gallery(workpage_mw)
workpage_mw = replace_video(workpage_mw) workpage_mw = replace_video(workpage_mw)
workdict = parse_work(pagetitle, workpage_mw) # create dictionary workpage_mw template workdict = parse_work(member, workpage_mw) # create dictionary workpage_mw template
for key in workdict.keys(): # convert Extra, Description, Bio to HTML for key in workdict.keys(): # convert Extra, Description, Bio to HTML
if key in ['Extra', 'Description', 'Bio'] and workdict[key]: if key in ['Extra', 'Description', 'Bio'] and workdict[key]:
@ -96,7 +107,7 @@ for member in memberpages:
workpage_html = ET.tostring(tree) workpage_html = ET.tostring(tree)
creator = workdict['Creator'].decode('ascii', 'ignore') creator = workdict['Creator'].decode('ascii', 'ignore')
creator = creator.replace(' ','_') creator = creator.replace(' ','_')
work_filename = 'web/{}-{}-{}.html'.format(workdict['Date'], creator, pageid) work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
work_file = open(work_filename, "w") work_file = open(work_filename, "w")
work_file.write(workpage_html) work_file.write(workpage_html)
work_file.close() work_file.close()

@ -102,7 +102,7 @@
<div class="item video flash"><img src="img/project_roel2.png"></div> <div class="item video flash"><img src="img/project_roel2.png"></div>
<div class="item narrative"><img src="img/project_andre.jpeg"></div--> <div class="item narrative"><img src="img/project_andre.jpeg"></div-->
<div class="item" data-creator="Artyom" data-date="2015" data-title="Artyom-graduation-work" id="Extra"><a class="work" href="2015-Artyom-16005.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/6/6a/Screen_Shot_2014-10-26_at_16.10.08.jpg/500px-Screen_Shot_2014-10-26_at_16.10.08.jpg"></a></div><div class="item" data-creator="Joseph Knierzinger" data-date="2015" data-title="User:Joak/graduation/catalog1" id="Extra"><a class="work" href="2015-Joseph_Knierzinger-15986.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Max Dovey" data-date="2015" data-title="User:Max Dovey/maxgradbio" id="Extra"><a class="work" href="2015-Max_Dovey-15999.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Lucia Dossin" data-date="2015" data-title="Mina" id="Extra"><a class="work" href="2015-Lucia_Dossin-16025.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/5/59/3legs.jpg"></a></div><div class="item" data-creator="Luther Blisset" data-date="2015" data-title="Qq" id="Extra"><a class="work" href="2015-Luther_Blisset-15965.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a></div><div class="item" data-creator="Ana Luísa Moura" data-date="2015" data-title="The Aesthetics of Ethics" id="Extra"><a class="work" href="2015-Ana_Lusa_Moura-15982.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a></div><div class="item" data-creator="Henk-Jelle de Groot" data-date="2015" data-title="U ntitled" id="Extra"><a class="work" href="2015-Henk-Jelle_de_Groot-16007.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/e/e7/9m4MBqRM1w-6.png"></a></div></div> <div class="item" data-creator="Max Dovey" data-date="2015" 
data-title="User:Max Dovey/maxgradbio" id="Extra"><a class="work" href="2015-Max_Dovey.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Joseph Knierzinger" data-date="2015" data-title="User:Joak/graduation/catalog1" id="Extra"><a class="work" href="2015-Joseph_Knierzinger.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Ana Luísa Moura" data-date="2015" data-title="The Aesthetics of Ethics" id="Extra"><a class="work" href="2015-Ana_Lusa_Moura.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a></div><div class="item" data-creator="Henk-Jelle de Groot" data-date="2015" data-title="U ntitled" id="Extra"><a class="work" href="2015-Henk-Jelle_de_Groot.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/e/e7/9m4MBqRM1w-6.png"></a></div><div class="item" data-creator="Artyom" data-date="2015" data-title="Artyom-graduation-work" id="Extra"><a class="work" href="2015-Artyom.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/6/6a/Screen_Shot_2014-10-26_at_16.10.08.jpg/500px-Screen_Shot_2014-10-26_at_16.10.08.jpg"></a></div><div class="item" data-creator="Lucia Dossin" data-date="2015" data-title="Mina" id="Extra"><a class="work" href="2015-Lucia_Dossin.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/5/59/3legs.jpg"></a></div><div class="item" data-creator="Luther Blisset" data-date="2015" data-title="Qq" id="Extra"><a class="work" href="2015-Luther_Blisset.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a></div></div>
</div> </div>
<div class="sidebarBorderLeft zwartArea" id="section03"> <div class="sidebarBorderLeft zwartArea" id="section03">

Loading…
Cancel
Save