incorporated mw categories

10 years ago · 1b1933aece
parent c69c2380e0
commit 1b1933aece
8 changed files with 46 additions and 16 deletions
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@ -4,6 +4,7 @@
 import urllib2, json, pprint, re
 import xml.etree.ElementTree as ET
 import subprocess, shlex, urllib
+from mwclient import Site
 sid = '1234'
 useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101"
 endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
@ -22,18 +23,18 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
    page_content = json_dic.get(page_id)
    return page_content

-def api_page(pageid, query):
+def api_page(title, query):  # query is one of: 'content', 'metadata', 'articleimgs', 'file', 'imageinfo'
    if query == 'content':
-        api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
+        api_response = api_request('action=query&titles={}&prop=revisions&rvprop=content', title)
        response = ((api_response.get('revisions'))[0])['*']
    elif query == 'metadata':
-        response = api_request('action=query&pageids={}&prop=info', pageid)
+        response = api_request('action=query&titles={}&prop=info', title)
    elif query == 'articleimgs':
-        response = api_request('action=query&pageids={}&prop=images', pageid)
+        response = api_request('action=query&titles={}&prop=images', title)
    elif query == 'file':
-        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid)
+        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',title)
    elif query == 'imageinfo':
-        pagename = pageid # in imageinfo titles are used instead of id 
+        pagename = title # in imageinfo titles are used instead of id 
        print 'IMAGEINFO', pagename
        response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename)  # iiurlwidth determines the width of the thumbnail
    return response
@ -43,6 +44,24 @@ def api_page(pageid, query):
 ################ 
 # * MUST BE REPLACED BY SMARTER CODE (USING PY MD LIB)
 ##############################
+def mw_cats(args):
+    '''Return names of the pages that are members of every given category.
+
+    args supplies host, path (for Site) and category, a list of lists of
+    category names. Returns [] when no category is given.'''
+    site = Site(args.host, path=args.path)
+    results = None  # stays None until the first category has been read
+    for cats in args.category:
+        for cname in cats:
+            names = [p.name for p in site.Categories[cname].members()]
+            if results is not None:
+                # filter by the running result, not just the previous category
+                keep = set(results)
+                names = [n for n in names if n in keep]
+            results = names
+    return results or []
+
+
 def api_pagecategories(pageid):
    '''Find all the categories, and their parent category of a page '''
    query = 'action=query&pageids={}&prop=categories'.format(pageid)
--- a/prototype_page.py
+++ b/prototype_page.py
@ -17,8 +17,15 @@

 import xml.etree.ElementTree as ET
 import html5lib, re, pprint
-from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file
-
+from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file, mw_cats
+from argparse import ArgumentParser
+
+p = ArgumentParser()
+p.add_argument("--host", default="pzwiki.wdka.nl")
+p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /")
+p.add_argument("--category", "-c", nargs="*", default=[], action="append", help="category to query, use -c foo -c bar to intersect multiple categories")
+args = p.parse_args()
+print args
 ########
 # QUERY API
 ########
@ -29,12 +36,16 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
 ########
 # CREATE INDEX
 ########
-memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}]
+memberpages=mw_cats(args)
+print 'memberpages', memberpages
+
+#memberpages = api_pagesincategories('Graduation work', '2015') #list, containing dictionary of all pages ids. Example: [{u'ns': 0, u'pageid': 15974, u'title': u'Ahhhh'}, {u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}]
 #memberpages = [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}]
 #memberpages = [{u'ns': 0, u'pageid': 16005, u'title': u'Artyom-graduation-work'}]
 #memberpages =   [{u'ns': 0, u'pageid': 16007, u'title': u'U ntitled'}]
 #memberpages = [{u'ns': 0, u'pageid': 15965, u'title': u'Qq'}]
-print 'memberpages', memberpages
+## output: memberpages [{u'ns': 0, u'pageid': 15982, u'title': u'The Aesthetics of Ethics'}]
+

 ########
 # Templates
@ -53,14 +64,14 @@ index_container = index_tree.find(".//div[@class='isotope']") #maybe id is impor
 for member in memberpages:
    #print member
    # download mw work page
-    pageid=member['pageid']
-    pagetitle=(member['title'].encode('utf-8'))
-    workpage_mw = api_page(pageid, 'content')
+#    pageid=member['pageid']
+#    pagetitle=(member['title'].encode('utf-8'))
+    workpage_mw = api_page(member, 'content')

    # parse workpage_mw
    workpage_mw = replace_gallery(workpage_mw)
    workpage_mw = replace_video(workpage_mw)
-    workdict = parse_work(pagetitle, workpage_mw) # create dictionary workpage_mw template
+    workdict = parse_work(member, workpage_mw) # create dictionary workpage_mw template
    
    for key in workdict.keys(): # convert Extra, Description, Bio to HTML
        if key in ['Extra', 'Description', 'Bio'] and workdict[key]:
@ -96,7 +107,7 @@ for member in memberpages:
    workpage_html = ET.tostring(tree)
    creator = workdict['Creator'].decode('ascii', 'ignore')
    creator = creator.replace(' ','_')
-    work_filename = 'web/{}-{}-{}.html'.format(workdict['Date'], creator, pageid)
+    work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
    work_file = open(work_filename, "w")
    work_file.write(workpage_html)
    work_file.close()
--- a/web/2015-Artyom-16005.html
+++ b/web/2015-Artyom-16005.html
--- a/web/2015-Henk-Jelle_de_Groot-16007.html
+++ b/web/2015-Henk-Jelle_de_Groot-16007.html
--- a/web/2015-Lucia_Dossin-16025.html
+++ b/web/2015-Lucia_Dossin-16025.html
--- a/web/2015-Luther_Blisset-15965.html
+++ b/web/2015-Luther_Blisset-15965.html
--- a/web/2015-Max_Dovey-15999.html
+++ b/web/2015-Max_Dovey-15999.html
--- a/web/index.html
+++ b/web/index.html
@ -102,7 +102,7 @@
 	  &lt;div class="item video flash"&gt;&lt;img src="img/project_roel2.png"&gt;&lt;/div&gt;
 	  &lt;div class="item narrative"&gt;&lt;img src="img/project_andre.jpeg"&gt;&lt;/div-->

-	<div class="item" data-creator="Artyom" data-date="2015" data-title="Artyom-graduation-work" id="Extra"><a class="work" href="2015-Artyom-16005.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/6/6a/Screen_Shot_2014-10-26_at_16.10.08.jpg/500px-Screen_Shot_2014-10-26_at_16.10.08.jpg"></a></div><div class="item" data-creator="Joseph Knierzinger" data-date="2015" data-title="User:Joak/graduation/catalog1" id="Extra"><a class="work" href="2015-Joseph_Knierzinger-15986.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Max Dovey" data-date="2015" data-title="User:Max Dovey/maxgradbio" id="Extra"><a class="work" href="2015-Max_Dovey-15999.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Lucia Dossin" data-date="2015" data-title="Mina" id="Extra"><a class="work" href="2015-Lucia_Dossin-16025.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/5/59/3legs.jpg"></a></div><div class="item" data-creator="Luther Blisset" data-date="2015" data-title="Qq" id="Extra"><a class="work" href="2015-Luther_Blisset-15965.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a></div><div class="item" data-creator="Ana Luísa Moura" data-date="2015" data-title="The Aesthetics of Ethics" id="Extra"><a class="work" href="2015-Ana_Lusa_Moura-15982.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a></div><div class="item" data-creator="Henk-Jelle de Groot" data-date="2015" data-title="U ntitled" id="Extra"><a class="work" href="2015-Henk-Jelle_de_Groot-16007.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/e/e7/9m4MBqRM1w-6.png"></a></div></div>
+	<div class="item" data-creator="Max Dovey" data-date="2015" data-title="User:Max Dovey/maxgradbio" id="Extra"><a class="work" href="2015-Max_Dovey.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Joseph Knierzinger" data-date="2015" data-title="User:Joak/graduation/catalog1" id="Extra"><a class="work" href="2015-Joseph_Knierzinger.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a></div><div class="item" data-creator="Ana Luísa Moura" data-date="2015" data-title="The Aesthetics of Ethics" id="Extra"><a class="work" href="2015-Ana_Lusa_Moura.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a></div><div class="item" data-creator="Henk-Jelle de Groot" data-date="2015" data-title="U ntitled" id="Extra"><a class="work" href="2015-Henk-Jelle_de_Groot.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/e/e7/9m4MBqRM1w-6.png"></a></div><div class="item" data-creator="Artyom" data-date="2015" data-title="Artyom-graduation-work" id="Extra"><a class="work" href="2015-Artyom.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/6/6a/Screen_Shot_2014-10-26_at_16.10.08.jpg/500px-Screen_Shot_2014-10-26_at_16.10.08.jpg"></a></div><div class="item" data-creator="Lucia Dossin" data-date="2015" data-title="Mina" id="Extra"><a class="work" href="2015-Lucia_Dossin.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/5/59/3legs.jpg"></a></div><div class="item" data-creator="Luther Blisset" data-date="2015" data-title="Qq" id="Extra"><a class="work" href="2015-Luther_Blisset.html"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a></div></div>
      </div>

      <div class="sidebarBorderLeft zwartArea" id="section03">