#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Queries the wiki for the member pages of the requested categories, parses
# each page into a dictionary of fields and prepares the HTML templates that
# the work pages and the index are built from.
#
# NOTE(review): this script is written for Python 2 -- it relies on the
# `unicode` type and on `str.decode`. The print calls below use the
# single-argument form so they behave the same as the old print statements.

import pprint
import xml.etree.ElementTree as ET
from argparse import ArgumentParser

import html5lib
from mwclient import Site

from mmdc_modules import (
    pandoc2html,
    parse_work,
    replace_gallery,
    replace_video,
    index_addwork,
    write_html_file,
    mw_cats,
    mw_imgsurl,
    site,
    mw_page_text,
)

p = ArgumentParser()
p.add_argument("--host", default="pzwiki.wdka.nl")
p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /")
# nargs="*" combined with action="append" makes args.category a list of
# lists: one inner list per -c occurrence.
p.add_argument("--category", "-c", nargs="*", default=[], action="append",
               help="category to query, use -c foo -c bar to intersect multiple categories")
args = p.parse_args()
print(args)

########
# Index
########
# presumably the titles of the pages that belong to the requested
# categories; verify against mmdc_modules.mw_cats
memberpages = mw_cats(site, args)
# memberpages['Ctrl-F Reader','As We Speak']
print('memberpages %s' % (memberpages,))

########
# Templates
########
# The page template handle is left open on purpose: the loop below hands it
# to html5lib.parse() for every work page.
# NOTE(review): a file handle is exhausted after the first parse -- confirm
# that it is rewound (or re-opened) before the next iteration.
page_template = open("web/page-template.html", "r")

# The index template is parsed exactly once, so close it right after parsing.
with open('web/index-template.html', 'r') as index_file:
    index_tree = html5lib.parse(index_file, namespaceHTMLElements=False)
# maybe the id is important, to distinguish it
index_container = index_tree.find(".//div[@class='isotope']")

########
# Create Page
########
for member in memberpages:
    print(' member %s' % (member,))

    # Fetch the page content and rewrite gallery/video markup before parsing.
    workpage_mw = mw_page_text(site, member)
    workpage_mw = replace_gallery(workpage_mw)
    workpage_mw = replace_video(workpage_mw)

    # create dictionary w/ page content
    workdict = parse_work(member, workpage_mw)
    workpage_imgs = mw_imgsurl(site, member)

    # only parse pages with Creator, Title, Description and Thumbnail
    # (each of them must be longer than one character)
    if (len(workdict['Creator']) > 1 and len(workdict['Title']) > 1
            and len(workdict['Description']) > 1
            and len(workdict['Thumbnail']) > 1):

        for key in list(workdict):
            if key in ('Extra', 'Description', 'Bio') and workdict[key]:
                # convert Extra, Description, Bio to HTML
                workdict[key] = pandoc2html(workdict[key].decode('utf-8'))
            elif key == 'Creator':
                # strip the commas from the creator field
                workdict[key] = workdict[key].replace(',', '')

        # Every unicode value is encoded to UTF-8 bytes once the conversion
        # above is done.
        for key in list(workdict):
            if isinstance(workdict[key], unicode):
                workdict[key] = workdict[key].encode('utf-8')

        # create
work page page_tree = html5lib.parse(page_template, namespaceHTMLElements=False) page_title = page_tree.find('.//title') page_title.text=workdict['Title'] page_creator = page_tree.find('.//h2[@id="creator"]') page_creator.text=workdict['Creator'] page_title_date = page_tree.find('.//p[@id="title"]') page_title_date.text="{} {}".format(workdict['Title'], workdict['Date']) page_description = page_tree.find('.//div[@id="description"]') page_description_el = ET.fromstring('