From 9e3e0fb91f0dbd91ff2f0d2f9bd8295787e736ac Mon Sep 17 00:00:00 2001 From: Castro0o Date: Sun, 31 May 2015 21:51:46 +0200 Subject: [PATCH] figcaptions resolved --- mmdc_wiki2web.py | 121 ----------------------------------------------- 1 file changed, 121 deletions(-) delete mode 100755 mmdc_wiki2web.py diff --git a/mmdc_wiki2web.py b/mmdc_wiki2web.py deleted file mode 100755 index 4873437..0000000 --- a/mmdc_wiki2web.py +++ /dev/null @@ -1,121 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -import xml.etree.ElementTree as ET -import html5lib, pprint -from mmdc_modules import pandoc2html, parse_work, replace_gallery, replace_video, index_addwork, write_html_file, mw_cats, mw_imgsurl, mw_page_text, site -from argparse import ArgumentParser - - -p = ArgumentParser() -p.add_argument("--host", default="pzwiki.wdka.nl") -p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /") -p.add_argument("--category", "-c", nargs="*", default=[], action="append", help="category to query, use -c foo -c bar to intersect multiple categories") -args = p.parse_args() -print args - -######## -# Index -######## -memberpages=mw_cats(site, args) -#memberpages = [ u'The Aesthetics of Ethics']# -print 'memberpages', memberpages - -######## -# Templates -######## -page_template = open("web/page-template.html", "r") -index_template = open('web/index-template.html', 'r') -index_tree = html5lib.parse(index_template, namespaceHTMLElements=False) -index_container = index_tree.find(".//div[@class='isotope']") #maybe id is important, to destinguish it - -######## -# Create Page -######## -for member in memberpages: - workpage_mw = mw_page_text(site, member)#CONTENT - # print workpage_mw.encode("utf-8") - # print "** workpage_mw", type(workpage_mw) - workpage_mw = replace_gallery(workpage_mw) - workpage_mw = replace_video(workpage_mw) - workdict = parse_work(member, workpage_mw) # create dictionary w/ page content - workpage_imgs = mw_imgsurl(site, member) - - # print "***", type(workpage_mw), workdict - # for key in workdict: - # print type(workdict[key]), key, workdict[key] - print ' member', member - # only parse pages with Creator, Title, Thumbnail - if len(workdict['Creator'])>1 and len(workdict['Title'])>1 and len(workdict['Thumbnail'])>1: #and len(workdict['Description'])>1 - for key in workdict.keys(): # convert Extra, Description, Bio to HTML - if key in ['Extra', 'Description', 'Bio'] and workdict[key]: - workdict[key] = pandoc2html(workdict[key]) - elif key in ['Creator']: - workdict[key] = workdict[key].replace(',','' ) - - for key in workdict.keys(): - if type(workdict[key]) is unicode: - workdict[key]=workdict[key] - -# print workdict, type(workdict['Creator']) -# print workdict['Creator']#.decode('utf-8') - # create work page - page_tree = html5lib.parse(page_template, namespaceHTMLElements=False) - page_title = page_tree.find('.//title') - page_title.text='Title'#workdict['Title']#.decode('utf-8') - page_creator = page_tree.find('.//h2[@id="creator"]') - page_creator.text=(workdict['Creator']) - page_title_date = page_tree.find('.//p[@id="title"]') - page_title_date.text=u"{} {}".format(workdict['Title'], workdict['Date']) - page_description = page_tree.find('.//div[@id="description"]') - page_description_el = ET.fromstring('
'+workdict['Description'].encode('utf-8')+'
') - page_description.extend(page_description_el) - page_bio = page_tree.find('.//div[@id="bio"]') - page_bio_el = ET.fromstring('
'+workdict['Bio'].encode('utf-8')+'
') - page_bio.extend(page_bio_el) - page_sortArea_title = page_tree.find('.//div[@id="sortArea"]/p') - page_sortArea_title.text =workdict['Title'] - page_extra = page_tree.find('.//div[@id="extra"]') - page_extra_el = ET.fromstring('
'+workdict['Extra'].encode('utf-8')+'
') - page_extra.extend(page_extra_el) - page_website = page_tree.find('.//p[@class="hightlightSidebar"]/a') - page_website.set('href', workdict['Website']) - page_website.text=workdict['Website'] - page_thumb = page_tree.find('.//img[@id="thumbnail"]') - page_thumb.set('src', workdict['Thumbnail']) - - # give work page's imgs full url - imgs = page_tree.findall('.//img') - for img in imgs: - img_class = img.get('class') - if img_class != 'template': - src =unicode(img.get('src')) - for pair in workpage_imgs: - if src.replace("_", " ") in pair[0]:#if img in html matchs img in workpage_imgs - img.set('src', pair[1]) - - # save work page - creator = workdict['Creator'].encode('ascii', 'ignore') - creator = creator.replace(' ','_') - work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator) - write_html_file(page_tree, work_filename) - - ####### - # Insert Work to Index - ####### - index_addwork( parent=index_container, - workid=key, - href=work_filename.replace('web/',''), - title="workdict['Title']",#.decode('utf-8'), - creator="workdict['Creator']",#.decode('utf-8'), - date=workdict['Date'], - thumbnail=workdict['Thumbnail'] - ) - # print '----', workdict['Title'] - # print ET.tostring(tree) - -#print index_tree, type(index_tree) -write_html_file(index_tree, 'web/index.html') -print -print -