#! /usr/bin/env python
# -*- coding: utf-8 -*-
import xml . etree . ElementTree as ET
import html5lib , urllib , pprint
from mmdc_modules import pandoc2html , parse_work , write_html_file , mw_cats , mw_page_imgsurl , mw_img_url , mw_page_text , mwsite , mw_page_cats , mw_page , remove_cats , find_authors , replace_video , replace_img_a_tag , index_addwork
from argparse import ArgumentParser
from random import shuffle
#####
# Args
####
# Command-line interface: wiki host/path, the categories to query, and an
# optional single page to preview.
# NOTE(review): the option strings and other literals are reproduced exactly
# as written, including their leading/trailing spaces; argparse requires
# option strings to start with '-' -- confirm against the intended values.
p = ArgumentParser()
p.add_argument(" --host ", default=" pzwiki.wdka.nl ")
p.add_argument(" --path ", default=" /mw-mediadesign/ ", help=" nb: should end with / ")
# NOTE(review): with an append action and a non-empty default, argparse adds
# command-line groups *after* the default one, so args.category[0] stays the
# default group -- confirm this is intended.
p.add_argument(" --category ", " -c ", nargs=" * ",
               default=[[" 2016 ", " Graduation_work "]], action=" append ",
               help=" category to query, use -c foo -c bar to intersect multiple categories ")
# The help text previously advertised a non-existent "--page" option.
p.add_argument(" --preview ",
               help=' Preview page. Will override category querying. Use: --preview " Name Of Wiki Page " ')
args = p.parse_args()
print('{} {}'.format(' args ', args))

# The year is taken from the first category group: the last entry containing
# the marker wins. It is later used to pick the template files and to name
# the generated work pages.
year = None
for i in args.category[0]:
    if ' 20 ' in i:
        year = i
if year is None:
    # Fail with a usage message instead of a NameError on first use of `year`.
    p.error('no year found in the first --category group')
print(year)
######
# DEFS: create_page create_index
######
def _append_html_fragment(target, fragment):
    # Wrap the fragment in a <div>, parse it as XML and append the wrapper's
    # child elements to `target`. Text placed directly inside the wrapper
    # (outside any child element) is not carried over.
    wrapper = ET.fromstring(' <div> ' + fragment.encode(' utf-8 ') + ' </div> ')
    target.extend(wrapper)


def _relink_figures(page_tree, imgs):
    # For every <figure>: drop a caption that merely repeats the image source
    # and, when the image is listed in `imgs`, point it at the mapped URL.
    for figure in page_tree.findall(' .//figure '):
        img = figure.find(' .//img ')
        img_src = img.get(' src ') if img is not None else None
        if img_src is None:
            # Nothing to relink; skip instead of crashing on a bare figure.
            continue
        figcaption = figure.find(' .//figcaption ')
        if figcaption is not None and figcaption.text == img_src:
            figure.remove(figcaption)
        src = ((' File: ' + img_src).capitalize()).decode(' utf-8 ')
        if src in imgs:  # full-url
            url = imgs[src].replace(' http ', ' https ')
            print(url)
            img.set(' src ', url)


def create_page(memberpages, mode):
    """Render one HTML work page per wiki page and collect index data.

    For each entry of `memberpages` the wiki page is fetched through the
    module-level `site`, parsed with `parse_work`, and -- when Creator, Title
    and Thumbnail are all non-empty -- merged into a fresh copy of the year's
    page template and written out with `write_html_file`.

    `mode` is accepted for the callers' sake but is not used in the body.

    Returns a dict mapping each rendered work's Title to its article dict
    (extended with the Imgs and Path entries); skipped pages are absent.
    """
    # Read the template once and close the file; every work page is parsed
    # from this source so each gets its own independent tree.
    with open(" {} -template.html ".format(year), " r ") as template_file:
        template_source = template_file.read()

    indexdict = {}  # parent dict: contains articledict instances
    for member in memberpages:
        print(member)
        page = mw_page(site, member)
        page_text = mw_page_text(site, page)
        articledict = parse_work(site, member, page_text)  # create dictionary
        # Title, Creator, Date, Website, Thumbnail, Bio, Description, Extra
        if not (articledict[' Creator '] and articledict[' Title ']
                and articledict[' Thumbnail ']):
            continue  # incomplete work: no page, no index entry

        for key in list(articledict.keys()):
            if key in (' Extra ', ' Description ', ' Bio '):
                articledict[key] = pandoc2html(articledict[key])
            elif key == ' Creator ':
                articledict[key] = articledict[key].replace(' , ', ' ')
            elif key == ' Content ':
                articledict[key] = replace_video(remove_cats(articledict[key]))
        articledict[' Imgs '] = mw_page_imgsurl(site, page, articledict[' Thumbnail '])

        page_tree = html5lib.parse(template_source, namespaceHTMLElements=False)

        # Fill the template placeholders.
        page_tree.find(' .//title ').text = articledict[' Title ']
        page_tree.find(' .//h2[@id= " creator " ] ').text = articledict[' Creator ']
        page_tree.find(' .//p[@id= " title " ] ').text = u" {} {} ".format(
            articledict[' Title '], articledict[' Date '])
        _append_html_fragment(page_tree.find(' .//div[@id= " description " ] '),
                              articledict[' Description '])
        _append_html_fragment(page_tree.find(' .//div[@id= " bio " ] '),
                              articledict[' Bio '])
        page_tree.find(' .//div[@id= " sortArea " ]/p ').text = articledict[' Title ']
        _append_html_fragment(page_tree.find(' .//div[@id= " extra " ] '),
                              articledict[' Extra '])
        page_website = page_tree.find(' .//p[@class= " hightlightSidebar " ]/a ')
        page_website.set(' href ', articledict[' Website '])
        page_website.text = articledict[' Website ']
        page_tree.find(' .//img[@id= " thumbnail " ] ').set(' src ', articledict[' Thumbnail '])

        _relink_figures(page_tree, articledict[' Imgs '])

        # save work page
        creator = articledict[' Creator '].encode(' ascii ', ' ignore ')
        creator = creator.replace(' ', ' _ ')
        work_filename = ' web/works/ {} - {} .html '.format(year, creator)
        write_html_file(page_tree, work_filename)
        # The index stores the filename minus its first four characters
        # (presumably the output-directory prefix -- confirm).
        articledict[' Path '] = work_filename[4:]
        indexdict[articledict[' Title ']] = articledict
    return indexdict
def create_index(indexdict):
    """Build the index page listing every work in `indexdict`.

    `indexdict` maps a work id to an article dict providing the Path, Title,
    Creator, Date and Thumbnail entries (as returned by `create_page`). The
    works are added to the index template's container in random order via
    `index_addwork`, and the resulting tree is handed to `write_html_file`.
    """
    # Parse inside `with` so the template file is closed once it is read.
    with open(" index-template- {} .html ".format(year), " r ") as index_template:
        index_tree = html5lib.parse(index_template, namespaceHTMLElements=False)
    index_container = index_tree.find(" .//div[@class= ' isotope ' ] ")  # maybe id is imp

    # shuffle() works in place and needs a real list, not a dict view.
    keys = list(indexdict.keys())
    shuffle(keys)
    for key in keys:
        work = indexdict[key]
        index_addwork(parent=index_container,
                      workid=key,
                      href=work[' Path '],
                      title=work[' Title '],
                      creator=work[' Creator '],
                      date=work[' Date '],
                      thumbnail=work[' Thumbnail '].replace(' http ', ' https '))
        print(u'{} {} {}'.format(' ---- ', work[' Title '], work[' Path ']))
    write_html_file(index_tree, ' web/ {} .html '.format(' index '))
#####
# ACTION
#####
# Wiki connection shared by the page-building functions through the
# module-level name `site`.
site = mwsite(args.host, args.path)

if args.preview is None:
    # Default run: render every page returned by the category query, then
    # rebuild the index from the rendered works.
    print(" ** New Index Mode ** ")
    memberpages = mw_cats(site, args)
    # memberpages
    print('{} {}'.format(' memberpages: ', memberpages))
    indexdict = create_page(memberpages, ' index ')
    create_index(indexdict)
else:
    # --preview given: render only that page; the index is left untouched.
    print(" ** Page Preview Mode** ")
    memberpages = [args.preview.decode(' utf-8 ')]
    print('{} {}'.format(' memberpages: ', memberpages))
    create_page(memberpages, ' preview ')