From b629c6e33bed99e6b4c803564e3330125e3799d5 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Sun, 31 May 2015 21:29:33 +0200 Subject: [PATCH] operational --- README.md | 11 -------- mmdc_modules.py | 18 ++++++------- mmdc_x.py | 70 +++++++++---------------------------------------- 3 files changed, 21 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index a8550d3..14c4771 100644 --- a/README.md +++ b/README.md @@ -12,18 +12,7 @@ Or index all the gaduation works: `python mmdc_wiki2web.py --category Graduation_work` -## Thumbnails -thumbnails in work pages are hidden. this can be changed. - -style_projectpage.css: - -`#thumnail { -display: none; -}` - - ## To Do - * remove thumbnail from page_imgs diff --git a/mmdc_modules.py b/mmdc_modules.py index efe280e..3f49c9f 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -48,13 +48,16 @@ def mw_page_cats(site, page): return cats -def mw_page_imgsurl(site, page): +def mw_page_imgsurl(site, page, thumb): #all the imgs in a page + #except thumb: if thumb: remove #returns list of tuples (img.name, img.fullurl) imgs = page.images() - imgs = list(imgs) - urls = { img.name: (img.imageinfo)['url'] for img in imgs} - return urls + imgs = list(imgs) + imgs_dict = { img.name:(img.imageinfo)['url'] for img in imgs if (img.imageinfo)['url'] != thumb } # exclude thumb + imgs_dict = { key.capitalize():value for key, value in imgs_dict.items()} + # capilatize image name, so it can be called later + return imgs_dict def mw_img_url(site, img): #find full of an img @@ -77,14 +80,12 @@ def write_html_file(html_tree, filename): edited.close() def parse_work(site, title, content): -# print title, content workdict = {'Title':title, 'Creator':u'', 'Date':u'', 'Website':u'', 'Thumbnail':u'', 'Bio':u'', 'Description':u'', 'Extra':u''} + if re.match(u'\{\{\Graduation work', content): template, extra = (re.findall(u'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0] if extra: - workdict['Extra'] = extra - # template's key/value pair - # Note:Extra value is NOT CAPTURED by this regex + workdict['Extra'] = extra keyval = re.findall(u'\|(.*?)\=(.*?\n)', template, re.DOTALL) for pair in keyval: key = pair[0] @@ -96,7 +97,6 @@ def parse_work(site, title, content): elif 'Website' in key: val = urllib.unquote( val) workdict[key]=val -# pprint.pprint(workdict) return workdict def pandoc2html(mw_content): diff --git a/mmdc_x.py b/mmdc_x.py index f27ee89..227f39b 100755 --- a/mmdc_x.py +++ b/mmdc_x.py @@ -4,7 +4,6 @@ import xml.etree.ElementTree as ET import html5lib, urllib, pprint from mmdc_modules import pandoc2html, parse_work, write_html_file, mw_cats, mw_page_imgsurl, mw_img_url, mw_page_text, mwsite, mw_page_cats, mw_page, remove_cats, find_authors, replace_video, replace_img_a_tag, index_addwork -# unsued from bs_modules: replace_gallery, replace_video, index_addwork, from argparse import ArgumentParser from random import shuffle as shuffle @@ -17,9 +16,7 @@ p.add_argument("--host", default="pzwiki.wdka.nl") p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /") p.add_argument("--category", "-c", nargs="*", default=[["2015", "Graduation_work"]], action="append", help="category to query, use -c foo -c bar to intersect multiple categories") p.add_argument("--preview", help='Preview page. Will override category querying. Use: --page "Name Of Wiki Page"') - args = p.parse_args() - print 'args', args ###### @@ -32,7 +29,8 @@ def create_page(memberpages, mode): print member page = mw_page(site, member) page_text = mw_page_text(site, page) - articledict = parse_work(site, member, page_text) # create dictionary w/ page co + articledict = parse_work(site, member, page_text) # create dictionary + # Title, Creator, Date, Website, Thumbnail, Bio, Description, Extra if len(articledict['Creator'])>0 and len(articledict['Title'])>0 and len(articledict['Thumbnail'])>0: for key in articledict.keys():# convert Extra, Description, Bio to HTML if key in ['Extra', 'Description', 'Bio']: @@ -43,12 +41,8 @@ def create_page(memberpages, mode): articledict[key] = remove_cats(articledict['Content']) articledict[key] = replace_video(articledict['Content']) - -# page_imgs = mw_page_imgsurl(site, page) - # page_imgs = { key.capitalize():value for key, value in page_imgs.items()} # capatalize keys, so can be called later - # #articledict = {'Title': member, 'Content': page_text, 'Categories':page_cats, 'Images': page_imgs} - - + articledict['Imgs'] = mw_page_imgsurl(site, page, articledict['Thumbnail'] ) + page_tree = html5lib.parse(page_template, namespaceHTMLElements=False) page_title = page_tree.find('.//title') page_title.text=articledict['Title']#.decode('utf-8') @@ -69,19 +63,17 @@ def create_page(memberpages, mode): page_extra.extend(page_extra_el) page_website = page_tree.find('.//p[@class="hightlightSidebar"]/a') page_website.set('href', articledict['Website']) - page_website.text=articledict['Website'] + page_website.text=articledict['Website'] page_thumb = page_tree.find('.//img[@id="thumbnail"]') page_thumb.set('src', articledict['Thumbnail']) # give work page's imgs full url - imgs = page_tree.findall('.//img') - # for img in imgs: - # img_class = img.get('class') - # if img_class != 'template': - # src =unicode(img.get('src')) - # for pair in workpage_imgs: - # if src.replace("_", " ") in pair[0]:#if img in html matchs img in workpage_imgs - # img.set('src', pair[1]) + imgs = page_tree.findall('.//img') + for img in imgs: #replace src: full url + src = (('File:'+img.get('src')).capitalize()).decode('utf-8') + if src in articledict['Imgs'].keys(): + url = articledict['Imgs'][src] + img.set('src', url) # save work page creator = articledict['Creator'].encode('ascii', 'ignore') @@ -98,7 +90,6 @@ def create_index(indexdict): index_tree = html5lib.parse(index_template, namespaceHTMLElements=False) index_container = index_tree.find(".//div[@class='isotope']") #maybe id is imp for key in indexdict.keys(): - print 'key', key index_addwork( parent=index_container, workid=key, href=indexdict[key]['Path'], @@ -108,47 +99,9 @@ def create_index(indexdict): thumbnail=indexdict[key]['Thumbnail'] ) print '----', indexdict[key]['Title'],indexdict[key]['Path'] - # print ET.tostring(tree) - -#print index_tree, type(index_tree) write_html_file(index_tree, 'web/index.html') - # authors = indexdict[article]['Authors'] - # path = indexdict[article]['Path'] - # issue = indexdict[article]['Category Issue'] - # section = indexdict[article]['Category Section'] - # topics = indexdict[article]['Category Topics'] - # images = indexdict[article]['Images'] - # index_section = index_tree.find('.//ul[@id="section_{}"]'.format(section.encode('utf-8'))) - # index_item = ET.SubElement(index_section, 'li', - # attrib={'class': " ".join(topics)+" "+section, - # 'data-name': article, - # 'data-section':section, - # 'data-categories': " ".join(topics)+" "+section - # }) - # article_link = ET.SubElement(index_item, 'a', attrib={'href':urllib.quote(path)}) - # article_link.text = article - # article_author = ET.SubElement(index_item, 'p', attrib={'class':'authorTitle'}) - # article_author.text = authors - - # for imgurl in images.values(): - # print 'imgurl', imgurl - # index_img_item = ET.SubElement(index_imgs_section, 'li', - # attrib={'class': " ".join(topics)+" "+section, - # 'data-name': article, - # 'data-section':section, - # 'data-categories': " ".join(topics)+" "+section, - # 'style':'position: absolute; left: 0px; top: 0px;' - # }) - # article_img_link = ET.SubElement(index_img_item, 'a', attrib={'href':urllib.quote(path)}) - # article_img_img = ET.SubElement(article_img_link, 'img', attrib={'src':imgurl}) - # title=index_tree.find('.//title') - # title.text = 'Beyond Social: ' + issue_current - # index_filename = 'index.html' - # write_html_file(index_tree, index_filename) - - ##### # ACTION ##### @@ -163,6 +116,7 @@ if args.preview is not None: else: print "** New Index Mode **" memberpages=mw_cats(site, args) + #memberpages=[u'Unintended Images'] shuffle(memberpages) print 'memberpages:', memberpages indexdict = create_page(memberpages, 'index')