From cd4982f1b295cc6f4460352e4aea8f7933f33208 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Mon, 16 Apr 2018 16:40:57 +0200 Subject: [PATCH] updated regexs for av material-using mw widgets --- mmdc_modules.py | 11 ++++++++--- mmdc_wiki2web.py | 36 ++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/mmdc_modules.py b/mmdc_modules.py index 823977d..04fa124 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -153,13 +153,18 @@ def replace_gallery(content): content = re.sub(gallery_exp, '\g<1>', content) #remove gallery wrapper return content -video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}') -vimeo_exp=re.compile('\{\{vimeo\|(.*?)\}\}') -youtube_exp=re.compile('\{\{youtube\|(.*?)\}\}') +video_exp=re.compile('{\{\#widget\:Html5media.*url\=\(.*?)\<\/a\>.*?\}\}') +vimeo_exp=re.compile('\{\{\#widget\:Vimeo\|id\=(.*?)\}\}') +youtube_exp=re.compile('{\{\#widget\:YouTube\|id\=(.*?)\}\}') def replace_video(content): + print '*** VIDEO ***' content = re.sub(vimeo_exp,"", content) content = re.sub(youtube_exp, "", content) + content = re.sub(video_exp, "", content) + + + return content img_exp=re.compile('^.*?\.(?:jpg|jpeg|JPG|JPEG|png|gif)') diff --git a/mmdc_wiki2web.py b/mmdc_wiki2web.py index beae1f7..7ccf003 100755 --- a/mmdc_wiki2web.py +++ b/mmdc_wiki2web.py @@ -2,8 +2,8 @@ # -*- coding: utf-8 -*- import xml.etree.ElementTree as ET -import html5lib, urllib, pprint -from mmdc_modules import pandoc2html, parse_work, write_html_file, mw_cats, mw_page_imgsurl, mw_img_url, mw_page_text, mwsite, mw_page_cats, mw_page, remove_cats, find_authors, replace_video, replace_img_a_tag, index_addwork, years +import html5lib, urllib, pprint, re +from mmdc_modules import pandoc2html, parse_work, write_html_file, mw_cats, mw_page_imgsurl, mw_img_url, mw_page_text, mwsite, mw_page_cats, mw_page, remove_cats, find_authors, replace_video, replace_img_a_tag, index_addwork, years, vimeo_exp from argparse import ArgumentParser from random import shuffle ###### REQUIRES ##### @@ -37,6 +37,10 @@ print 'args', args ###### # DEFS: create_pages create_index ###### + +test_exp=re.compile('\{\{\#widget\:Vimeo\|id\=(.*?)\}\}') + + def create_pages(memberpages, mode): indexdict = {} #parent dict: contains articledict instances for member in memberpages: @@ -45,17 +49,29 @@ def create_pages(memberpages, mode): articledict = parse_work(site, member, page_text) # create dictionary # Title, Creator, Date, Website, Thumbnail, Bio, Description, Extra if len(articledict['Creator'])>0 and len(articledict['Title'])>0 and len(articledict['Thumbnail'])>0: + for key in articledict.keys(): + print key + if key in ['Extra', 'Description', 'Bio']: + print '3 keys' articledict[key] = pandoc2html(articledict[key]) - elif key in ['Creator']: - articledict[key] = articledict[key].replace(',','' ) - elif key in ['Content']: - articledict[key] = remove_cats(articledict['Content']) - articledict[key] = replace_video(articledict['Content']) - + elif key is 'Creator': + print 'creator' + articledict[key] = articledict[key].replace(',','' ) + + if key is 'Extra': + print 'extra', type( articledict['Extra']) + articledict[key] = remove_cats(articledict['Extra']) + articledict[key] = replace_video(articledict['Extra']) +# found = re.findall(vimeo_exp, articledict['Extra']) +# print 'FOUND:', found + + + pprint.pprint( articledict) + articledict['Imgs'] = mw_page_imgsurl(site, page, articledict['Thumbnail'] ) - #pprint.pprint( articledict) + year = articledict['Date'] page_template = open("./work-{}-template.html".format(year), "r") # a template for each year page_tree = html5lib.parse(page_template, namespaceHTMLElements=False) @@ -174,7 +190,7 @@ for key in indexdict.keys(): # populate indexdict_byyear with works indexdict_byyear[ int(indexdict[key]['Date'])][key] = indexdict[key] #print '\n\n******* indexdict_byyear ***********\n\n' -pprint.pprint( indexdict_byyear ) +#pprint.pprint( indexdict_byyear ) for year in indexdict_byyear.keys(): # create index page for each year ie 2016.html print '\n***** ', year, ' *****\n'