diff --git a/mmdc_modules.py b/mmdc_modules.py index 11cee97..a4b4cae 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -8,6 +8,8 @@ sid = '1234' useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" + + # API MODULES def api_request(action, pagename): #get page: content, metadata, images, imageifnp print 'API REQUEST' @@ -157,20 +159,38 @@ def img_fullurl(parent): img.set('src', fullurl) # fileurl = api_request(src, endpoint)# find url of file + + + +gallery_exp=re.compile('.*?', re.S) +imgfile_exp=re.compile('(File:(.*?\.(gif|jpg|jpeg|png)))')# (?=File:|<\/gallery>)') +img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I) +video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}') + def replace_gallery(content): gallery_imgs = [] gallery_found = re.findall(gallery_exp, content) - content = re.sub(gallery_exp, '', content) for gallery in gallery_found: # in case there is more than 1 - allfiles =re.findall(img_exp, gallery) + allfiles =re.findall(imgfile_exp, gallery) + print 'ALLFILES', allfiles for imgfile in allfiles: imgfile = imgfile[1] - imgsrc = api_file_url(imgfile) # search for original image + #imgsrc = imgfile + imgsrc = api_file_url(imgfile) # seaarch for original image + newimg = '' gallery_imgs.append(imgsrc) - print 'gallery_imgs', gallery_imgs - # from .* imgs, return list of img ET elements - # replace .* with '' - return content, gallery_imgs + print 'GALLERY_IMGS', gallery_imgs + + ungallery_imgs = " ".join(gallery_imgs) + print 'ungallery_imgs', ungallery_imgs + + content = re.sub(gallery_exp, ungallery_imgs, content) + print 'images content', content + + ## BUG: Images are being replaced as + # EG: File:Mb-WordNet-tour-version2-08.png File:Labanotation1.jpg + + return content def replace_video(content): videos = [] @@ -185,12 +205,13 @@ def replace_video(content): video_src="https://player.vimeo.com/video/" + video_hash if video_src: videos.append(video_src) - iframe = "".format(video_src) -# content = re.sub(video_exp, ' iframe ', content) + iframe = "".format(video_src) + content = re.sub(video_exp, iframe, content) else: content = re.sub(video_exp, '', content) + return content +## Video Replacement: problem with video: iframe is placed inside

. It shouldn't - - +## replace gallery - not yet there diff --git a/prototype_page.py b/prototype_page.py index d61b989..b7985c7 100755 --- a/prototype_page.py +++ b/prototype_page.py @@ -15,22 +15,24 @@ import xml.etree.ElementTree as ET import html5lib, re, pprint -from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video - -gallery_exp=re.compile('.*?') -file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)') -img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I) -video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}') - +from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp template = open("web/page-template.html", "r") template = template.read() # download -pageid='16025'#'15965'#Qq #'15986'Jozeph #'16025'Mina -work = 'Mina'#'User:Joak/graduation/catalog1' +pageid='15965'#Qq #'16025' #'15986'Jozeph #'16025'Mina +work = 'Q' #'Mina'#'User:Joak/graduation/catalog1' workpage_mw = api_page(pageid, 'content') +print '------------------- workpage_mw' +if re.search(gallery_exp, workpage_mw): + print 'FOUND GALLERY' + workpage_mw = replace_gallery(workpage_mw) +if re.search(video_exp, workpage_mw): + workpage_mw = replace_video(workpage_mw) + print 'FOUND VIDEO' +''' # parsing workpage_mw workdict = parse_work(work, workpage_mw) for key in workdict.keys(): @@ -45,7 +47,7 @@ imgs = tree.findall('.//img') for img in imgs: src = img.get('src') newsrc = api_file_url(src) - print 'new src', newsrc +# print 'new src', newsrc if newsrc: img.set('src', newsrc) #print 'IMG', ET.tostring(img) @@ -53,7 +55,7 @@ for img in imgs: workpage_html = ET.tostring(tree) -print 'TREE', workpage_html +#print 'TREE', workpage_html # # save @@ -62,3 +64,4 @@ work_file = open(work_filename, "w") work_file.write(workpage_html) work_file.close() +''' diff --git a/web/work-template.html b/web/work-template.html deleted file mode 100644 index f719f01..0000000 --- a/web/work-template.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - $title$ - - - -

-
- $body$ - -
- -