solved video; working on gallery

master
Castro0o 9 years ago
parent 3192565601
commit 2caa4afccf

@ -8,6 +8,8 @@ sid = '1234'
useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101"
endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
# API MODULES
def api_request(action, pagename): #get page: content, metadata, images, imageifnp
print 'API REQUEST'
@ -157,20 +159,38 @@ def img_fullurl(parent):
img.set('src', fullurl)
# fileurl = api_request(src, endpoint)# find url of file
gallery_exp=re.compile('<gallery>.*?</gallery>', re.S)
imgfile_exp=re.compile('(File:(.*?\.(gif|jpg|jpeg|png)))')# (?=File:|<\/gallery>)')
img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')
def replace_gallery(content):
gallery_imgs = []
gallery_found = re.findall(gallery_exp, content)
content = re.sub(gallery_exp, '', content)
for gallery in gallery_found: # in case there is more than 1 <gallery>
allfiles =re.findall(img_exp, gallery)
allfiles =re.findall(imgfile_exp, gallery)
print 'ALLFILES', allfiles
for imgfile in allfiles:
imgfile = imgfile[1]
imgsrc = api_file_url(imgfile) # search for original image
#imgsrc = imgfile
imgsrc = api_file_url(imgfile) # search for original image
newimg = ''
gallery_imgs.append(imgsrc)
print 'gallery_imgs', gallery_imgs
# from <gallery>.*</gallery> imgs, return list of img ET elements
# replace <gallery>.*</gallery> with ''
return content, gallery_imgs
print 'GALLERY_IMGS', gallery_imgs
ungallery_imgs = " ".join(gallery_imgs)
print 'ungallery_imgs', ungallery_imgs
content = re.sub(gallery_exp, ungallery_imgs, content)
print 'images content', content
## BUG: images end up rendered as <a> links in the output
# EG: <a class="uri" href="File:Mb-WordNet-tour-version2-08.png">File:Mb-WordNet-tour-version2-08.png</a> <a class="uri" href="File:Labanotation1.jpg">File:Labanotation1.jpg</a>
return content
def replace_video(content):
videos = []
@ -185,12 +205,13 @@ def replace_video(content):
video_src="https://player.vimeo.com/video/" + video_hash
if video_src:
videos.append(video_src)
iframe = "<iframe src='{}' width='600px' height='450px'></iframe>".format(video_src)
# content = re.sub(video_exp, ' iframe ', content)
iframe = "<iframe src='{}' width='600px' height='450px'> </iframe>".format(video_src)
content = re.sub(video_exp, iframe, content)
else:
content = re.sub(video_exp, '', content)
return content
## Video replacement: known problem - the iframe is placed inside a <p> element, and it should not be.
## Gallery replacement: not working yet.

@ -15,22 +15,24 @@
import xml.etree.ElementTree as ET
import html5lib, re, pprint
from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video
gallery_exp=re.compile('<gallery>.*?</gallery>')
file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')
from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp
template = open("web/page-template.html", "r")
template = template.read()
# download
pageid='16025'#'15965'#Qq #'15986'Jozeph #'16025'Mina
work = 'Mina'#'User:Joak/graduation/catalog1'
pageid='15965'#Qq #'16025' #'15986'Jozeph #'16025'Mina
work = 'Q' #'Mina'#'User:Joak/graduation/catalog1'
workpage_mw = api_page(pageid, 'content')
print '------------------- workpage_mw'
if re.search(gallery_exp, workpage_mw):
print 'FOUND GALLERY'
workpage_mw = replace_gallery(workpage_mw)
if re.search(video_exp, workpage_mw):
workpage_mw = replace_video(workpage_mw)
print 'FOUND VIDEO'
'''
# parsing workpage_mw
workdict = parse_work(work, workpage_mw)
for key in workdict.keys():
@ -45,7 +47,7 @@ imgs = tree.findall('.//img')
for img in imgs:
src = img.get('src')
newsrc = api_file_url(src)
print 'new src', newsrc
# print 'new src', newsrc
if newsrc:
img.set('src', newsrc)
#print 'IMG', ET.tostring(img)
@ -53,7 +55,7 @@ for img in imgs:
workpage_html = ET.tostring(tree)
print 'TREE', workpage_html
#print 'TREE', workpage_html
# # save
@ -62,3 +64,4 @@ work_file = open(work_filename, "w")
work_file.write(workpage_html)
work_file.close()
'''

@ -1,23 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>$title$</title> <!-- Work title will go to here -->
<link rel="stylesheet" href="css/style.css">
</head>
<body>
<div id="header">
<div id="title"><h1>$title$</h1></div>
<div id="creator"><h2>$creator$</h2></div>
<div id="date">$date$</div>
<div id="website"><a href="$website$">$website$</a></div>
<div id="thumbnail"><img src="$thumnail$" class="" alt="" /></div>
<div id="bio">$bio$</div>
</div>
<div id="body">
$body$
<!--div id="description">$description$</div>
<div id="extra">$extra$</div-->
</div>
</body>
</html>
Loading…
Cancel
Save