video to iframes

master
Castro0o 10 years ago
parent 4e5303110c
commit c80cc114b6

File diff suppressed because one or more lines are too long

@ -20,7 +20,6 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
return page_content return page_content
def api_page(pageid, query): def api_page(pageid, query):
print 'API query:', query
if query == 'content': if query == 'content':
api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid) api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
response = ((api_response.get('revisions'))[0])['*'] response = ((api_response.get('revisions'))[0])['*']
@ -28,17 +27,21 @@ def api_page(pageid, query):
response = api_request('action=query&pageids={}&prop=info', pageid) response = api_request('action=query&pageids={}&prop=info', pageid)
elif query == 'articleimgs': elif query == 'articleimgs':
response = api_request('action=query&pageids={}&prop=images', pageid) response = api_request('action=query&pageids={}&prop=images', pageid)
elif query == 'file':
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid)
pprint.pprint( response )
elif query == 'imageinfo': elif query == 'imageinfo':
pagename = pageid # in imageinfo titles are used instead of id pagename = pageid # in imageinfo titles are used instead of id
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidht dermines with of thumbnail response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidht dermines with of thumbnail
return response return response
def api_img_url(filename): # get full-size's image url def api_file_url(filename): # get full urls
'''get url of image''' page_content_dict = api_page(filename, 'file')
page_content_dict = api_page(filename, 'fullimage')
if 'imageinfo' in page_content_dict.keys(): if 'imageinfo' in page_content_dict.keys():
thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl')) imgurl = ((page_content_dict.get('imageinfo'))[0].get('url'))
return thumburl return imgurl
else:
return None
def api_thumb_url(filename): def api_thumb_url(filename):
'''get thumbnail url of image''' '''get thumbnail url of image'''

@ -6,7 +6,7 @@
##### #####
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re import html5lib, urllib2, json, pprint, re
from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url
#import mmdc_create_json import api_thumb_url #import mmdc_create_json import api_thumb_url
json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks_file = open('allworks_mmdc.json', 'r') # save json
json_allworks = json.loads(json_allworks_file.read()) json_allworks = json.loads(json_allworks_file.read())
@ -15,8 +15,8 @@ pages_path = 'web/work'
#def generate_xml(): #def generate_xml():
gallery_exp=re.compile('<gallery>.*?</gallery>') gallery_exp=re.compile('<gallery>.*?</gallery>')
file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)') file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
#img_exp=re.compile('(^.*?\.(gif|jpg|jpeg|png))', re.I) img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
img_exp=re.compile('File:((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|<\/gallery>)', re.I) video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')
def replace_gallery(content): def replace_gallery(content):
# from <gallery>.*</gallery> imgs, return list of img ET elements # from <gallery>.*</gallery> imgs, return list of img ET elements
@ -24,37 +24,54 @@ def replace_gallery(content):
gallery_imgs = [] gallery_imgs = []
gallery_found = re.findall(gallery_exp, content) gallery_found = re.findall(gallery_exp, content)
content = re.sub(gallery_exp, '', content) content = re.sub(gallery_exp, '', content)
print 'gallery_found', gallery_found
for gallery in gallery_found: # in case there is more than 1 <gallery> for gallery in gallery_found: # in case there is more than 1 <gallery>
print 'GALLERY', gallery
allfiles =re.findall(img_exp, gallery) allfiles =re.findall(img_exp, gallery)
print 'ALLFILES', allfiles
for imgfile in allfiles: for imgfile in allfiles:
imgfile = imgfile[0] imgfile = imgfile[1]
imgsrc = api_thumb_url(imgfile) # search for original image imgsrc = api_file_url(imgfile) # search for original image
print imgfile, imgsrc
img_el = ET.Element('img', attrib={'src': imgsrc}) img_el = ET.Element('img', attrib={'src': imgsrc})
gallery_imgs.append(img_el) gallery_imgs.append(img_el)
# print 'gallery img', imgfile, ET.tostring(img_el)
return content, gallery_imgs return content, gallery_imgs
def replace_video(content):
    """Turn ``{{provider|id}}`` video templates in *content* into iframes.

    Every match of the module-level ``video_exp`` pattern is inspected.
    When the provider (compared case-insensitively, surrounding whitespace
    ignored) is ``youtube`` or ``vimeo``, an ``<iframe>`` element pointing
    at that provider's embed URL is collected and the template is removed
    from the text.  Templates with any other provider, or with an empty
    id, are left in the text untouched instead of being deleted.

    Parameters:
        content: wiki markup (byte or unicode string) to scan.

    Returns:
        A ``(content, videos)`` tuple: the markup with the converted
        templates stripped out, and the list of ``iframe``
        ``ET.Element`` objects in order of appearance.
    """
    print('-- Replacing Videos --')
    embed_prefixes = {
        'youtube': 'https://www.youtube.com/embed/',
        'vimeo': 'https://player.vimeo.com/video/',
    }
    videos = []

    def _template_to_iframe(match):
        # No str() coercion here: under Python 2 it raises
        # UnicodeEncodeError as soon as a template holds non-ASCII text.
        provider = match.group(1).strip().lower()
        video_hash = match.group(2).strip()
        prefix = embed_prefixes.get(provider)
        if prefix is None or not video_hash:
            # Not a video we can embed: keep the original markup.
            return match.group(0)
        videos.append(ET.Element('iframe', attrib={
            'src': prefix + video_hash,
            'width': '600px',
            'height': '450px',
        }))
        return ''

    # Single pass: collect the iframes and drop only the templates that
    # were actually converted.
    content = re.sub(video_exp, _template_to_iframe, content)
    return content, videos
def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content
for key in allworks_dict.keys(): for key in allworks_dict.keys():
if key in ['Description', 'Extra']: if key in ['Description', 'Extra']:
mw_content = allworks_dict[key] mw_content = allworks_dict[key]
if re.search(gallery_exp, mw_content): if re.search(gallery_exp, mw_content):
mw_content, gallery_imgs = replace_gallery(mw_content) mw_content, gallery_imgs = replace_gallery(mw_content)
print gallery_imgs allworks_dict['Images'] = gallery_imgs
print mw_content.encode('utf-8') elif re.search(video_exp, mw_content):
print '-- Searching for Video --'
mw_content, videos = replace_video(mw_content)
allworks_dict['Video'] = videos
print mw_content, videos
allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML
# print 'allworks_dict[key]', key#, allworks_dict[key]
work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False) work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False)
# replace_gallery(work_htmltree)
# print work_htmltree # print work_htmltree
# print ET.tostring(work_htmltree) # print ET.tostring(work_htmltree)
@ -80,7 +97,7 @@ def create_workpage( allworks_dict, work_key): # replace text content in dict wi
# print work_htmltree # print work_htmltree
allworks_dict[key] = work_htmltree allworks_dict[key] = work_htmltree
allworks_dict.pop('Thumbnail', None) #remove thumnail allworks_dict.pop('Thumbnail', None) #remove thumnail
# pprint.pprint(allworks_dict) pprint.pprint(allworks_dict)

Loading…
Cancel
Save