video to iframes

10 years ago · c80cc114b6
parent 4e5303110c
commit c80cc114b6
3 changed files with 52 additions and 32 deletions
--- a/allworks_mmdc.json
+++ b/allworks_mmdc.json
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@ -20,7 +20,6 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
    return page_content
 def api_page(pageid, query):
    # Ask the wiki API about one page and return the part selected by `query`.
    # Branches visible here:
    #   'content'     -> the '*' field of the first entry in 'revisions'
    #   'articleimgs' -> api_request result for prop=images
    #   'file'        -> imageinfo (iiprop=url) looked up by title File:<pageid>
    #   'imageinfo'   -> same title lookup, with a 500px-wide thumbnail requested
    # For 'file' and 'imageinfo' the `pageid` argument is a file title, not an id.
    # NOTE(review): `response` is only bound inside these branches; a `query`
    # matching none of them would fail at the final return -- confirm against
    # the full file, since this diff view omits a line between the two hunks.
    print 'API query:', query
    if query == 'content':
        api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
        # keep only the '*' field of the first revision entry
        response = ((api_response.get('revisions'))[0])['*']
@ -28,17 +27,21 @@ def api_page(pageid, query):
        response = api_request('action=query&pageids={}&prop=info', pageid)
    elif query == 'articleimgs':
        response = api_request('action=query&pageids={}&prop=images', pageid)
    elif query == 'file':
        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid)
        pprint.pprint( response )
    elif query == 'imageinfo':
        pagename = pageid # in imageinfo titles are used instead of id 
        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename)  # iiurlwidth determines the width of the thumbnail
    return response
-def api_img_url(filename): # get full-size's image url
+def api_file_url(filename): # get full urls
    # In the added (+) lines: look `filename` up through api_page and return
    # the 'url' of the first 'imageinfo' entry; the removed (-) lines returned
    # 'thumburl' instead. Returns None when the response has no 'imageinfo' key.
    # NOTE(review): old and new lines are interleaved in this hunk and both
    # api_page calls below appear -- confirm which one the committed file keeps.
-    '''get url of image'''
+    page_content_dict = api_page(filename, 'file')   
   page_content_dict = api_page(filename, 'fullimage')   
    if 'imageinfo' in page_content_dict.keys():
-       thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl'))
+        imgurl = ((page_content_dict.get('imageinfo'))[0].get('url'))
-       return thumburl
+        return imgurl
    else:
        return None
 def api_thumb_url(filename):
    '''get thumbnail url of image'''
--- a/mmdc_pages.py
+++ b/mmdc_pages.py
@ -6,7 +6,7 @@
 #####
 import xml.etree.ElementTree as ET
 import html5lib, urllib2, json, pprint, re
-from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl
+from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url
 #import mmdc_create_json import api_thumb_url
 json_allworks_file = open('allworks_mmdc.json', 'r') # open the works json for reading
 json_allworks = json.loads(json_allworks_file.read()) # parse the whole file into Python objects
@ -15,8 +15,8 @@ pages_path = 'web/work'
 #def generate_xml():
 gallery_exp=re.compile('<gallery>.*?</gallery>')
 file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
-#img_exp=re.compile('(^.*?\.(gif|jpg|jpeg|png))', re.I)
+img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
-img_exp=re.compile('File:((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|<\/gallery>)', re.I)
+video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')
 def replace_gallery(content):
    # Remove every <gallery>...</gallery> block from content and build one
    # <img> ET element per image file found inside those blocks.
    # Returns the tuple (content without galleries, list of img ET elements).
@ -24,37 +24,54 @@ def replace_gallery(content):
    gallery_imgs = []
    # collect the gallery blocks first, then strip them from the text
    gallery_found = re.findall(gallery_exp, content)
    content = re.sub(gallery_exp,  '', content)
    print 'gallery_found', gallery_found
    for gallery in gallery_found: # in case there is more than 1 <gallery>
        print 'GALLERY', gallery
        # img_exp has several groups, so each match is a tuple of group values
        allfiles =re.findall(img_exp, gallery)
        print 'ALLFILES', allfiles
        for imgfile in allfiles:
-            imgfile = imgfile[0]
+            imgfile = imgfile[1]
-            imgsrc = api_thumb_url(imgfile) # search for original image
+            imgsrc = api_file_url(imgfile) # search for original image
            print imgfile, imgsrc
            # NOTE(review): api_file_url returns None when the API response has
            # no 'imageinfo', so src can end up as None here -- confirm intended
            img_el = ET.Element('img', attrib={'src': imgsrc})
            gallery_imgs.append(img_el)
 #            print 'gallery img', imgfile, ET.tostring(img_el)
    return content, gallery_imgs
 def replace_video(content):
    print '-- Replacing Videos --'
    videos = []
    videos_found = re.findall(video_exp, content)
    for video in videos_found:
        video_provider =  str(video[0])
        video_hash = str(video[1])
        video_src = None
        print video_provider, type(video_provider)
        if (video_provider.lower()) == 'youtube':            
            video_src="https://www.youtube.com/embed/" + video_hash
        elif (video_provider.lower()) == 'vimeo':            
            video_src="https://player.vimeo.com/video/" + video_hash
            print 'VIMEO'
        if video_src:
            iframe_el = ET.Element('iframe', attrib={'src':video_src,  'width':'600px', 'height':'450px'})
            videos.append(iframe_el)
    content = re.sub(video_exp,  '', content)
    return content, videos
 def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content    
    # For the 'Description' and 'Extra' entries: pull galleries or videos out of
    # the wiki text, convert the remaining text to HTML with pandoc2html and
    # parse it into an html5lib fragment tree.
    # NOTE(review): this diff view interleaves removed (-) and added (+) lines
    # and omits the lines between its two hunks, so the comments here cover
    # only what is visible.
    for key in allworks_dict.keys():
        if key in ['Description', 'Extra']: 
            mw_content = allworks_dict[key]
            # NOTE(review): gallery and video handling are if/elif, so text that
            # contains both only gets its gallery extracted -- confirm intended
            if re.search(gallery_exp, mw_content):
                mw_content, gallery_imgs = replace_gallery(mw_content)
-                print gallery_imgs
+                allworks_dict['Images'] = gallery_imgs
-                print mw_content.encode('utf-8')            
+            elif re.search(video_exp, mw_content):
-
+                print '-- Searching for Video --'
-
+                mw_content, videos = replace_video(mw_content)
                allworks_dict['Video'] = videos
                print mw_content, videos
            allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML
 #           print 'allworks_dict[key]', key#, allworks_dict[key]
            # parse the HTML string into an element tree without HTML namespaces
            work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False)
 #            replace_gallery(work_htmltree)
 #            print work_htmltree
 #            print ET.tostring(work_htmltree)
@ -80,7 +97,7 @@ def create_workpage( allworks_dict, work_key): # replace text content in dict wi
 #        print work_htmltree
        allworks_dict[key] = work_htmltree
        allworks_dict.pop('Thumbnail', None) #remove thumbnail
-#    pprint.pprint(allworks_dict)
+    pprint.pprint(allworks_dict)