video to iframes

10 years ago · c80cc114b6
parent 4e5303110c
commit c80cc114b6
3 changed files with 52 additions and 32 deletions
--- a/allworks_mmdc.json
+++ b/allworks_mmdc.json
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@@ -20,7 +20,6 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
    return page_content

 def api_page(pageid, query):
-    print 'API query:', query
    if query == 'content':
        api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
        response = ((api_response.get('revisions'))[0])['*']
@@ -28,24 +27,28 @@ def api_page(pageid, query):
        response = api_request('action=query&pageids={}&prop=info', pageid)
    elif query == 'articleimgs':
        response = api_request('action=query&pageids={}&prop=images', pageid)
+    elif query == 'file':
+        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid)
+        pprint.pprint( response )
    elif query == 'imageinfo':
        pagename = pageid # in imageinfo titles are used instead of id 
        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename)  # iiurlwidht dermines with of thumbnail         
    return response

-def api_img_url(filename): # get full-size's image url
-    '''get url of image'''
-   page_content_dict = api_page(filename, 'fullimage')   
-   if 'imageinfo' in page_content_dict.keys():
-       thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl'))
-       return thumburl
-
+def api_file_url(filename):
+    '''get full-size url of File:<filename>, None when the api answer has no imageinfo'''
+    file_page = api_page(filename, 'file')
+    if 'imageinfo' not in file_page:
+        return None
+    first_info = file_page.get('imageinfo')[0]  # only the first imageinfo entry is read
+    return first_info.get('url')
+   
 def api_thumb_url(filename):
    '''get thumbnail url of image'''
-   page_content_dict = api_page(filename, 'imageinfo')   
-   if 'imageinfo' in page_content_dict.keys():
-       thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl'))
-       return thumburl
+    page_content_dict = api_page(filename, 'imageinfo')   
+    if 'imageinfo' in page_content_dict.keys():
+        thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl'))
+        return thumburl

   
 # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300
--- a/mmdc_pages.py
+++ b/mmdc_pages.py
@@ -6,7 +6,7 @@
 #####
 import xml.etree.ElementTree as ET
 import html5lib, urllib2, json, pprint, re
-from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl
+from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url
 #import mmdc_create_json import api_thumb_url
 json_allworks_file = open('allworks_mmdc.json', 'r') # save json 
 json_allworks = json.loads(json_allworks_file.read())
@@ -15,8 +15,8 @@ pages_path = 'web/work'
 #def generate_xml():
 gallery_exp=re.compile('<gallery>.*?</gallery>')
 file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
-#img_exp=re.compile('(^.*?\.(gif|jpg|jpeg|png))', re.I)
-img_exp=re.compile('File:((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|<\/gallery>)', re.I)
+img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
+video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')

 def replace_gallery(content):
    # from <gallery>.*</gallery> imgs, return list of img ET elements
@@ -24,19 +24,37 @@ def replace_gallery(content):
    gallery_imgs = []
    gallery_found = re.findall(gallery_exp, content)
    content = re.sub(gallery_exp,  '', content)
-    print 'gallery_found', gallery_found
    for gallery in gallery_found: # in case there is more than 1 <gallery>
-        print 'GALLERY', gallery
        allfiles =re.findall(img_exp, gallery)
-        print 'ALLFILES', allfiles
        for imgfile in allfiles:
-            imgfile = imgfile[0]
-            imgsrc = api_thumb_url(imgfile) # search for original image
-            print imgfile, imgsrc
+            imgfile = imgfile[1]
+            imgsrc = api_file_url(imgfile) # search for original image
            img_el = ET.Element('img', attrib={'src': imgsrc})
            gallery_imgs.append(img_el)
-#            print 'gallery img', imgfile, ET.tostring(img_el)
    return content, gallery_imgs
+
+def replace_video(content):
+    '''Turn {{provider|hash}} templates into iframe elements.
+
+    Returns (content, videos): content with every video_exp match removed,
+    videos a list of iframe ET elements for the youtube/vimeo matches.
+    '''
+    print '-- Replacing Videos --'
+    embed_urls = {'youtube': 'https://www.youtube.com/embed/',
+                  'vimeo': 'https://player.vimeo.com/video/'}
+    videos = []
+    for video_provider, video_hash in re.findall(video_exp, content):
+        # no str(): it raises UnicodeEncodeError on non-ascii unicode text
+        video_provider = video_provider.strip().lower()
+        video_hash = video_hash.strip()
+        if video_provider not in embed_urls or not video_hash:
+            continue # unknown provider or empty hash: nothing to embed
+        video_src = embed_urls[video_provider] + video_hash
+        iframe_el = ET.Element('iframe', attrib={'src':video_src,  'width':'600px', 'height':'450px'})
+        videos.append(iframe_el)
+    # every match is removed, also the ones that produced no iframe
+    content = re.sub(video_exp,  '', content)
+    return content, videos
    
 def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content    
    for key in allworks_dict.keys():
@@ -44,17 +62,16 @@ def create_workpage( allworks_dict, work_key): # replace text content in dict wi
            mw_content = allworks_dict[key]
            if re.search(gallery_exp, mw_content):
                mw_content, gallery_imgs = replace_gallery(mw_content)
-                print gallery_imgs
-                print mw_content.encode('utf-8')            
-
-
-
+                allworks_dict['Images'] = gallery_imgs
+            elif re.search(video_exp, mw_content):
+                print '-- Searching for Video --'
+                mw_content, videos = replace_video(mw_content)
+                allworks_dict['Video'] = videos
+                print mw_content, videos

+                
            allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML
- #           print 'allworks_dict[key]', key#, allworks_dict[key]
            work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False)
-#            replace_gallery(work_htmltree)
-            
 #            print work_htmltree
 #            print ET.tostring(work_htmltree)

@@ -80,7 +97,7 @@ def create_workpage( allworks_dict, work_key): # replace text content in dict wi
 #        print work_htmltree
        allworks_dict[key] = work_htmltree
        allworks_dict.pop('Thumbnail', None) #remove thumnail
-#    pprint.pprint(allworks_dict)
+    pprint.pprint(allworks_dict)