quick n dirty gallery replacement

10 years ago · 4e5dd55718
parent 9cc73ca1ad
commit 4e5dd55718
4 changed files with 88 additions and 33 deletions
--- a/allworks_mmdc.json
+++ b/allworks_mmdc.json
--- a/mmdc_create_json.py
+++ b/mmdc_create_json.py
@ -89,7 +89,7 @@ def api_category(category, year): #Find all pages incategory and add to allworks
        print '-------------'
        print 
-api_category('Graduation work', '2015')
+api_category('Graduation work', '2012')
 # Save the collected works as JSON. The with-block closes (and therefore
 # flushes) the file even when json.dump raises, instead of leaving the
 # write handle open until the interpreter exits.
 with open('allworks_mmdc.json', 'w') as json_allworks:
    json.dump(allworks, json_allworks)
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@ -1,3 +1,5 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 import urllib2, json, pprint, re
 import xml.etree.ElementTree as ET
@ -15,7 +17,6 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
 #    pprint.pprint( json_dic )
    page_id =  json_dic.keys()[0]
    page_content = json_dic.get(page_id)
    print 'API Resquest URL:', url
    return page_content
 def api_page(pageid, query):
@ -66,3 +67,28 @@ def pandoc2html(mw_content):
        p2 = subprocess.Popen(args_pandoc, stdin=p1.stdout, stdout=subprocess.PIPE)
        html = (p2.communicate())[0]
        return html
 def img_fullurl(parent):
    imgs = parent.findall('.//img')
    print 'len IMG', len(imgs)
    for img in imgs:
        src = img.get('src')
        fullurl =  api_thumb_url(src)
        print '----- IMG', ET.tostring(img ), src, fullurl
        if fullurl != None:            
            img.set('src', fullurl)
        #        fileurl = api_request(src, endpoint)# find url of file
 def replace_youtube(parent, youtube_id):
    """Swap the first <youtube> element under *parent* for an embed <iframe>.

    parent     -- ElementTree element searched (at any depth) for <youtube>.
    youtube_id -- video id appended to the YouTube embed URL.

    The tree is modified in place and nothing is returned.  When there is
    no <youtube> element the tree is left untouched.
    """
    youtube = parent.find('.//youtube')
    if youtube is None:
        return
    youtube_url = "http://www.youtube.com/embed/{}".format(youtube_id)
    iframe = ET.Element('iframe', {"width":"560", "height":"315", "frameborder": "0", "allowfullscreen": "allowfullscreen", "src": youtube_url})
    # Keep whatever text followed the <youtube> tag.
    iframe.tail = youtube.tail
    # ElementTree elements hold no reference to their parent and the match
    # may sit deeper than a direct child of *parent*, so look up the element
    # that really contains it and replace it at the same position.
    for holder in parent.iter():
        for position, child in enumerate(holder):
            if child is youtube:
                holder[position] = iframe
                return
 # def replace_gallery(parent):
 #     galleries = parent.findall('.//gallery')
 #     for gallery in galleries:
 #         print 'GALLERY', gallery.text()
--- a/mmdc_pages.py
+++ b/mmdc_pages.py
@ -6,58 +6,87 @@
 #####
 import xml.etree.ElementTree as ET
 import html5lib, urllib2, json, pprint, re
-from mmdc_modules import api_thumb_url, pandoc2html
+from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl
 #import mmdc_create_json import api_thumb_url
 # Load every work from allworks_mmdc.json. The with-block closes the file
 # as soon as it has been parsed instead of leaving the handle open.
 with open('allworks_mmdc.json', 'r') as json_allworks_file:
    json_allworks = json.load(json_allworks_file)
 pages_path = 'web/work'
 #def generate_xml():
-    
+gallery_exp=re.compile('<gallery>.*?</gallery>')  # one <gallery>...</gallery> block, matched non-greedily
 # NOTE(review): gallery_exp is compiled without re.DOTALL, so a gallery that
 # spans several lines will not match - confirm the markup is on one line.
 # file_exp captures the text after each "File:" up to the next "File:" or
 # the closing </gallery> tag.
 file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
 # img_exp captures the start of a string up to and including the first
 # .gif/.jpg/.jpeg/.png extension, case-insensitively.
 img_exp=re.compile('(^.*?\.(gif|jpg|jpeg|png))', re.I)
 def replace_gallery(content):
    # from <gallery>.*</gallery> imgs, return list of img ET elements
    # replace <gallery>.*</gallery> with ''
    gallery_imgs = []
    gallery_found = re.findall(gallery_exp, content)
    content = re.sub(gallery_exp,  '', content)
    print 'gallery_found', gallery_found
    for gallery in gallery_found: # in case there is more than 1 <gallery>
        print 'GALLERY', gallery
        allfiles =re.findall(file_exp, gallery)
        print 'ALLFILES', allfiles
        for imgfile in allfiles:
            img = ((re.search(img_exp, imgfile)).group(0))#.decode('utf-8')
            imgsrc = api_thumb_url(img)
            img_el = ET.Element('img', attrib={'src': imgsrc})
            gallery_imgs.append(img_el)
            print 'gallery img', img, ET.tostring(img_el)
    return content, gallery_imgs
        # need to return a list of images that is the gallery
        # need to replace <gallery>.*</gallery> with ''
 def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content    
    # Walks every key of allworks_dict and overwrites its value in place:
    #   Description / Extra  -> <gallery> blocks stripped by replace_gallery,
    #                           text converted with pandoc2html, then parsed
    #                           into a fragment by html5lib
    #   Website              -> <a> element (href and text are the value)
    #   Title                -> <h1> element
    #   Creator / Date / Bio -> <p> element
    #   Thumbnail_url        -> <img> element (src is the value)
    #   any other key        -> None
    # The 'Thumbnail' key is removed. work_key is not used in the lines shown
    # here, and no return statement is visible: callers get the mutated dict.
    for key in allworks_dict.keys():
-        print key
+        if key in ['Description', 'Extra']: 
-        if key in ['Description', 'Extra']: #need conversion to html, dealing:imgs,<gallery>, vimeo/youtube
+            mw_content = allworks_dict[key]
-            allworks_dict[key] = pandoc2html( allworks_dict[key] if key in allworks_dict.keys() else '' ) # convert to HTML
+            if re.search(gallery_exp, mw_content):
-            htmlnode = ET.fromstring(allworks_dict[key]) # make them into node
+                mw_content, gallery_imgs = replace_gallery(mw_content)
                print gallery_imgs
                print mw_content.encode('utf-8')            
                # NOTE(review): gallery_imgs is only printed; the lines shown
                # here never add the gallery images back to the page.
            allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML
 #           print 'allworks_dict[key]', key#, allworks_dict[key]
            work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False)
 #            replace_gallery(work_htmltree)
 #            print work_htmltree
 #            print ET.tostring(work_htmltree)
            # vimeo/youtube: {{vimeo|44977056}}
            # External urls: [http://www.scribd.com/doc/105882261/THE-DICTATOR-S-PRACTICAL-INTERNET-GUIDE-TO-POWER-RETENTION scribd]
        elif key in ['Website']:
-            htmlnode = ET.Element('a', attrib={'href': allworks_dict[key], 'id':key})
+            work_htmltree = ET.Element('a', attrib={'href': allworks_dict[key], 'id':key})
-            htmlnode.text = allworks_dict[key]
+            work_htmltree.text = allworks_dict[key]
        elif key in ['Title']:
-            htmlnode = ET.Element('h1', attrib={'id': key})
+            work_htmltree = ET.Element('h1', attrib={'id': key})
-            htmlnode.text
+            work_htmltree.text
            # NOTE(review): the line above only reads .text; nothing is
            # assigned, so the <h1> is created without any text.
        elif key in ['Creator', 'Date', 'Bio']:
-            htmlnode = ET.Element('p', attrib={'id': key})
+            work_htmltree = ET.Element('p', attrib={'id': key})
-            htmlnode.text
+            work_htmltree.text
            # NOTE(review): same as for Title - .text is read, never assigned.
        elif key in ['Thumbnail_url']:
-            htmlnode = ET.Element('img', attrib={'src': allworks_dict[key], 'id': key})
+            work_htmltree = ET.Element('img', attrib={'src': allworks_dict[key], 'id': key})
-
+            print ET.tostring(work_htmltree)
        else:
-            htmlnode = None
+            work_htmltree = None
            # remove keys with None value?
-        print htmlnode
+#        print work_htmltree
-        allworks_dict[key] = htmlnode
+        allworks_dict[key] = work_htmltree
        # This pop sits inside the for loop, so it runs on every iteration.
        allworks_dict.pop('Thumbnail', None) #remove thumnail
-    pprint.pprint(allworks_dict)
+#    pprint.pprint(allworks_dict)
        #     #p
        # elif key in ['Thumbnail_url']:
        #     #<img>
 #        else:
 #            generate_xml()
 #            work_dict[key] = allworks_dict[key] if key in allworks_dict.keys() else ''
 #        print work_dict
 for key in json_allworks.keys():
        graduation_work=json_allworks[key]
-        print graduation_work['Creator']
+        print (graduation_work['Creator']).encode('utf-8')
 #        pprint.pprint(graduation_work)