pzimediadesign/mmdc_pages.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-

##############
# FROM THE JSON DICTIONARY CREATE AN INDEX PAGE
#####
import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re
from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url,  write_html_file, pandoc
#import mmdc_create_json import api_thumb_url
# Load the dictionary of all works from the JSON file (it is only read here).
# The context manager closes the handle as soon as parsing is done; the name
# json_allworks_file stays bound at module level as before, but now refers
# to a closed file.
with open('allworks_mmdc.json', 'r') as json_allworks_file:
    json_allworks = json.load(json_allworks_file)
pages_path = 'web/work'

#def generate_xml():
# Regular expressions for the wiki markup handled in this file.
# They are written as raw strings so the backslashes reach the regex engine
# unchanged; the compiled patterns are identical to the non-raw originals,
# without relying on invalid string escapes such as '\/', '\.' or '\{'.

# One whole <gallery>...</gallery> element.  '.' does not match a newline
# here, so opening and closing tag must be on the same line to match.
gallery_exp = re.compile(r'<gallery>.*?</gallery>')
# Text following 'File:' up to the next 'File:' or the closing gallery tag.
file_exp = re.compile(r'File:(.*?)(?=File:|<\/gallery>)')
# 'File:'/'Image:' entries ending in an image extension (case-insensitive).
# Groups: 1 = prefix, 2 = full file name, 3 = base name, 4 = extension.
img_exp = re.compile(r'(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
# Two-field templates of the form {{first|second}}.
# Groups: 1 = text before the pipe, 2 = text after it.
video_exp = re.compile(r'\{\{(.*?)\|(.*?)\}\}')

def replace_gallery(content):
    gallery_imgs = []
    gallery_found = re.findall(gallery_exp, content)
    content = re.sub(gallery_exp,  '', content)
    for gallery in gallery_found: # in case there is more than 1 <gallery>
        allfiles =re.findall(img_exp, gallery)
        for imgfile in allfiles:
            imgfile = imgfile[1]
            imgsrc = api_file_url(imgfile) # search for original image
            gallery_imgs.append(imgsrc)
            print 'gallery_imgs', gallery_imgs
    # from <gallery>.*</gallery> imgs, return list of img ET elements
    # replace <gallery>.*</gallery> with ''
    return content, gallery_imgs

def replace_video(content):
    """Collect embeddable video URLs from ``{{provider|id}}`` templates.

    Every match of ``video_exp`` in *content* is handled on its own:

    * a ``youtube`` or ``vimeo`` template (provider compared
      case-insensitively) adds its embed URL to the returned list and is
      left untouched in the text;
    * any other template is removed from the text.

    Removal used to be done with a global substitution, so a single
    unsupported template wiped every template from the text, supported
    ones included.  Each occurrence is now decided individually.

    Returns a tuple ``(content, videos)`` where ``videos`` lists the embed
    URLs in order of appearance.
    """
    embed_prefixes = {
        'youtube': "https://www.youtube.com/embed/",
        'vimeo': "https://player.vimeo.com/video/",
    }
    videos = []

    def _handle_template(match):
        # Decide the fate of one {{...|...}} occurrence.
        video_provider = str(match.group(1))
        video_hash = str(match.group(2))
        prefix = embed_prefixes.get(video_provider.lower())
        if prefix is None:
            # unsupported template: drop just this occurrence
            return ''
        videos.append(prefix + video_hash)
        # TODO: an earlier draft meant to substitute an <iframe> for the
        # template here; until then the markup is kept as it is.
        return match.group(0)

    content = video_exp.sub(_handle_template, content)
    return content, videos
    

def workpage_div_content(tree, div_id, element, content):            
    parent_str = ".//div[@id='{}']".format( div_id.lower() )
    parent = tree.find(parent_str)
    if element is 'img':
        lxml.SubElement(parent, 'img', attrib={'src': content})    
    elif element in ['p','h1','h2']:
        sub = lxml.SubElement(parent, element)    
        sub.text = content
    elif element == 'a':
        sub = ET.SubElement(parent, element, attrib={'href':content})    
        sub.text = content
    else:# element == 'span':
        test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})
        test = ET.SubElement(parent, test_el)
#        parent.append(test)
#        sub = ET.SubElement(parent, element)    
        #sub.text = content  = ' '+ div_id
 #        parent.append(content)
        
    print  parent_str, ET.tostring(parent)


def create_workpage(work, work_key, tree):
    """Hand the fields of one *work* dict to ``workpage_div_content``.

    For every recognised key, ``workpage_div_content`` is called with
    *tree*, the key (used there as the id of the target div) and:

    * ``Creator``, ``Date``, ``Bio`` -- element ``'p'`` and the field value
    * ``Title``                      -- element ``'h1'`` and the field value
    * ``Website``                    -- element ``'a'`` and the field value
    * ``Thumbnail``                  -- element ``'img'`` and the result of
      ``api_file_url`` for the field value
    * ``Description``, ``Extra``     -- a yellow placeholder ``<div>`` element
      and an empty string; the field value itself is not inserted yet

    Any other key is skipped.  *work_key* is accepted but not used.
    """
    # The header/body/av div lookups that ran on every iteration were
    # dropped: only the commented-out draft below ever read them.
    field_tags = {
        'Creator': 'p',
        'Date': 'p',
        'Bio': 'p',
        'Title': 'h1',
        'Website': 'a',
    }
    for key in work:
        if key in field_tags:
            workpage_div_content(tree, key, field_tags[key], work[key])
        elif key == 'Thumbnail':
            thumb = api_file_url(work[key])
            workpage_div_content(tree, key, 'img', thumb)
        elif key in ('Description', 'Extra'):
            test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})
            workpage_div_content(tree, key, test_el, '')

            # HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ???

            
            # test_sub = ET.SubElement(test_el, 'span')
        # test_sub.text = content
        # sub = ET.SubElement(parent, test_el)    

        #             mw_content = work[key]
# #             if re.search(gallery_exp, mw_content):
# #                 # replace_gallery must replace the gallery inline
# #                 mw_content, gallery_imgs = replace_gallery(mw_content)
# #                 work['Images'] = gallery_imgs
# #                 for imgsrc in gallery_imgs:
# #                     img_el = ET.SubElement(div_av, 'img', attrib={'src': imgsrc})
# #             elif re.search(video_exp, mw_content):
# #                 mw_content, videos = replace_video(mw_content)
# #                 work['Video'] = videos
# # #                for video in videos:
# # #                    iframe_el = ET.SubElement(div_av, 'iframe', attrib={'src': video, 'width':'600px', 'height':'450px'})
# # #                    print 'VIDEO',  ET.tostring(iframe_el)

#             print '--------------'                    
#             print 'mw_content',  mw_content
#             print '--------------'
#             html_content = pandoc2html( mw_content if key in work.keys() else '') # convert to HTML
#             print 'html_content', html_content
#             document_el = html5lib.parse(html_content, namespaceHTMLElements=False)#ET.fromstring(html_content)
#             print ET.tostring(document_el)
#             print 'document_el', document_el, ET.iselement(document_el)
#             all_el = document_el.findall('body//')
#             if all_el:
#                 all_el.reverse()

#             for el in all_el:
#                 print 'el', ET.tostring(el) 
#                 div_body.append(el) 
#             imgs = document_el.findall('.//img')
#             # if imgs:
#             #     for img in  imgs:
#             #         src = api_file_url(img.get('src'))
#             #         img.set('src', src)                    
#             #         print 'IMG', img, src
#             print "****************************"
#             print ET.tostring(div_body)
#             print "****************************"

# #        elif key in ['Thumbnail_url']:
# #            print Thumbnail_url, work[key]
            
            
#         elif key in ['Thumbnail_url']:
#             print  'THUMBNAIL_URL',  work[key]
#             # ERROR - Thumbnail url is None
#             # work_el = ET.SubElement(div_header, 'img', attrib={'src': work[key], 'id': key})        
#         else:
#             work_el = None       # remove keys with None value?
#         work[key] = work_el
#         work.pop('Thumbnail_url', None) #remove Thumbnail_url
#     pprint.pprint(work)


def edit_index(filepath, json_allworks_dict):
    """Parse the HTML file at *filepath* and pass every work to ``insert_work``.

    The file is parsed with html5lib (``namespaceHTMLElements=False``).
    ``insert_work`` is then called once per entry of *json_allworks_dict*
    with the ``<div id="section02">`` element of the tree, the string
    ``'Graduation_work thumbnail'``, the work and its key.

    Returns the parsed tree.
    """
    # the tree is complete once parse() returns, so the file is closed here
    # instead of being left open for the rest of the run
    with open(filepath, 'r') as input_file:
        tree = html5lib.parse(input_file, namespaceHTMLElements=False)
    div_section02 = tree.find(".//div[@id='section02']")
    # NOTE(review): insert_work is neither defined nor imported in the
    # visible part of this file -- confirm where it is meant to come from.
    for key, graduation_work in json_allworks_dict.items():
        insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key)
    return tree


#worktemplate = open('web/work-template.html', 'r') 
for key in json_allworks.keys():
    work=json_allworks[key]    
    creator = work['Creator'].encode('utf-8') if 'Creator' in work else ''
    date = work['Date'] if 'Date' in work else ''
    website=work['Website'] if 'Website' in work else '' 
    thumbnail=work['Thumbnail_url'] if 'Thumbnail_url' in work else ''
    bio=(work['Bio'].encode('utf-8')).replace('"','\\"') if 'Bio' in work else '' 
    description=(work['Description']).replace('"','\\"').encode('utf-8') if 'Description' in work else '' 
    extra=(work['Extra'].encode('utf-8')).replace('"','\\"') if 'Extra' in work else ''     
    work_file = 'web/{}-{}-{}.html'.format(work['Date'], (work['Creator'].encode('ascii', 'ignore')).replace(' ','_'), key)
    title = work['Title']

    print work_file, website
    # if key in work.keys() else ''

    if extra:
        extra_html = pandoc2html(extra)
        print (extra_html)

    # pandoc( filename=work_file, \
    #         template='web/work-template.html', \
    #         title=title, \
    #         creator=creator, \
    #         date=date, \
    #         website=website, \
    #         thumbnail=thumbnail, \
    #         bio=bio, \
    #         description=description, \
    #         extra=extra ) # convert to HTML
    # print '= = = = = = ='
    # print html_content 
    # print '= = = = = = ='
    
#    creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_')        
#    description = work['Description']

#        print work_file
#        work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False)
#        create_workpage(work, key, work_tree )
#        write_html_file(work_tree, work_file)


### ISSUES
# pandoc mw->HTML   NOT WORKING
# sub gallery/videos with corresponding elements


# Galleries, files, videos: in their original (correct) places
# Specify positions in template
# insert <p> into <div class="body">
# separate Extra and Description
mmdc_pages.py pandoc conversion working 10 years ago			`#! /usr/bin/env python`
			`# -- coding: utf-8 --`

			`##############`
			`# FROM THE JSON DICTIONARY CREATE AN INDEX PAGE`
			`#####`
			`import xml.etree.ElementTree as ET`
			`import html5lib, urllib2, json, pprint, re`
approach only with pandoc: failled 10 years ago			`from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file, pandoc`
creating pages makeing script 10 years ago			`#import mmdc_create_json import api_thumb_url`
mmdc_pages.py pandoc conversion working 10 years ago			`json_allworks_file = open('allworks_mmdc.json', 'r') # save json`
			`json_allworks = json.loads(json_allworks_file.read())`
			`pages_path = 'web/work'`

creating pages makeing script 10 years ago			`#def generate_xml():`
quick n dirty gallery replacement 10 years ago			`gallery_exp=re.compile('<gallery>.*?</gallery>')`
			`file_exp=re.compile('File:(.*?)(?=File:\|<\/gallery>)')`
video to iframes 10 years ago			`img_exp=re.compile('(File:\|Image:)((.*?)\.(gif\|jpg\|jpeg\|png))(?=\\|\|File:\|Image:\|<\/gallery>)', re.I)`
			`video_exp=re.compile('\{\{(.?)\\|(.?)\}\}')`
quick n dirty gallery replacement 10 years ago
			`def replace_gallery(content):`
			`gallery_imgs = []`
			`gallery_found = re.findall(gallery_exp, content)`
			`content = re.sub(gallery_exp, '', content)`
			`for gallery in gallery_found: # in case there is more than 1 <gallery>`
refined gallery regex 10 years ago			`allfiles =re.findall(img_exp, gallery)`
quick n dirty gallery replacement 10 years ago			`for imgfile in allfiles:`
video to iframes 10 years ago			`imgfile = imgfile[1]`
			`imgsrc = api_file_url(imgfile) # search for original image`
inserting content into pages template 10 years ago			`gallery_imgs.append(imgsrc)`
simple pages, without video or image 10 years ago			`print 'gallery_imgs', gallery_imgs`
			`# from <gallery>.*</gallery> imgs, return list of img ET elements`
			`# replace <gallery>.*</gallery> with ''`
quick n dirty gallery replacement 10 years ago			`return content, gallery_imgs`
video to iframes 10 years ago
			`def replace_video(content):`
			`videos = []`
			`videos_found = re.findall(video_exp, content)`
			`for video in videos_found:`
			`video_provider = str(video[0])`
			`video_hash = str(video[1])`
			`video_src = None`
			`if (video_provider.lower()) == 'youtube':`
			`video_src="https://www.youtube.com/embed/" + video_hash`
			`elif (video_provider.lower()) == 'vimeo':`
			`video_src="https://player.vimeo.com/video/" + video_hash`
			`if video_src:`
simple pages, without video or image 10 years ago			`videos.append(video_src)`
			`iframe = "<iframe src='{}' width='600px' height='450px'></iframe>".format(video_src)`
			`# content = re.sub(video_exp, ' iframe ', content)`
			`else:`
			`content = re.sub(video_exp, '', content)`
video to iframes 10 years ago			`return content, videos`
creating pages makeing script 10 years ago
Failure: we'll start a new a approach from now onwards 10 years ago
			`def workpage_div_content(tree, div_id, element, content):`
			`parent_str = ".//div[@id='{}']".format( div_id.lower() )`
			`parent = tree.find(parent_str)`
			`if element is 'img':`
			`lxml.SubElement(parent, 'img', attrib={'src': content})`
			`elif element in ['p','h1','h2']:`
			`sub = lxml.SubElement(parent, element)`
			`sub.text = content`
			`elif element == 'a':`
			`sub = ET.SubElement(parent, element, attrib={'href':content})`
			`sub.text = content`
			`else:# element == 'span':`
			`test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})`
			`test = ET.SubElement(parent, test_el)`
			`# parent.append(test)`
			`# sub = ET.SubElement(parent, element)`
			`#sub.text = content = ' '+ div_id`
			`# parent.append(content)`

			`print parent_str, ET.tostring(parent)`


refining page creation 10 years ago			`def create_workpage( work, work_key, tree): # replace text content in dict with html nodes, holding the content`
page creation: still no html 10 years ago			`# pprint.pprint( work)`
refining page creation 10 years ago			`for key in work.keys():`
page creation: still no html 10 years ago			`# print work[key]`
refining page creation 10 years ago
page creation: still no html 10 years ago			`div_header = tree.find(".//div[@class='header']")`
			`div_body = tree.find(".//div[@class='body']")`
			`div_av = tree.find(".//div[@class='av']")`
Failure: we'll start a new a approach from now onwards 10 years ago
			`if key in ['Creator', 'Date', 'Bio']:`
			`workpage_div_content(tree, key, 'p', work[key])`
			`elif key == 'Title':`
			`workpage_div_content(tree, key, 'h1', work[key])`
			`elif key == 'Thumbnail':`
			`thumb = api_file_url(work[key])`
			`workpage_div_content(tree, key, 'img', thumb)`
			`elif key == 'Website':`
			`workpage_div_content(tree, key, 'a', work[key])`
			`elif key in ['Description', 'Extra']:`
			`test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})`
			`workpage_div_content(tree, key, test_el, '')`

			`# HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ???`


			`# test_sub = ET.SubElement(test_el, 'span')`
			`# test_sub.text = content`
			`# sub = ET.SubElement(parent, test_el)`

approach only with pandoc: failled 10 years ago			`# mw_content = work[key]`
Failure: we'll start a new a approach from now onwards 10 years ago			`# # if re.search(gallery_exp, mw_content):`
			`# # # replace_gallery must replace the gallery inline`
			`# # mw_content, gallery_imgs = replace_gallery(mw_content)`
			`# # work['Images'] = gallery_imgs`
			`# # for imgsrc in gallery_imgs:`
			`# # img_el = ET.SubElement(div_av, 'img', attrib={'src': imgsrc})`
			`# # elif re.search(video_exp, mw_content):`
			`# # mw_content, videos = replace_video(mw_content)`
			`# # work['Video'] = videos`
			`# # # for video in videos:`
			`# # # iframe_el = ET.SubElement(div_av, 'iframe', attrib={'src': video, 'width':'600px', 'height':'450px'})`
			`# # # print 'VIDEO', ET.tostring(iframe_el)`

			`# print '--------------'`
			`# print 'mw_content', mw_content`
			`# print '--------------'`
			`# html_content = pandoc2html( mw_content if key in work.keys() else '') # convert to HTML`
			`# print 'html_content', html_content`
			`# document_el = html5lib.parse(html_content, namespaceHTMLElements=False)#ET.fromstring(html_content)`
			`# print ET.tostring(document_el)`
			`# print 'document_el', document_el, ET.iselement(document_el)`
			`# all_el = document_el.findall('body//')`
			`# if all_el:`
			`# all_el.reverse()`

			`# for el in all_el:`
			`# print 'el', ET.tostring(el)`
approach only with pandoc: failled 10 years ago			`# div_body.append(el)`
Failure: we'll start a new a approach from now onwards 10 years ago			`# imgs = document_el.findall('.//img')`
			`# # if imgs:`
			`# # for img in imgs:`
			`# # src = api_file_url(img.get('src'))`
			`# # img.set('src', src)`
			`# # print 'IMG', img, src`
refining page creation 10 years ago			`# print "****************************"`
			`# print ET.tostring(div_body)`
			`# print "****************************"`

			`# # elif key in ['Thumbnail_url']:`
			`# # print Thumbnail_url, work[key]`


			`# elif key in ['Thumbnail_url']:`
			`# print 'THUMBNAIL_URL', work[key]`
			`# # ERROR - Thumbnail url is None`
			`# # work_el = ET.SubElement(div_header, 'img', attrib={'src': work[key], 'id': key})`
			`# else:`
			`# work_el = None # remove keys with None value?`
			`# work[key] = work_el`
			`# work.pop('Thumbnail_url', None) #remove Thumbnail_url`
			`# pprint.pprint(work)`


creating work page 10 years ago			`def edit_index(filepath, json_allworks_dict):`
			`input_file = open(filepath, 'r')`
			`tree = html5lib.parse(input_file, namespaceHTMLElements=False)`
page creation: still no html 10 years ago			`div_section02 = tree.find(".//div[@id='section02']")`
creating work page 10 years ago			`for key in json_allworks_dict.keys():`
			`graduation_work=json_allworks_dict[key]`
			`insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key )`
			`return tree`
creating pages makeing script 10 years ago

approach only with pandoc: failled 10 years ago			`#worktemplate = open('web/work-template.html', 'r')`
creating work page 10 years ago			`for key in json_allworks.keys():`
approach only with pandoc: failled 10 years ago			`work=json_allworks[key]`
			`creator = work['Creator'].encode('utf-8') if 'Creator' in work else ''`
			`date = work['Date'] if 'Date' in work else ''`
			`website=work['Website'] if 'Website' in work else ''`
			`thumbnail=work['Thumbnail_url'] if 'Thumbnail_url' in work else ''`
			`bio=(work['Bio'].encode('utf-8')).replace('"','\\"') if 'Bio' in work else ''`
			`description=(work['Description']).replace('"','\\"').encode('utf-8') if 'Description' in work else ''`
			`extra=(work['Extra'].encode('utf-8')).replace('"','\\"') if 'Extra' in work else ''`
			`work_file = 'web/{}-{}-{}.html'.format(work['Date'], (work['Creator'].encode('ascii', 'ignore')).replace(' ','_'), key)`
			`title = work['Title']`

			`print work_file, website`
			`# if key in work.keys() else ''`
rewriting all mechanism from scrath - w/out json 10 years ago
			`if extra:`
			`extra_html = pandoc2html(extra)`
			`print (extra_html)`

			`# pandoc( filename=work_file, \`
			`# template='web/work-template.html', \`
			`# title=title, \`
			`# creator=creator, \`
			`# date=date, \`
			`# website=website, \`
			`# thumbnail=thumbnail, \`
			`# bio=bio, \`
			`# description=description, \`
			`# extra=extra ) # convert to HTML`
approach only with pandoc: failled 10 years ago			`# print '= = = = = = ='`
			`# print html_content`
			`# print '= = = = = = ='`

			`# creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_')`
			`# description = work['Description']`

page creation: still no html 10 years ago			`# print work_file`
approach only with pandoc: failled 10 years ago			`# work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False)`
			`# create_workpage(work, key, work_tree )`
			`# write_html_file(work_tree, work_file)`
inserting content into pages template 10 years ago

			`### ISSSUES`
simple pages, without video or image 10 years ago			`# pandoc mw->HTML NOT WORKING`
			`# sub gallery/videos with corresponding elements`



refining page creation 10 years ago			`# Gallaries, Files, videos, in orginal places correct place`
			`# Specificy positions in template`
page creation: still no html 10 years ago			`# insert <p> into <div class="body">`
refining page creation 10 years ago			`# separate Extra and Description`