#! /usr/bin/env python # -*- coding: utf-8 -*- ############## # FROM THE JSON DICTIONARY CREATE AN INDEX PAGE ##### import xml.etree.ElementTree as ET import html5lib, urllib2, json, pprint, re from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file, pandoc #import mmdc_create_json import api_thumb_url json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks = json.loads(json_allworks_file.read()) pages_path = 'web/work' #def generate_xml(): gallery_exp=re.compile('.*?') file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)') img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I) video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}') def replace_gallery(content): gallery_imgs = [] gallery_found = re.findall(gallery_exp, content) content = re.sub(gallery_exp, '', content) for gallery in gallery_found: # in case there is more than 1 allfiles =re.findall(img_exp, gallery) for imgfile in allfiles: imgfile = imgfile[1] imgsrc = api_file_url(imgfile) # search for original image gallery_imgs.append(imgsrc) print 'gallery_imgs', gallery_imgs # from .* imgs, return list of img ET elements # replace .* with '' return content, gallery_imgs def replace_video(content): videos = [] videos_found = re.findall(video_exp, content) for video in videos_found: video_provider = str(video[0]) video_hash = str(video[1]) video_src = None if (video_provider.lower()) == 'youtube': video_src="https://www.youtube.com/embed/" + video_hash elif (video_provider.lower()) == 'vimeo': video_src="https://player.vimeo.com/video/" + video_hash if video_src: videos.append(video_src) iframe = "".format(video_src) # content = re.sub(video_exp, ' iframe ', content) else: content = re.sub(video_exp, '', content) return content, videos def workpage_div_content(tree, div_id, element, content): parent_str = ".//div[@id='{}']".format( div_id.lower() ) parent = tree.find(parent_str) if element is 'img': lxml.SubElement(parent, 'img', attrib={'src': content}) elif element in ['p','h1','h2']: sub = lxml.SubElement(parent, element) sub.text = content elif element == 'a': sub = ET.SubElement(parent, element, attrib={'href':content}) sub.text = content else:# element == 'span': test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'}) test = ET.SubElement(parent, test_el) # parent.append(test) # sub = ET.SubElement(parent, element) #sub.text = content = ' '+ div_id # parent.append(content) print parent_str, ET.tostring(parent) def create_workpage( work, work_key, tree): # replace text content in dict with html nodes, holding the content # pprint.pprint( work) for key in work.keys(): # print work[key] div_header = tree.find(".//div[@class='header']") div_body = tree.find(".//div[@class='body']") div_av = tree.find(".//div[@class='av']") if key in ['Creator', 'Date', 'Bio']: workpage_div_content(tree, key, 'p', work[key]) elif key == 'Title': workpage_div_content(tree, key, 'h1', work[key]) elif key == 'Thumbnail': thumb = api_file_url(work[key]) workpage_div_content(tree, key, 'img', thumb) elif key == 'Website': workpage_div_content(tree, key, 'a', work[key]) elif key in ['Description', 'Extra']: test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'}) workpage_div_content(tree, key, test_el, '') # HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ??? # test_sub = ET.SubElement(test_el, 'span') # test_sub.text = content # sub = ET.SubElement(parent, test_el) # mw_content = work[key] # # if re.search(gallery_exp, mw_content): # # # replace_gallery must replace the gallery inline # # mw_content, gallery_imgs = replace_gallery(mw_content) # # work['Images'] = gallery_imgs # # for imgsrc in gallery_imgs: # # img_el = ET.SubElement(div_av, 'img', attrib={'src': imgsrc}) # # elif re.search(video_exp, mw_content): # # mw_content, videos = replace_video(mw_content) # # work['Video'] = videos # # # for video in videos: # # # iframe_el = ET.SubElement(div_av, 'iframe', attrib={'src': video, 'width':'600px', 'height':'450px'}) # # # print 'VIDEO', ET.tostring(iframe_el) # print '--------------' # print 'mw_content', mw_content # print '--------------' # html_content = pandoc2html( mw_content if key in work.keys() else '') # convert to HTML # print 'html_content', html_content # document_el = html5lib.parse(html_content, namespaceHTMLElements=False)#ET.fromstring(html_content) # print ET.tostring(document_el) # print 'document_el', document_el, ET.iselement(document_el) # all_el = document_el.findall('body//') # if all_el: # all_el.reverse() # for el in all_el: # print 'el', ET.tostring(el) # div_body.append(el) # imgs = document_el.findall('.//img') # # if imgs: # # for img in imgs: # # src = api_file_url(img.get('src')) # # img.set('src', src) # # print 'IMG', img, src # print "****************************" # print ET.tostring(div_body) # print "****************************" # # elif key in ['Thumbnail_url']: # # print Thumbnail_url, work[key] # elif key in ['Thumbnail_url']: # print 'THUMBNAIL_URL', work[key] # # ERROR - Thumbnail url is None # # work_el = ET.SubElement(div_header, 'img', attrib={'src': work[key], 'id': key}) # else: # work_el = None # remove keys with None value? # work[key] = work_el # work.pop('Thumbnail_url', None) #remove Thumbnail_url # pprint.pprint(work) def edit_index(filepath, json_allworks_dict): input_file = open(filepath, 'r') tree = html5lib.parse(input_file, namespaceHTMLElements=False) div_section02 = tree.find(".//div[@id='section02']") for key in json_allworks_dict.keys(): graduation_work=json_allworks_dict[key] insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key ) return tree #worktemplate = open('web/work-template.html', 'r') for key in json_allworks.keys(): work=json_allworks[key] creator = work['Creator'].encode('utf-8') if 'Creator' in work else '' date = work['Date'] if 'Date' in work else '' website=work['Website'] if 'Website' in work else '' thumbnail=work['Thumbnail_url'] if 'Thumbnail_url' in work else '' bio=(work['Bio'].encode('utf-8')).replace('"','\\"') if 'Bio' in work else '' description=(work['Description']).replace('"','\\"').encode('utf-8') if 'Description' in work else '' extra=(work['Extra'].encode('utf-8')).replace('"','\\"') if 'Extra' in work else '' work_file = 'web/{}-{}-{}.html'.format(work['Date'], (work['Creator'].encode('ascii', 'ignore')).replace(' ','_'), key) title = work['Title'] print work_file, website # if key in work.keys() else '' if extra: extra_html = pandoc2html(extra) print (extra_html) # pandoc( filename=work_file, \ # template='web/work-template.html', \ # title=title, \ # creator=creator, \ # date=date, \ # website=website, \ # thumbnail=thumbnail, \ # bio=bio, \ # description=description, \ # extra=extra ) # convert to HTML # print '= = = = = = =' # print html_content # print '= = = = = = =' # creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_') # description = work['Description'] # print work_file # work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False) # create_workpage(work, key, work_tree ) # write_html_file(work_tree, work_file) ### ISSSUES # pandoc mw->HTML NOT WORKING # sub gallery/videos with corresponding elements # Gallaries, Files, videos, in orginal places correct place # Specificy positions in template # insert

into

# separate Extra and Description