#! /usr/bin/env python # -*- coding: utf-8 -*- ############## # FROM THE JSON DICTIONARY CREATE AN INDEX PAGE ##### import xml.etree.ElementTree as ET import html5lib, urllib2, json, pprint, re from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file #import mmdc_create_json import api_thumb_url json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks = json.loads(json_allworks_file.read()) pages_path = 'web/work' #def generate_xml(): gallery_exp=re.compile('.*?') file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)') img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I) video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}') def replace_gallery(content): # from .* imgs, return list of img ET elements # replace .* with '' gallery_imgs = [] gallery_found = re.findall(gallery_exp, content) content = re.sub(gallery_exp, '', content) for gallery in gallery_found: # in case there is more than 1 allfiles =re.findall(img_exp, gallery) for imgfile in allfiles: imgfile = imgfile[1] imgsrc = api_file_url(imgfile) # search for original image gallery_imgs.append(imgsrc) return content, gallery_imgs def replace_video(content): videos = [] videos_found = re.findall(video_exp, content) for video in videos_found: video_provider = str(video[0]) video_hash = str(video[1]) video_src = None if (video_provider.lower()) == 'youtube': video_src="https://www.youtube.com/embed/" + video_hash elif (video_provider.lower()) == 'vimeo': video_src="https://player.vimeo.com/video/" + video_hash if video_src: videos.append(video_src) content = re.sub(video_exp, '', content) return content, videos def create_workpage( allworks_dict, work_key, tree): # replace text content in dict with html nodes, holding the content for key in allworks_dict.keys(): div_header = (tree.findall(".//div[@class='header']"))[0] div_body = (tree.findall(".//div[@class='body']"))[0] div_av = (tree.findall(".//div[@class='av']"))[0] # p = ET.Element('p') # p.text = " oooo oooo oo" # print 'ELEMENTS', ET.tostring(div_header) # print 'ELEMENTS', ET.tostring(div_body) # print 'ELEMENTS', ET.tostring(div_av) # ET.SubElement(div_header, 'p' ) if key in ['Description', 'Extra']: mw_content = allworks_dict[key] if re.search(gallery_exp, mw_content): mw_content, gallery_imgs = replace_gallery(mw_content) allworks_dict['Images'] = gallery_imgs for imgsrc in gallery_imgs: img_el = ET.SubElement(div_av, 'img', attrib={'src': imgsrc}) print 'IMG', ET.tostring(img_el) elif re.search(video_exp, mw_content): mw_content, videos = replace_video(mw_content) allworks_dict['Video'] = videos for video in videos: iframe_el = ET.SubElement(div_av, 'iframe', attrib={'src': video, 'width':'600px', 'height':'450px'}) print 'VIDEO', ET.tostring(iframe_el) allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML work_el = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False) div_body.append( work_el ) print "****************************" print ET.tostring(div_body) print "****************************" elif key in ['Website']: work_el = ET.SubElement(div_header, 'a', attrib={'href': allworks_dict[key], 'id':key}) work_el.text = 'LINK'#allworks_dict[key] elif key in ['Title']: work_el = ET.SubElement(div_header, 'h1', attrib={'id': key}) work_el.text = (allworks_dict[key]).replace('_', ' ') elif key in ['Creator', 'Date', 'Bio']: work_el =ET.SubElement(div_header, 'p', attrib={'id': key}) work_el.text = allworks_dict[key] elif key in ['Thumbnail_url']: print key # ERROR - Thumbnail url is None # work_el = ET.SubElement(div_header, 'img', attrib={'src': allworks_dict[key], 'id': key}) else: work_el = None # remove keys with None value? allworks_dict[key] = work_el allworks_dict.pop('Thumbnail_url', None) #remove Thumbnail_url pprint.pprint(allworks_dict) def edit_index(filepath, json_allworks_dict): input_file = open(filepath, 'r') tree = html5lib.parse(input_file, namespaceHTMLElements=False) div_section02 = (tree.findall(".//div[@id='section02']"))[0] for key in json_allworks_dict.keys(): graduation_work=json_allworks_dict[key] insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key ) return tree worktemplate = open('web/work-template.html', 'r') for key in json_allworks.keys(): graduation_work=json_allworks[key] graduation_work_title = (json_allworks[key]['Title']).encode('ascii', 'ignore') work_file = 'web/' + key + '-' + graduation_work_title + '.html' work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False) create_workpage(graduation_work, key, work_tree ) write_html_file(work_tree, work_file) print '----------' # print ET.tostring(work_tree) print graduation_work['Creator'] print graduation_work_title print '----------' ### ISSSUES # Error in thumbnail_url: it is None in JSON # Specific positions