#! /usr/bin/env python # -*- coding: utf-8 -*- ############## # FROM THE JSON DICTIONARY CREATE AN INDEX PAGE ##### import xml.etree.ElementTree as ET import html5lib, urllib2, json, pprint, re from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl #import mmdc_create_json import api_thumb_url json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks = json.loads(json_allworks_file.read()) pages_path = 'web/work' #def generate_xml(): gallery_exp=re.compile('.*?') file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)') img_exp=re.compile('(^.*?\.(gif|jpg|jpeg|png))', re.I) def replace_gallery(content): # from .* imgs, return list of img ET elements # replace .* with '' gallery_imgs = [] gallery_found = re.findall(gallery_exp, content) content = re.sub(gallery_exp, '', content) print 'gallery_found', gallery_found for gallery in gallery_found: # in case there is more than 1 print 'GALLERY', gallery allfiles =re.findall(file_exp, gallery) print 'ALLFILES', allfiles for imgfile in allfiles: img = ((re.search(img_exp, imgfile)).group(0))#.decode('utf-8') imgsrc = api_thumb_url(img) img_el = ET.Element('img', attrib={'src': imgsrc}) gallery_imgs.append(img_el) print 'gallery img', img, ET.tostring(img_el) return content, gallery_imgs # need to return a list of images that is the gallery # need to replace .* with '' def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content for key in allworks_dict.keys(): if key in ['Description', 'Extra']: mw_content = allworks_dict[key] if re.search(gallery_exp, mw_content): mw_content, gallery_imgs = replace_gallery(mw_content) print gallery_imgs print mw_content.encode('utf-8') allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML # print 'allworks_dict[key]', key#, allworks_dict[key] work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False) # replace_gallery(work_htmltree) # print work_htmltree # print ET.tostring(work_htmltree) # vimeo/youtube: {{vimeo|44977056}} # External urls: [http://www.scribd.com/doc/105882261/THE-DICTATOR-S-PRACTICAL-INTERNET-GUIDE-TO-POWER-RETENTION scribd] elif key in ['Website']: work_htmltree = ET.Element('a', attrib={'href': allworks_dict[key], 'id':key}) work_htmltree.text = allworks_dict[key] elif key in ['Title']: work_htmltree = ET.Element('h1', attrib={'id': key}) work_htmltree.text elif key in ['Creator', 'Date', 'Bio']: work_htmltree = ET.Element('p', attrib={'id': key}) work_htmltree.text elif key in ['Thumbnail_url']: work_htmltree = ET.Element('img', attrib={'src': allworks_dict[key], 'id': key}) print ET.tostring(work_htmltree) else: work_htmltree = None # remove keys with None value? # print work_htmltree allworks_dict[key] = work_htmltree allworks_dict.pop('Thumbnail', None) #remove thumnail # pprint.pprint(allworks_dict) for key in json_allworks.keys(): graduation_work=json_allworks[key] print (graduation_work['Creator']).encode('utf-8') # pprint.pprint(graduation_work) # purge graduation_work from keys with empty vals # for key in graduation_work: # if graduation_work[key] in [None, '']: # print graduation_work create_workpage(graduation_work, key ) print '----------'