quick n dirty gallery replacement

master
Castro0o 10 years ago
parent 9cc73ca1ad
commit 4e5dd55718

File diff suppressed because one or more lines are too long

@ -89,7 +89,7 @@ def api_category(category, year): #Find all pages incategory and add to allworks
print '-------------'
print
api_category('Graduation work', '2015')
api_category('Graduation work', '2012')
# Serialize the collected works to disk. The `with` block guarantees the file
# is flushed and closed even if json.dump raises part-way through.
with open('allworks_mmdc.json', 'w') as json_allworks:
    json.dump(allworks, json_allworks)

@ -1,3 +1,5 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2, json, pprint, re
import xml.etree.ElementTree as ET
@ -15,7 +17,6 @@ def api_request(action, pagename): #get page: content, metadata, images, imageif
# pprint.pprint( json_dic )
page_id = json_dic.keys()[0]
page_content = json_dic.get(page_id)
print 'API Resquest URL:', url
return page_content
def api_page(pageid, query):
@ -66,3 +67,28 @@ def pandoc2html(mw_content):
p2 = subprocess.Popen(args_pandoc, stdin=p1.stdout, stdout=subprocess.PIPE)
html = (p2.communicate())[0]
return html
def img_fullurl(parent):
    """Point every ``<img>`` under *parent* at the URL from ``api_thumb_url``.

    Each image's current ``src`` value is passed to ``api_thumb_url``. When
    that call returns something other than ``None`` the ``src`` attribute is
    overwritten in place; otherwise the image is left as it was.

    Args:
        parent: ElementTree element whose descendants are searched for
            ``img`` elements.

    Returns:
        None. The tree is modified in place.
    """
    imgs = parent.findall('.//img')
    print('len IMG %d' % len(imgs))
    for img in imgs:
        src = img.get('src')
        fullurl = api_thumb_url(src)
        print('----- IMG %s %s %s' % (ET.tostring(img), src, fullurl))
        # Identity test: None is the "no URL found" marker, so compare with
        # `is not` rather than `!=`.
        if fullurl is not None:
            img.set('src', fullurl)
def replace_youtube(parent, youtube_id):
    """Swap the first ``<youtube>`` element under *parent* for an embed iframe.

    The element is looked up anywhere in the subtree of *parent* and is
    replaced at the position it occupies, inside whichever element actually
    contains it. Text that followed the ``<youtube>`` element (its tail) is
    carried over to the iframe. When no ``<youtube>`` element exists the tree
    is left unchanged.

    Args:
        parent: ElementTree element to search.
        youtube_id: Video identifier appended to the embed URL.

    Returns:
        None. The tree is modified in place.
    """
    youtube = parent.find('.//youtube')
    if youtube is None:
        return

    youtube_url = "http://www.youtube.com/embed/{}".format(youtube_id)
    iframe = ET.Element('iframe', {
        "width": "560",
        "height": "315",
        "frameborder": "0",
        "allowfullscreen": "allowfullscreen",
        "src": youtube_url,
    })
    # Keep whatever text followed the <youtube> tag.
    iframe.tail = youtube.tail

    # ElementTree elements do not know their parent, and Element.remove()
    # only accepts direct children, so walk the subtree to find the element
    # that really holds the <youtube> node.
    for holder in parent.iter():
        children = list(holder)
        if youtube in children:
            position = children.index(youtube)
            holder.remove(youtube)
            holder.insert(position, iframe)
            return
# def replace_gallery(parent):
# galleries = parent.findall('.//gallery')
# for gallery in galleries:
# print 'GALLERY', gallery.text()

@ -6,58 +6,87 @@
#####
import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re
from mmdc_modules import api_thumb_url, pandoc2html
from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl
#import mmdc_create_json import api_thumb_url
# Load the previously saved works JSON; the `with` block closes the file as
# soon as it has been parsed.
with open('allworks_mmdc.json', 'r') as json_allworks_file:
    json_allworks = json.load(json_allworks_file)
pages_path = 'web/work'
#def generate_xml():
# Raw strings keep the backslash escapes intact for the regex engine.
# DOTALL lets a gallery, and each File: entry inside it, span several lines,
# which is how galleries are normally written in wiki markup.
gallery_exp = re.compile(r'<gallery>.*?</gallery>', re.DOTALL)
file_exp = re.compile(r'File:(.*?)(?=File:|<\/gallery>)', re.DOTALL)
img_exp = re.compile(r'(^.*?\.(gif|jpg|jpeg|png))', re.I)


def replace_gallery(content):
    """Strip ``<gallery>`` blocks from *content* and collect their images.

    Every ``<gallery>...</gallery>`` span is removed from the text. For each
    ``File:`` entry inside a gallery whose name ends in a gif/jpg/jpeg/png
    extension, the name is resolved with ``api_thumb_url`` and wrapped in an
    ``<img src=...>`` element. Entries that are not images, or for which
    ``api_thumb_url`` returns ``None``, are skipped.

    Args:
        content: Wiki markup text, possibly containing gallery blocks.

    Returns:
        A ``(content, gallery_imgs)`` tuple: the text with all gallery blocks
        removed, and the list of ``img`` elements from all galleries in
        document order.
    """
    gallery_found = gallery_exp.findall(content)
    content = gallery_exp.sub('', content)
    print('gallery_found %s' % gallery_found)

    gallery_imgs = []
    for gallery in gallery_found:  # there may be more than one <gallery>
        print('GALLERY %s' % gallery)
        allfiles = file_exp.findall(gallery)
        print('ALLFILES %s' % allfiles)
        for imgfile in allfiles:
            found = img_exp.search(imgfile)
            if found is None:
                # Not an image file name (e.g. a PDF): nothing to embed.
                continue
            img = found.group(0)
            imgsrc = api_thumb_url(img)
            if imgsrc is None:
                # No URL could be resolved; an element with a None attribute
                # could not be serialized later.
                continue
            img_el = ET.Element('img', attrib={'src': imgsrc})
            gallery_imgs.append(img_el)
            print('gallery img %s %s' % (img, ET.tostring(img_el)))
    return content, gallery_imgs
# create_workpage: walks every key of allworks_dict and replaces the stored text with
# an element/tree built from it (None for keys no branch handles). The dict is
# modified in place and no value is returned in the visible code.
# NOTE(review): this listing has lost its indentation and appears to interleave two
# revisions of the function (one binding `htmlnode`, one binding `work_htmltree`) -
# confirm which statements are live before relying on the notes below.
# NOTE(review): `work_key` is not referenced anywhere in the visible body.
def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content
for key in allworks_dict.keys():
print key
if key in ['Description', 'Extra']: #need conversion to html, dealing:imgs,<gallery>, vimeo/youtube
allworks_dict[key] = pandoc2html( allworks_dict[key] if key in allworks_dict.keys() else '' ) # convert to HTML
htmlnode = ET.fromstring(allworks_dict[key]) # make them into node
# Variant of the branch above that strips <gallery> blocks before the pandoc conversion.
if key in ['Description', 'Extra']:
mw_content = allworks_dict[key]
if re.search(gallery_exp, mw_content):
# NOTE(review): gallery_imgs is only printed; the visible code never adds the
# images to the resulting tree.
mw_content, gallery_imgs = replace_gallery(mw_content)
print gallery_imgs
print mw_content.encode('utf-8')
# NOTE(review): `key` comes from iterating allworks_dict.keys(), so the
# `if key in allworks_dict.keys()` test is always true and '' is never used.
allworks_dict[key] = pandoc2html( mw_content if key in allworks_dict.keys() else '' ) # convert to HTML
# print 'allworks_dict[key]', key#, allworks_dict[key]
work_htmltree = html5lib.parseFragment(allworks_dict[key], namespaceHTMLElements=False)
# replace_gallery(work_htmltree)
# print work_htmltree
# print ET.tostring(work_htmltree)
# vimeo/youtube: {{vimeo|44977056}}
# External urls: [http://www.scribd.com/doc/105882261/THE-DICTATOR-S-PRACTICAL-INTERNET-GUIDE-TO-POWER-RETENTION scribd]
elif key in ['Website']:
htmlnode = ET.Element('a', attrib={'href': allworks_dict[key], 'id':key})
htmlnode.text = allworks_dict[key]
work_htmltree = ET.Element('a', attrib={'href': allworks_dict[key], 'id':key})
work_htmltree.text = allworks_dict[key]
elif key in ['Title']:
htmlnode = ET.Element('h1', attrib={'id': key})
# NOTE(review): the next statement only reads `.text`; nothing is assigned, so the
# element's text is not set here. Presumably `= allworks_dict[key]` was intended - TODO confirm.
htmlnode.text
work_htmltree = ET.Element('h1', attrib={'id': key})
# NOTE(review): same no-op attribute read as above.
work_htmltree.text
elif key in ['Creator', 'Date', 'Bio']:
htmlnode = ET.Element('p', attrib={'id': key})
# NOTE(review): no-op attribute read; the <p> text is not set here.
htmlnode.text
work_htmltree = ET.Element('p', attrib={'id': key})
# NOTE(review): no-op attribute read; the <p> text is not set here.
work_htmltree.text
elif key in ['Thumbnail_url']:
htmlnode = ET.Element('img', attrib={'src': allworks_dict[key], 'id': key})
work_htmltree = ET.Element('img', attrib={'src': allworks_dict[key], 'id': key})
print ET.tostring(work_htmltree)
else:
# Keys not handled by any branch above end up stored as None.
htmlnode = None
work_htmltree = None
# remove keys with None value?
print htmlnode
allworks_dict[key] = htmlnode
# print work_htmltree
allworks_dict[key] = work_htmltree
allworks_dict.pop('Thumbnail', None) # remove thumbnail
pprint.pprint(allworks_dict)
# pprint.pprint(allworks_dict)
# #p
# elif key in ['Thumbnail_url']:
# #<img>
# else:
# generate_xml()
# work_dict[key] = allworks_dict[key] if key in allworks_dict.keys() else ''
# print work_dict
# Print the 'Creator' field of every work loaded from the JSON file.
# NOTE(review): the two print lines look like the old and new form of one statement
# (plain vs. UTF-8 encoded) - confirm which one is live.
for key in json_allworks.keys():
graduation_work=json_allworks[key]
print graduation_work['Creator']
print (graduation_work['Creator']).encode('utf-8')
# pprint.pprint(graduation_work)

Loading…
Cancel
Save