renaming, deleting, documenting

master
Castro0o 9 years ago
parent 4322349886
commit d5c847a2c9

@ -1,8 +1,15 @@
# MMD&C Graduation Website CMS: from Mediawiki to HTML
The process of creating a Website for MMD&C on the backend entails 4 steps:
Update the website with the graduation work from a particular year by running:
`python mmdc_wiki2web.py --category Graduation_work 2015`
Or index all the graduation works:
`python mmdc_wiki2web.py --category Graduation_work`
## To Do
* add auxiliary JSON creation
* replace direct API calls with the mw library
* create JSON dictionary
* create index
* create pages
* (parse pages)

9789
jquery-1.10.2.js vendored

File diff suppressed because it is too large Load Diff

@ -1,42 +0,0 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
##############
# FROM THE JSON DICTIONARY CREATE AN INDEX PAGE
#####
import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re
from mmdc_modules import write_html_file
# Load the dictionary of all works that the index page is built from.
# The context manager closes the file as soon as it has been parsed.
with open('allworks_mmdc.json', 'r') as json_allworks_file:
    json_allworks = json.load(json_allworks_file)
def insert_work(parent, element, work_dict, work_key):
    """Append the index-page thumbnail markup for one work to *parent*.

    Only ``element == 'Graduation_work thumbnail'`` is handled; any other
    value leaves *parent* untouched.

    The generated structure is::

        <div class="item" id=work_key>
          <a href="#" class="work"><img class="work" src=Thumbnail_url/></a>
          <div class="work"><p>title</p><p>creator</p><p>date</p></div>
        </div>

    Args:
        parent: ElementTree element that receives the new ``div``.
        element: Kind of markup to insert.
        work_dict: Dictionary describing one work. The keys read are
            ``Thumbnail_url``, ``Date``, ``Title`` and ``Creator``.
        work_key: Value used as the ``id`` attribute of the new ``div``.
    """
    if element != 'Graduation_work thumbnail':
        return
    print('Graduation_work thumbnail')

    # Content from json_allworks. Missing or null fields become empty
    # strings: a None attribute value cannot be serialized by ElementTree,
    # and a missing key would otherwise abort the whole index build.
    thumb = work_dict.get('Thumbnail_url') or ''
    date = work_dict.get('Date') or ''
    title = (work_dict.get('Title') or '').replace('_', ' ')
    creator = work_dict.get('Creator') or ''

    # HTML elements
    item = ET.SubElement(parent, 'div', attrib={'class': 'item', 'id': work_key})
    # NOTE(review): the original comment reads "href article"; presumably the
    # link should eventually point at the work's own page instead of '#'.
    link = ET.SubElement(item, 'a', attrib={'href': '#', 'class': 'work'})
    ET.SubElement(link, 'img', attrib={'class': 'work', 'src': thumb})
    textbox = ET.SubElement(item, 'div', attrib={'class': 'work'})
    for content in (title, creator, date):
        paragraph = ET.SubElement(textbox, 'p', attrib={'class': 'work'})
        paragraph.text = content
def edit_index(filepath, json_allworks_dict):
    """Parse the index template and add one thumbnail entry per work.

    Args:
        filepath: Path of the HTML template to parse.
        json_allworks_dict: Dictionary mapping a work key to that work's
            dictionary.

    Returns:
        The tree produced by ``html5lib.parse``, with every work inserted
        into the ``<div id="section02">`` element.

    Raises:
        IndexError: If the template has no ``<div id="section02">``.
    """
    # Close the template as soon as it is parsed instead of leaking the handle.
    with open(filepath, 'r') as input_file:
        tree = html5lib.parse(input_file, namespaceHTMLElements=False)
    div_section02 = tree.findall(".//div[@id='section02']")[0]
    for key in json_allworks_dict:
        graduation_work = json_allworks_dict[key]
        insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key)
    return tree
# Fill the index template with every loaded work and write the result out
# as the site's index page.
index_tree = edit_index('web/index-template.html', json_allworks)
write_html_file(index_tree, 'web/index.html')

@ -1,225 +0,0 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
##############
# FROM THE JSON DICTIONARY CREATE THE WORK PAGES
#####
import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re
from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file, pandoc
#import mmdc_create_json import api_thumb_url
# Load the dictionary of all works that the pages are generated from.
# The context manager closes the file as soon as it has been parsed.
with open('allworks_mmdc.json', 'r') as json_allworks_file:
    json_allworks = json.load(json_allworks_file)
pages_path = 'web/work'

# Patterns for the MediaWiki markup found in the work descriptions.
# They are raw strings so the regex escapes reach `re` untouched; the
# previous plain strings relied on invalid escape sequences such as "\/".
# One <gallery>...</gallery> block (single line only: "." does not match "\n").
gallery_exp = re.compile(r'<gallery>.*?</gallery>')
# Text following each "File:" prefix inside a gallery.
file_exp = re.compile(r'File:(.*?)(?=File:|</gallery>)')
# Image entries: group 1 is the "File:"/"Image:" prefix, group 2 the full
# file name, group 3 the base name and group 4 the extension.
img_exp = re.compile(r'(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|</gallery>)', re.I)
# Two-part templates such as {{youtube|<hash>}}: group 1 is the part before
# the "|", group 2 the part after it.
video_exp = re.compile(r'\{\{(.*?)\|(.*?)\}\}')
def replace_gallery(content):
    """Strip every <gallery> block from *content* and resolve its images.

    Args:
        content: Text that may contain one or more
            ``<gallery>...</gallery>`` blocks.

    Returns:
        A ``(content, gallery_imgs)`` tuple: the text with all gallery
        blocks removed, and a list holding the result of ``api_file_url``
        for each image file name found inside the galleries.
    """
    galleries = re.findall(gallery_exp, content)
    stripped = re.sub(gallery_exp, '', content)

    gallery_imgs = []
    for gallery in galleries:  # there may be more than one <gallery>
        # found[1] is the complete file name captured by img_exp;
        # api_file_url looks up the original image for that name.
        gallery_imgs.extend(
            api_file_url(found[1]) for found in re.findall(img_exp, gallery)
        )

    print('gallery_imgs %s' % (gallery_imgs,))
    return stripped, gallery_imgs
def replace_video(content):
    """Collect embeddable video URLs from ``{{provider|hash}}`` templates.

    Templates whose provider is ``youtube`` or ``vimeo`` (case-insensitive)
    are turned into embed URLs and left in place in the text. A template
    with any other provider is removed from the text.

    Previously a single unrecognised template triggered
    ``re.sub(video_exp, '', content)``, which also stripped every
    recognised video template; now only the unrecognised template itself
    is removed.

    Args:
        content: Text that may contain ``{{provider|hash}}`` templates.

    Returns:
        A ``(content, videos)`` tuple: the text without the unrecognised
        templates, and the list of embed URLs in order of appearance.
    """
    embed_prefixes = {
        'youtube': 'https://www.youtube.com/embed/',
        'vimeo': 'https://player.vimeo.com/video/',
    }
    videos = []
    # finditer keeps scanning the original string, so rebinding `content`
    # inside the loop does not disturb the iteration.
    for match in video_exp.finditer(content):
        # No str() coercion: under Python 2 it raised on non-ASCII text.
        provider, video_hash = match.group(1), match.group(2)
        prefix = embed_prefixes.get(provider.lower())
        if prefix is not None:
            videos.append(prefix + video_hash)
            # TODO: replace the template inline with an
            # <iframe src=... width='600px' height='450px'> element.
        else:
            content = content.replace(match.group(0), '')
    return content, videos
def workpage_div_content(tree, div_id, element, content):
    """Add one piece of content to the ``<div>`` whose id is ``div_id.lower()``.

    Args:
        tree: Parsed template; anything offering an ElementTree ``find``.
        div_id: Id of the target div; it is lower-cased before the lookup.
        element: Either a tag name or a ready-made ElementTree element.
            ``'img'`` adds an image with *content* as ``src``; ``'p'``,
            ``'h1'`` and ``'h2'`` add a text element; ``'a'`` adds a link
            with *content* as both ``href`` and text. An element instance
            is appended as-is. Any other value appends a yellow
            placeholder ``<div id="test">``.
        content: Text or URL for the new element. It is ignored when
            *element* is an element instance or an unknown tag.

    Raises:
        ValueError: If the tree has no div with the requested id.
    """
    parent_str = ".//div[@id='{}']".format(div_id.lower())
    parent = tree.find(parent_str)
    if parent is None:
        raise ValueError('no element matches {}'.format(parent_str))

    if ET.iselement(element):
        # A ready-made chunk is attached with append(); SubElement only
        # accepts a tag name.
        parent.append(element)
    elif element == 'img':  # compare by value, not identity
        ET.SubElement(parent, 'img', attrib={'src': content})
    elif element in ('p', 'h1', 'h2'):
        sub = ET.SubElement(parent, element)
        sub.text = content
    elif element == 'a':
        sub = ET.SubElement(parent, element, attrib={'href': content})
        sub.text = content
    else:
        # Unknown tag: insert a visible placeholder.
        placeholder = ET.Element('div', attrib={'id': 'test', 'style': 'background:yellow'})
        parent.append(placeholder)
    print('%s %s' % (parent_str, ET.tostring(parent)))
def create_workpage(work, work_key, tree):
    """Fill the work-page template *tree* with the fields of one work.

    Each recognised key of *work* is handed to ``workpage_div_content``
    with the key as the div id:

    * ``Creator``, ``Date``, ``Bio`` -> ``<p>``
    * ``Title`` -> ``<h1>``
    * ``Thumbnail`` -> ``<img>`` whose source is ``api_file_url(value)``
    * ``Website`` -> ``<a>``
    * ``Description``, ``Extra`` -> a yellow placeholder ``<div>`` (the
      real content is not inserted yet, see the TODO below)

    Other keys are ignored.

    Args:
        work: Dictionary describing one work.
        work_key: Key of the work; currently unused.
        tree: Parsed work-page template, modified in place.
    """
    for key in work:
        value = work[key]
        if key in ('Creator', 'Date', 'Bio'):
            workpage_div_content(tree, key, 'p', value)
        elif key == 'Title':
            workpage_div_content(tree, key, 'h1', value)
        elif key == 'Thumbnail':
            workpage_div_content(tree, key, 'img', api_file_url(value))
        elif key == 'Website':
            workpage_div_content(tree, key, 'a', value)
        elif key in ('Description', 'Extra'):
            test_el = ET.Element('div', attrib={'id': 'test', 'style': 'background:yellow'})
            workpage_div_content(tree, key, test_el, '')
            # TODO: insert the real content instead of the placeholder.
            # Approach sketched in the earlier, disabled implementation:
            #   1. run replace_gallery / replace_video on the wiki text and
            #      add the resulting <img> / <iframe> elements to div.av;
            #   2. convert the remaining wiki text with pandoc2html;
            #   3. parse that HTML with html5lib and append the elements
            #      under its <body> to div.body;
            #   4. rewrite every <img src> through api_file_url.
            # Known problem from that attempt: Thumbnail_url can be None.
def edit_index(filepath, json_allworks_dict):
    """Parse the index template and add one thumbnail entry per work.

    NOTE(review): ``insert_work`` is neither defined nor imported in the
    visible part of this file, so calling this function raises NameError
    unless that name is provided elsewhere -- confirm, or remove this copy.

    Args:
        filepath: Path of the HTML template to parse.
        json_allworks_dict: Dictionary mapping a work key to that work's
            dictionary.

    Returns:
        The tree produced by ``html5lib.parse`` after the insertions into
        the ``<div id="section02">`` element.
    """
    # Close the template as soon as it is parsed instead of leaking the handle.
    with open(filepath, 'r') as input_file:
        tree = html5lib.parse(input_file, namespaceHTMLElements=False)
    div_section02 = tree.find(".//div[@id='section02']")
    for key in json_allworks_dict:
        graduation_work = json_allworks_dict[key]
        insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key)
    return tree
#worktemplate = open('web/work-template.html', 'r')
# For every work, collect the template fields and build the name of its
# page. Every field falls back to an empty value when the work lacks it,
# so an incomplete entry no longer aborts the loop with a KeyError while
# building the file name or reading the title.
for key in json_allworks:
    work = json_allworks[key]
    creator = work['Creator'].encode('utf-8') if 'Creator' in work else ''
    date = work.get('Date', '')
    website = work.get('Website', '')
    thumbnail = work.get('Thumbnail_url', '')
    # Double quotes are backslash-escaped in the long text fields.
    bio = work['Bio'].encode('utf-8').replace('"', '\\"') if 'Bio' in work else ''
    description = work['Description'].encode('utf-8').replace('"', '\\"') if 'Description' in work else ''
    extra = work['Extra'].encode('utf-8').replace('"', '\\"') if 'Extra' in work else ''
    title = work.get('Title', '')

    # The file name uses an ASCII-only creator with spaces replaced by
    # underscores.
    creator_slug = work.get('Creator', u'').encode('ascii', 'ignore').replace(' ', '_')
    work_file = 'web/{}-{}-{}.html'.format(date, creator_slug, key)
    print('%s %s' % (work_file, website))

    if extra:
        extra_html = pandoc2html(extra)
        print(extra_html)

    # TODO: page rendering is disabled. Two approaches were tried:
    #   * pandoc(filename=work_file, template='web/work-template.html',
    #     title=..., creator=..., date=..., website=..., thumbnail=...,
    #     bio=..., description=..., extra=...)
    #   * parsing 'web/work-template.html' with html5lib, calling
    #     create_workpage(work, key, work_tree), then
    #     write_html_file(work_tree, work_file)
### ISSUES
# pandoc mw->HTML NOT WORKING
# substitute galleries/videos with the corresponding elements
# Galleries, files and videos should stay in their original places
# Specify positions in template
# insert <p> into <div class="body">
# separate Extra and Description

@ -1,47 +0,0 @@
<!DOCTYPE HTML>
<html>
<head>
<meta charset="utf-8" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
<!--script type="text/javascript" src="jquery-1.10.2.js"></script-->
<script type="text/javascript">
// Works dictionary loaded by readJSON(); undefined until the request finishes.
var myjson;

/**
 * Log the title, creator and description of one work to the console.
 *
 * Does nothing beyond a warning when the JSON has not been loaded yet or
 * holds no entry for the id; previously both cases threw a TypeError.
 *
 * @param {string} workid Key of the work inside `myjson`.
 */
function query(workid){
    console.log(workid);
    var work = myjson && myjson[workid];
    if (!work) {
        console.warn('No work loaded for id', workid);
        return;
    }
    var title = work['Title'];
    var creator = work['Creator'];
    var description = work['Description'];
    console.log(title, creator, description);
}
/**
 * Load allworks_mmdc.json into `myjson` and make every <span> report the
 * work whose key equals the span's id.
 *
 * The calls to testJSON() and hover() were dropped: neither function is
 * defined on this page, so the first one raised a ReferenceError inside
 * the success callback.
 */
function readJSON(){
    $.getJSON("allworks_mmdc.json", function(data){
        myjson = data;
        console.log(myjson);
        console.log(Object.keys(myjson));
    });

    // With a single handler, jQuery's .hover() runs it on both mouseenter
    // and mouseleave, so each pass over a span logs twice.
    $('span').hover(function(){
        var thisid = $(this).attr('id');
        query(thisid);
    });
}
</script>
</head>
<body onload="javascript:readJSON();" >
<h3>Testing <span id="9961">JSON</span></h3>
<h3>Hover over the words <span id="9939">JSON</span> and look at the console</h3>
</body>
</html>

@ -0,0 +1,2 @@
#!/bin/sh
# Run the wiki-to-web script for the Graduation_work category, year 2015.
python mmdc_wiki2web.py --category Graduation_work 2015
Loading…
Cancel
Save