renaming, deleting, documenting

10 years ago · d5c847a2c9
parent 4322349886
commit d5c847a2c9
7 changed files with 14 additions and 10108 deletions
--- a/README.md
+++ b/README.md
@ -1,8 +1,15 @@
 # MMD&C Graduation Website CMS: from Mediawiki to HTML

-The process of creating a Website for MMD&C on the backend entails 4 steps:
+Update the website with the graduation work from a particular year by running:
+`python mmdc_wiki2web.py --category Graduation_work 2015`
+
+Or index all the graduation works:
+`python mmdc_wiki2web.py --category Graduation_work` 
+
+
+## To Do
+* add auxiliary JSON creation
+* replace direct API calls with the mw library
+
+

-* create JSON dictionary
-* create index
-* create pages
-* (parse pages
--- a/jquery-1.10.2.js
+++ b/jquery-1.10.2.js
--- a/mmdc_index.py
+++ b/mmdc_index.py
@ -1,42 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-##############
-# FROM THE JSON DICTIONARY CREATE AN INDEX PAGE
-#####
-import xml.etree.ElementTree as ET
-import html5lib, urllib2, json, pprint, re
-from mmdc_modules import write_html_file
-
-json_allworks_file = open('allworks_mmdc.json', 'r') # save json 
-json_allworks = json.loads(json_allworks_file.read())
-
-def insert_work(parent, element, work_dict, work_key):
-    if element == 'Graduation_work thumbnail':
-        print 'Graduation_work thumbnail'
-        # Content from json_allworks
-        thumb = work_dict['Thumbnail_url']
-        date = work_dict['Date']
-        title = (work_dict['Title']).replace('_', ' ')
-        creator = work_dict['Creator']
-        website = work_dict['Website'] if 'Website' in work_dict.keys() else ''
-        # HTML Elements
-        child_div = ET.SubElement(parent, 'div', attrib={'class':'item', 'id':work_key})
-        grandchild_a = ET.SubElement(child_div, 'a', attrib={'href':'#', 'class':'work'}) #href article
-        grandchild_img = ET.SubElement(grandchild_a, 'img', attrib={'class':'work', 'src':thumb})
-        grandchild_textbox = ET.SubElement(child_div, 'div', attrib={'class':'work'})
-        for content in [title, creator, date]:
-            grandgrandchild_p = ET.SubElement(grandchild_textbox, 'p', attrib={'class':'work'})
-            grandgrandchild_p.text = content
-            
-def edit_index(filepath, json_allworks_dict):
-    input_file = open(filepath, 'r') 
-    tree = html5lib.parse(input_file, namespaceHTMLElements=False)
-    div_section02 = (tree.findall(".//div[@id='section02']"))[0]
-    for key in json_allworks_dict.keys():
-        graduation_work=json_allworks_dict[key]
-        insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key )
-    return tree
-    
-index_tree = edit_index('web/index-template.html', json_allworks)
-write_html_file(index_tree, 'web/index.html')
--- a/mmdc_pages.py
+++ b/mmdc_pages.py
@ -1,225 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-
-##############
-# FROM THE JSON DICTIONARY CREATE AN INDEX PAGE
-#####
-import xml.etree.ElementTree as ET
-import html5lib, urllib2, json, pprint, re
-from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url,  write_html_file, pandoc
-#import mmdc_create_json import api_thumb_url
-json_allworks_file = open('allworks_mmdc.json', 'r') # save json 
-json_allworks = json.loads(json_allworks_file.read())
-pages_path = 'web/work'
-
-#def generate_xml():
-gallery_exp=re.compile('<gallery>.*?</gallery>')
-file_exp=re.compile('File:(.*?)(?=File:|<\/gallery>)')
-img_exp=re.compile('(File:|Image:)((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|Image:|<\/gallery>)', re.I)
-video_exp=re.compile('\{\{(.*?)\|(.*?)\}\}')
-
-def replace_gallery(content):
-    gallery_imgs = []
-    gallery_found = re.findall(gallery_exp, content)
-    content = re.sub(gallery_exp,  '', content)
-    for gallery in gallery_found: # in case there is more than 1 <gallery>
-        allfiles =re.findall(img_exp, gallery)
-        for imgfile in allfiles:
-            imgfile = imgfile[1]
-            imgsrc = api_file_url(imgfile) # search for original image
-            gallery_imgs.append(imgsrc)
-            print 'gallery_imgs', gallery_imgs
-    # from <gallery>.*</gallery> imgs, return list of img ET elements
-    # replace <gallery>.*</gallery> with ''
-    return content, gallery_imgs
-
-def replace_video(content):
-    videos = []
-    videos_found = re.findall(video_exp, content)
-    for video in videos_found:
-        video_provider =  str(video[0])
-        video_hash = str(video[1])
-        video_src = None
-        if (video_provider.lower()) == 'youtube':            
-            video_src="https://www.youtube.com/embed/" + video_hash
-        elif (video_provider.lower()) == 'vimeo':            
-            video_src="https://player.vimeo.com/video/" + video_hash
-        if video_src:
-            videos.append(video_src)
-            iframe = "<iframe src='{}' width='600px' height='450px'></iframe>".format(video_src)
-#            content = re.sub(video_exp, '       iframe    ', content)
-        else:
-            content = re.sub(video_exp,  '', content)
-    return content, videos
-    
-
-def workpage_div_content(tree, div_id, element, content):            
-    parent_str = ".//div[@id='{}']".format( div_id.lower() )
-    parent = tree.find(parent_str)
-    if element is 'img':
-        lxml.SubElement(parent, 'img', attrib={'src': content})    
-    elif element in ['p','h1','h2']:
-        sub = lxml.SubElement(parent, element)    
-        sub.text = content
-    elif element == 'a':
-        sub = ET.SubElement(parent, element, attrib={'href':content})    
-        sub.text = content
-    else:# element == 'span':
-        test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})
-        test = ET.SubElement(parent, test_el)
-#        parent.append(test)
-#        sub = ET.SubElement(parent, element)    
-        #sub.text = content  = ' '+ div_id
- #        parent.append(content)
-        
-    print  parent_str, ET.tostring(parent)
-
-
-def create_workpage( work, work_key, tree): # replace text content in dict with html nodes, holding the content
-#    pprint.pprint( work)
-    for key in work.keys():
-#        print work[key]
-
-        div_header = tree.find(".//div[@class='header']")
-        div_body = tree.find(".//div[@class='body']")
-        div_av = tree.find(".//div[@class='av']")
-
-        if key in ['Creator', 'Date', 'Bio']:
-            workpage_div_content(tree, key, 'p', work[key])
-        elif key == 'Title':
-            workpage_div_content(tree, key, 'h1', work[key])
-        elif key == 'Thumbnail':
-            thumb = api_file_url(work[key])
-            workpage_div_content(tree, key, 'img', thumb)                        
-        elif key == 'Website':
-            workpage_div_content(tree, key, 'a', work[key])            
-        elif key in ['Description', 'Extra']:
-            test_el = ET.Element('div', attrib={'id':'test', 'style':'background:yellow'})
-            workpage_div_content(tree, key, test_el, '')
-
-            # HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ???
-
-            
-            # test_sub = ET.SubElement(test_el, 'span')
-        # test_sub.text = content
-        # sub = ET.SubElement(parent, test_el)    
-
-        #             mw_content = work[key]
-# #             if re.search(gallery_exp, mw_content):
-# #                 # replace_gallery must replace the gallery inline
-# #                 mw_content, gallery_imgs = replace_gallery(mw_content)
-# #                 work['Images'] = gallery_imgs
-# #                 for imgsrc in gallery_imgs:
-# #                     img_el = ET.SubElement(div_av, 'img', attrib={'src': imgsrc})
-# #             elif re.search(video_exp, mw_content):
-# #                 mw_content, videos = replace_video(mw_content)
-# #                 work['Video'] = videos
-# # #                for video in videos:
-# # #                    iframe_el = ET.SubElement(div_av, 'iframe', attrib={'src': video, 'width':'600px', 'height':'450px'})
-# # #                    print 'VIDEO',  ET.tostring(iframe_el)
-
-#             print '--------------'                    
-#             print 'mw_content',  mw_content
-#             print '--------------'
-#             html_content = pandoc2html( mw_content if key in work.keys() else '') # convert to HTML
-#             print 'html_content', html_content
-#             document_el = html5lib.parse(html_content, namespaceHTMLElements=False)#ET.fromstring(html_content)
-#             print ET.tostring(document_el)
-#             print 'document_el', document_el, ET.iselement(document_el)
-#             all_el = document_el.findall('body//')
-#             if all_el:
-#                 all_el.reverse()
-
-#             for el in all_el:
-#                 print 'el', ET.tostring(el) 
-#                 div_body.append(el) 
-#             imgs = document_el.findall('.//img')
-#             # if imgs:
-#             #     for img in  imgs:
-#             #         src = api_file_url(img.get('src'))
-#             #         img.set('src', src)                    
-#             #         print 'IMG', img, src
-#             print "****************************"
-#             print ET.tostring(div_body)
-#             print "****************************"
-
-# #        elif key in ['Thumbnail_url']:
-# #            print Thumbnail_url, work[key]
-            
-            
-#         elif key in ['Thumbnail_url']:
-#             print  'THUMBNAIL_URL',  work[key]
-#             # ERROR - Thumbnail url is None
-#             # work_el = ET.SubElement(div_header, 'img', attrib={'src': work[key], 'id': key})        
-#         else:
-#             work_el = None       # remove keys with None value?
-#         work[key] = work_el
-#         work.pop('Thumbnail_url', None) #remove Thumbnail_url
-#     pprint.pprint(work)
-
-
-def edit_index(filepath, json_allworks_dict):
-    input_file = open(filepath, 'r') 
-    tree = html5lib.parse(input_file, namespaceHTMLElements=False)
-    div_section02 = tree.find(".//div[@id='section02']")
-    for key in json_allworks_dict.keys():
-        graduation_work=json_allworks_dict[key]
-        insert_work(div_section02, 'Graduation_work thumbnail', graduation_work, key )
-    return tree
-
-
-#worktemplate = open('web/work-template.html', 'r') 
-for key in json_allworks.keys():
-    work=json_allworks[key]    
-    creator = work['Creator'].encode('utf-8') if 'Creator' in work else ''
-    date = work['Date'] if 'Date' in work else ''
-    website=work['Website'] if 'Website' in work else '' 
-    thumbnail=work['Thumbnail_url'] if 'Thumbnail_url' in work else ''
-    bio=(work['Bio'].encode('utf-8')).replace('"','\\"') if 'Bio' in work else '' 
-    description=(work['Description']).replace('"','\\"').encode('utf-8') if 'Description' in work else '' 
-    extra=(work['Extra'].encode('utf-8')).replace('"','\\"') if 'Extra' in work else ''     
-    work_file = 'web/{}-{}-{}.html'.format(work['Date'], (work['Creator'].encode('ascii', 'ignore')).replace(' ','_'), key)
-    title = work['Title']
-
-    print work_file, website
-    # if key in work.keys() else ''
-
-    if extra:
-        extra_html = pandoc2html(extra)
-        print (extra_html)
-
-    # pandoc( filename=work_file, \
-    #         template='web/work-template.html', \
-    #         title=title, \
-    #         creator=creator, \
-    #         date=date, \
-    #         website=website, \
-    #         thumbnail=thumbnail, \
-    #         bio=bio, \
-    #         description=description, \
-    #         extra=extra ) # convert to HTML
-    # print '= = = = = = ='
-    # print html_content 
-    # print '= = = = = = ='
-    
-#    creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_')        
-#    description = work['Description']
-
-#        print work_file
-#        work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False)
-#        create_workpage(work, key, work_tree )
-#        write_html_file(work_tree, work_file)
-
-
-### ISSSUES
-# pandoc mw->HTML   NOT WORKING
-# sub gallery/videos with corresponding elements
-
-
-
-# Gallaries, Files, videos,  in orginal places correct place
-# Specificy  positions in template 
-# insert <p> into <div class="body"> 
-# separate Extra and Description
-
-
--- a/prototype_page.py
+++ b/prototype_page.py
--- a/prototype_json.html
+++ b/prototype_json.html
@ -1,47 +0,0 @@
-<!DOCTYPE HTML>
-<html>
-  <head>
-    <meta charset="utf-8" />
- <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
-    <!--script type="text/javascript" src="jquery-1.10.2.js"></script-->
-    <script type="text/javascript">
-
-
-var myjson;
-
-function query(workid){
-    console.log(workid);
-    var title = myjson[workid]['Title'];
-    var creator = myjson[workid]['Creator'];
-    var description = myjson[workid]['Description'];
-    console.log(title, creator, description);
-
-}
-
-function readJSON(){
-    $.getJSON( "allworks_mmdc.json", function(data){
-    	myjson=data;
-	console.log(myjson);	
-        console.log(Object.keys(myjson));
-	testJSON(myjson);
-	hover();
-    })    
-
-$('span').hover(
-    function(){
-	var thisid = $(this).attr('id')
-	query(thisid);
-    }
-)
-
-}
-      
-    </script>
-
-</head>
-
-  <body onload="javascript:readJSON();" >
-    <h3>Testing <span id="9961">JSON</span></h3>
-    <h3>Hover over the words <span id="9939">JSON</span> and look at the console</h3>
-  </body>
-</html>
--- a/update.sh
+++ b/update.sh
@ -0,0 +1,2 @@
+#!/bin/sh
+python mmdc_wiki2web.py --category Graduation_work 2015