cleaning scripts

10 years ago · 4322349886
parent 1b1933aece
commit 4322349886
2 changed files with 4 additions and 79 deletions
--- a/mmdc_modules.py
+++ b/mmdc_modules.py
@@ -41,8 +41,6 @@ def api_page(title, query):

 ##############################
 # CATEGORIES AND PAGES
-################ 
-# * MUST BE REPLACE BY SMARTER CODE (USING PY MD LIB)
 ##############################
 def mw_cats(args):
    site = Site(args.host, path=args.path)
@@ -62,37 +60,6 @@ def mw_cats(args):
    return [p.name  for p in results]


-def api_pagecategories(pageid):
-    '''Find all the categories, and their parent category of a page '''
-    query = 'action=query&pageids={}&prop=categories'.format(pageid)
-    url = endpoint + query
-    request = urllib2.urlopen(url)
-    jsonp = json.loads(request.read())    
-    json_dic = jsonp['query']['pages']
-    page_id =  json_dic.keys()[0]
-    page_categories = json_dic[page_id][u'categories']
-    all_cats = [ entry[u'title'].encode('utf-8') for entry in page_categories ] #.replace('Category:', '')
-    return all_cats
-
-
-def api_pagesincategories(category, year):
-    # Find all pages incategory and add to allworks dictionary
-    category =  category.replace(' ', '_')
-    apiCatMembers = endpoint + 'action=query&list=categorymembers&cmlimit=1000&cmtitle=Category:{}'.format(category)    
-    request = urllib2.urlopen(apiCatMembers)
-    jsonp = json.loads(request.read())    
-    graduationWorkMembers = jsonp['query']['categorymembers']
-    intersectCatMembers = []
-    if year:          
-        for member in graduationWorkMembers:
-            page_cats = api_pagecategories(member['pageid'])
-            if ('Category:{}'.format(year)) in page_cats:
-                print year, 'in', page_cats
-                intersectCatMembers.append(member)# add member to intersectCatMembers
-    else:
-        intersectCatMembers =  graduation_work_members
-    return intersectCatMembers
-
 def api_file_url(filename): # get full urls
    page_content_dict = api_page(filename, 'file')   
    if 'imageinfo' in page_content_dict.keys():
@@ -115,31 +82,6 @@ def write_html_file(html_tree, filename):
    edited.write(html)
    edited.close()

-# mw article modules
-def parse_work_page(title, content):
-#    content = content.encode('utf-8')
-    if re.match('\{\{\Graduation work', content):
-        work_dict = {}
-        work_dict['Title']=title
-        template, extra = (re.findall('\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0]
-        keyval = re.findall('\|(.*?)\=(.*?\n)', template, re.DOTALL)    # template's key/value pair
-        if extra: #append extra
-            extra = ('Extra', extra)
-            keyval.append(extra) #?
-            for pair in keyval:
-                key = pair[0]
-                val = pair[1]
-                val = val.replace('\n','')            
-                if 'Creator' in key:
-                    val = val.replace(', ', '')
-                elif 'Thumbnail' in key:
-                    thumburl = api_thumb_url(val)
-                    work_dict['Thumbnail_url']=thumburl
-                work_dict[key]=val
-            return work_dict, extra
-
-        
-
 # Alternative to parse_work_page
 def parse_work(title, content):
    workdict = {'Title':title, 'Creator':'', 'Date':'', 'Website':'', 'Thumbnail':'', 'Bio':'', 'Description':'', 'Extra':''}    
@@ -158,14 +100,11 @@ def parse_work(title, content):
            elif 'Thumbnail' in key:
                val = api_thumb_url(val)
            elif 'Website' in key:
-                val = urllib.unquote(val)
-                
+                val = urllib.unquote(val)                
            workdict[key]=val.encode('utf-8')
 #    pprint.pprint(workdict)
    return workdict

-    
-# Conversion Modules
 def pandoc2html(mw_content):
    '''convert individual mw sections to html'''
    mw_content = mw_content.encode('utf-8')
@@ -177,14 +116,6 @@ def pandoc2html(mw_content):
    html = (p2.communicate())[0]
    return html
    
-def img_fullurl(parent):
-    imgs = parent.findall('.//img')
-    for img in imgs:
-        src = img.get('src')
-        fullurl =  api_thumb_url(src)
-        if fullurl != None:            
-            img.set('src', fullurl)
-
        
 gallery_exp=re.compile('<gallery>(.*?)</gallery>', re.S)
 imgfile_exp=re.compile('(File:(.*?)\.(gif|jpg|jpeg|png))')
@@ -213,11 +144,5 @@ def index_addwork(parent, workid, href, thumbnail, title, creator, date):
                                                     'data-date':date})

    grandchild_a = ET.SubElement(child_div, 'a', attrib={'href':href, 'class':'work'}) 
-    grandgrandchild_img = ET.SubElement(grandchild_a, 'img', attrib={'class':'work', 'src':thumbnail})
-# TEXT CONTENT ?
-#    grandchild_text = ET.SubElement(child_div, 'div', attrib={'class':'work'}) 
-#    grandchild_text.text=creator
-
-
-    
+    grandgrandchild_img = ET.SubElement(grandchild_a, 'img', attrib={'class':'work', 'src':thumbnail})    
    # need to add css width to div.item
--- a/prototype_page.py
+++ b/prototype_page.py
@@ -16,8 +16,8 @@
 # build all pages

 import xml.etree.ElementTree as ET
-import html5lib, re, pprint
-from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file, mw_cats
+import html5lib, pprint
+from mmdc_modules import api_page, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, index_addwork, write_html_file, mw_cats
 from argparse import ArgumentParser

 p = ArgumentParser()