From 4322349886d2d29422cd3bb87b82303a1c0ec3ea Mon Sep 17 00:00:00 2001 From: Castro0o Date: Thu, 7 May 2015 08:20:43 +0200 Subject: [PATCH] cleaning scripts --- mmdc_modules.py | 79 ++--------------------------------------------- prototype_page.py | 4 +-- 2 files changed, 4 insertions(+), 79 deletions(-) diff --git a/mmdc_modules.py b/mmdc_modules.py index 85b353d..86ebd1a 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -41,8 +41,6 @@ def api_page(title, query): ############################## # CATEGORIES AND PAGES -################ -# * MUST BE REPLACE BY SMARTER CODE (USING PY MD LIB) ############################## def mw_cats(args): site = Site(args.host, path=args.path) @@ -62,37 +60,6 @@ def mw_cats(args): return [p.name for p in results] -def api_pagecategories(pageid): - '''Find all the categories, and their parent category of a page ''' - query = 'action=query&pageids={}&prop=categories'.format(pageid) - url = endpoint + query - request = urllib2.urlopen(url) - jsonp = json.loads(request.read()) - json_dic = jsonp['query']['pages'] - page_id = json_dic.keys()[0] - page_categories = json_dic[page_id][u'categories'] - all_cats = [ entry[u'title'].encode('utf-8') for entry in page_categories ] #.replace('Category:', '') - return all_cats - - -def api_pagesincategories(category, year): - # Find all pages incategory and add to allworks dictionary - category = category.replace(' ', '_') - apiCatMembers = endpoint + 'action=query&list=categorymembers&cmlimit=1000&cmtitle=Category:{}'.format(category) - request = urllib2.urlopen(apiCatMembers) - jsonp = json.loads(request.read()) - graduationWorkMembers = jsonp['query']['categorymembers'] - intersectCatMembers = [] - if year: - for member in graduationWorkMembers: - page_cats = api_pagecategories(member['pageid']) - if ('Category:{}'.format(year)) in page_cats: - print year, 'in', page_cats - intersectCatMembers.append(member)# add member to intersectCatMembers - else: - intersectCatMembers = graduation_work_members - return intersectCatMembers - def api_file_url(filename): # get full urls page_content_dict = api_page(filename, 'file') if 'imageinfo' in page_content_dict.keys(): @@ -115,31 +82,6 @@ def write_html_file(html_tree, filename): edited.write(html) edited.close() -# mw article modules -def parse_work_page(title, content): -# content = content.encode('utf-8') - if re.match('\{\{\Graduation work', content): - work_dict = {} - work_dict['Title']=title - template, extra = (re.findall('\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0] - keyval = re.findall('\|(.*?)\=(.*?\n)', template, re.DOTALL) # template's key/value pair - if extra: #append extra - extra = ('Extra', extra) - keyval.append(extra) #? - for pair in keyval: - key = pair[0] - val = pair[1] - val = val.replace('\n','') - if 'Creator' in key: - val = val.replace(', ', '') - elif 'Thumbnail' in key: - thumburl = api_thumb_url(val) - work_dict['Thumbnail_url']=thumburl - work_dict[key]=val - return work_dict, extra - - - # Alternative to parse_work_page def parse_work(title, content): workdict = {'Title':title, 'Creator':'', 'Date':'', 'Website':'', 'Thumbnail':'', 'Bio':'', 'Description':'', 'Extra':''} @@ -158,14 +100,11 @@ def parse_work(title, content): elif 'Thumbnail' in key: val = api_thumb_url(val) elif 'Website' in key: - val = urllib.unquote(val) - + val = urllib.unquote(val) workdict[key]=val.encode('utf-8') # pprint.pprint(workdict) return workdict - -# Conversion Modules def pandoc2html(mw_content): '''convert individual mw sections to html''' mw_content = mw_content.encode('utf-8') @@ -177,14 +116,6 @@ def pandoc2html(mw_content): html = (p2.communicate())[0] return html -def img_fullurl(parent): - imgs = parent.findall('.//img') - for img in imgs: - src = img.get('src') - fullurl = api_thumb_url(src) - if fullurl != None: - img.set('src', fullurl) - gallery_exp=re.compile('(.*?)', re.S) imgfile_exp=re.compile('(File:(.*?)\.(gif|jpg|jpeg|png))') @@ -213,11 +144,5 @@ def index_addwork(parent, workid, href, thumbnail, title, creator, date): 'data-date':date}) grandchild_a = ET.SubElement(child_div, 'a', attrib={'href':href, 'class':'work'}) - grandgrandchild_img = ET.SubElement(grandchild_a, 'img', attrib={'class':'work', 'src':thumbnail}) -# TEXT CONTENT ? -# grandchild_text = ET.SubElement(child_div, 'div', attrib={'class':'work'}) -# grandchild_text.text=creator - - - + grandgrandchild_img = ET.SubElement(grandchild_a, 'img', attrib={'class':'work', 'src':thumbnail}) # need to add css width to div.item diff --git a/prototype_page.py b/prototype_page.py index 66e06e7..07b0e3a 100755 --- a/prototype_page.py +++ b/prototype_page.py @@ -16,8 +16,8 @@ # build all pages import xml.etree.ElementTree as ET -import html5lib, re, pprint -from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, gallery_exp, video_exp, api_pagesincategories, index_addwork, write_html_file, mw_cats +import html5lib, pprint +from mmdc_modules import api_page, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, index_addwork, write_html_file, mw_cats from argparse import ArgumentParser p = ArgumentParser()