operational

master
Castro0o 9 years ago
parent 0f4e675604
commit b629c6e33b

@ -12,18 +12,7 @@ Or index all the graduation works:
`python mmdc_wiki2web.py --category Graduation_work`
## Thumbnails
Thumbnails in work pages are hidden. This can be changed.
style_projectpage.css:
`#thumbnail {
display: none;
}`
## To Do
* remove thumbnail from page_imgs

@ -48,13 +48,16 @@ def mw_page_cats(site, page):
return cats
def mw_page_imgsurl(site, page):
def mw_page_imgsurl(site, page, thumb):
#all the imgs in a page
#except the thumbnail: any image whose url equals thumb is left out
#returns a dict mapping img.name to the image's url
imgs = page.images()
imgs = list(imgs)
urls = { img.name: (img.imageinfo)['url'] for img in imgs}
return urls
imgs = list(imgs)
imgs_dict = { img.name:(img.imageinfo)['url'] for img in imgs if (img.imageinfo)['url'] != thumb } # exclude thumb
imgs_dict = { key.capitalize():value for key, value in imgs_dict.items()}
# capitalize image name, so it can be looked up later
return imgs_dict
def mw_img_url(site, img): #find full of an img
@ -77,14 +80,12 @@ def write_html_file(html_tree, filename):
edited.close()
def parse_work(site, title, content):
# print title, content
workdict = {'Title':title, 'Creator':u'', 'Date':u'', 'Website':u'', 'Thumbnail':u'', 'Bio':u'', 'Description':u'', 'Extra':u''}
if re.match(u'\{\{\Graduation work', content):
template, extra = (re.findall(u'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL))[0]
if extra:
workdict['Extra'] = extra
# template's key/value pair
# Note:Extra value is NOT CAPTURED by this regex
workdict['Extra'] = extra
keyval = re.findall(u'\|(.*?)\=(.*?\n)', template, re.DOTALL)
for pair in keyval:
key = pair[0]
@ -96,7 +97,6 @@ def parse_work(site, title, content):
elif 'Website' in key:
val = urllib.unquote( val)
workdict[key]=val
# pprint.pprint(workdict)
return workdict
def pandoc2html(mw_content):

@ -4,7 +4,6 @@
import xml.etree.ElementTree as ET
import html5lib, urllib, pprint
from mmdc_modules import pandoc2html, parse_work, write_html_file, mw_cats, mw_page_imgsurl, mw_img_url, mw_page_text, mwsite, mw_page_cats, mw_page, remove_cats, find_authors, replace_video, replace_img_a_tag, index_addwork
# unused from bs_modules: replace_gallery, replace_video, index_addwork,
from argparse import ArgumentParser
from random import shuffle as shuffle
@ -17,9 +16,7 @@ p.add_argument("--host", default="pzwiki.wdka.nl")
p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with /")
p.add_argument("--category", "-c", nargs="*", default=[["2015", "Graduation_work"]], action="append", help="category to query, use -c foo -c bar to intersect multiple categories")
p.add_argument("--preview", help='Preview page. Will override category querying. Use: --page "Name Of Wiki Page"')
args = p.parse_args()
print 'args', args
######
@ -32,7 +29,8 @@ def create_page(memberpages, mode):
print member
page = mw_page(site, member)
page_text = mw_page_text(site, page)
articledict = parse_work(site, member, page_text) # create dictionary w/ page co
articledict = parse_work(site, member, page_text) # create dictionary
# Title, Creator, Date, Website, Thumbnail, Bio, Description, Extra
if len(articledict['Creator'])>0 and len(articledict['Title'])>0 and len(articledict['Thumbnail'])>0:
for key in articledict.keys():# convert Extra, Description, Bio to HTML
if key in ['Extra', 'Description', 'Bio']:
@ -43,12 +41,8 @@ def create_page(memberpages, mode):
articledict[key] = remove_cats(articledict['Content'])
articledict[key] = replace_video(articledict['Content'])
# page_imgs = mw_page_imgsurl(site, page)
# page_imgs = { key.capitalize():value for key, value in page_imgs.items()} # capitalize keys, so can be called later
# #articledict = {'Title': member, 'Content': page_text, 'Categories':page_cats, 'Images': page_imgs}
articledict['Imgs'] = mw_page_imgsurl(site, page, articledict['Thumbnail'] )
page_tree = html5lib.parse(page_template, namespaceHTMLElements=False)
page_title = page_tree.find('.//title')
page_title.text=articledict['Title']#.decode('utf-8')
@ -69,19 +63,17 @@ def create_page(memberpages, mode):
page_extra.extend(page_extra_el)
page_website = page_tree.find('.//p[@class="hightlightSidebar"]/a')
page_website.set('href', articledict['Website'])
page_website.text=articledict['Website']
page_website.text=articledict['Website']
page_thumb = page_tree.find('.//img[@id="thumbnail"]')
page_thumb.set('src', articledict['Thumbnail'])
# give work page's imgs full url
imgs = page_tree.findall('.//img')
# for img in imgs:
# img_class = img.get('class')
# if img_class != 'template':
# src =unicode(img.get('src'))
# for pair in workpage_imgs:
# if src.replace("_", " ") in pair[0]:#if img in html matchs img in workpage_imgs
# img.set('src', pair[1])
imgs = page_tree.findall('.//img')
for img in imgs: #replace src: full url
src = (('File:'+img.get('src')).capitalize()).decode('utf-8')
if src in articledict['Imgs'].keys():
url = articledict['Imgs'][src]
img.set('src', url)
# save work page
creator = articledict['Creator'].encode('ascii', 'ignore')
@ -98,7 +90,6 @@ def create_index(indexdict):
index_tree = html5lib.parse(index_template, namespaceHTMLElements=False)
index_container = index_tree.find(".//div[@class='isotope']") #maybe id is imp
for key in indexdict.keys():
print 'key', key
index_addwork( parent=index_container,
workid=key,
href=indexdict[key]['Path'],
@ -108,47 +99,9 @@ def create_index(indexdict):
thumbnail=indexdict[key]['Thumbnail']
)
print '----', indexdict[key]['Title'],indexdict[key]['Path']
# print ET.tostring(tree)
#print index_tree, type(index_tree)
write_html_file(index_tree, 'web/index.html')
# authors = indexdict[article]['Authors']
# path = indexdict[article]['Path']
# issue = indexdict[article]['Category Issue']
# section = indexdict[article]['Category Section']
# topics = indexdict[article]['Category Topics']
# images = indexdict[article]['Images']
# index_section = index_tree.find('.//ul[@id="section_{}"]'.format(section.encode('utf-8')))
# index_item = ET.SubElement(index_section, 'li',
# attrib={'class': " ".join(topics)+" "+section,
# 'data-name': article,
# 'data-section':section,
# 'data-categories': " ".join(topics)+" "+section
# })
# article_link = ET.SubElement(index_item, 'a', attrib={'href':urllib.quote(path)})
# article_link.text = article
# article_author = ET.SubElement(index_item, 'p', attrib={'class':'authorTitle'})
# article_author.text = authors
# for imgurl in images.values():
# print 'imgurl', imgurl
# index_img_item = ET.SubElement(index_imgs_section, 'li',
# attrib={'class': " ".join(topics)+" "+section,
# 'data-name': article,
# 'data-section':section,
# 'data-categories': " ".join(topics)+" "+section,
# 'style':'position: absolute; left: 0px; top: 0px;'
# })
# article_img_link = ET.SubElement(index_img_item, 'a', attrib={'href':urllib.quote(path)})
# article_img_img = ET.SubElement(article_img_link, 'img', attrib={'src':imgurl})
# title=index_tree.find('.//title')
# title.text = 'Beyond Social: ' + issue_current
# index_filename = 'index.html'
# write_html_file(index_tree, index_filename)
#####
# ACTION
#####
@ -163,6 +116,7 @@ if args.preview is not None:
else:
print "** New Index Mode **"
memberpages=mw_cats(site, args)
#memberpages=[u'Unintended Images']
shuffle(memberpages)
print 'memberpages:', memberpages
indexdict = create_page(memberpages, 'index')

Loading…
Cancel
Save