working..

master
Castro0o 10 years ago
parent 99e7df5270
commit c8aa3bef42

@ -7,6 +7,12 @@ Or index all the graduation works:
`python mmdc_wiki2web.py --category Graduation_work` `python mmdc_wiki2web.py --category Graduation_work`
## template files
web/page-template.html
web/index-template.html
## To Do ## To Do
* add auxiliary JSON creation * add auxiliary JSON creation
* replace direct API calls with the mw library * replace direct API calls with the mw library

@ -77,51 +77,60 @@ for member in memberpages:
if key in ['Extra', 'Description', 'Bio'] and workdict[key]: if key in ['Extra', 'Description', 'Bio'] and workdict[key]:
workdict[key] = pandoc2html( (workdict[key].decode('utf-8')) ) workdict[key] = pandoc2html( (workdict[key].decode('utf-8')) )
print 'EXTRA', type (workdict['Extra']), workdict['Extra']
# fill template with dictionary/mw_page values # fill template with dictionary/mw_page values
workpage_html = page_template.format(title=(workdict['Title']), pprint.pprint(workdict)
creator=(workdict['Creator']),
date=workdict['Date'], if len(workdict['Creator'])>1 and len(workdict['Creator'])>1:
website=workdict['Website'],
thumbnail=workdict['Thumbnail'], workpage_html = page_template.format(title=('Title'),#workdict['Title']),
bio=(workdict['Bio']), creator=(workdict['Creator']),
description=(workdict['Description']), date=workdict['Date'],
extra=( workdict['Extra'] ) website=workdict['Website'],
) thumbnail=workdict['Thumbnail'],
bio=(workdict['Bio']),
# parse workpage_html # process html: img full url description=('desc'),#workdict['Description']),
tree = html5lib.parse(workpage_html, namespaceHTMLElements=False) extra=( 'extra')#workdict['Extra'] )
imgs = tree.findall('.//img') )
for img in imgs:
if img.get('id') is not 'logo': # parse workpage_html # process html: img full url
src = img.get('src') tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)
newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT imgs = tree.findall('.//img')
if newsrc: for img in imgs:
img.set('src', newsrc) if img.get('id') is not 'logo':
src = img.get('src')
website = tree.find('.//p[@class="hightlightSidebar"]/a') newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT
if not website.get('href'): # if no website link. if newsrc:
#remove empty <a> .//p[@class="hightlightSidebar"]/a img.set('src', newsrc)
website_parent = tree.find('.//p[@class="hightlightSidebar"]')
website_parent.remove(website) website = tree.find('.//p[@class="hightlightSidebar"]/a')
if not website.get('href'): # if no website link.
# save workpage_html #remove empty <a> .//p[@class="hightlightSidebar"]/a
workpage_html = ET.tostring(tree) website_parent = tree.find('.//p[@class="hightlightSidebar"]')
creator = workdict['Creator'].decode('ascii', 'ignore') website_parent.remove(website)
creator = creator.replace(' ','_')
work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator) # save workpage_html
work_file = open(work_filename, "w") workpage_html = ET.tostring(tree)
work_file.write(workpage_html) creator = workdict['Creator'].decode('ascii', 'ignore')
work_file.close() creator = creator.replace(' ','_')
work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
# insert work to index work_file = open(work_filename, "w")
index_addwork( parent=index_container, work_file.write(workpage_html)
workid=key, work_file.close()
href=work_filename.replace('web/',''),
title=(workdict['Title']).decode('utf-8'),
creator=workdict['Creator'].decode('utf-8'), print 'DICT', type(workdict['Title'])
date=workdict['Date'], # insert work to index
thumbnail=workdict['Thumbnail'], index_addwork( parent=index_container,
) workid=key,
href=work_filename.replace('web/',''),
title=(workdict['Title']),#.decode('utf-8'),
creator=workdict['Creator'].decode('utf-8'),
date=workdict['Date'],
thumbnail=workdict['Thumbnail'],
)
write_html_file(index_tree, 'web/index.html') write_html_file(index_tree, 'web/index.html')
print print

Loading…
Cancel
Save