working..

master
Castro0o 9 years ago
parent 99e7df5270
commit c8aa3bef42

@ -7,6 +7,12 @@ Or index all the graduation works:
`python mmdc_wiki2web.py --category Graduation_work`
## template files
web/page-template.html
web/index-template.html
## To Do
* add auxiliary JSON creation
* replace direct API calls with the mw library

@ -77,51 +77,60 @@ for member in memberpages:
if key in ['Extra', 'Description', 'Bio'] and workdict[key]:
workdict[key] = pandoc2html( (workdict[key].decode('utf-8')) )
# fill template with dictionary/mw_page values
workpage_html = page_template.format(title=(workdict['Title']),
creator=(workdict['Creator']),
date=workdict['Date'],
website=workdict['Website'],
thumbnail=workdict['Thumbnail'],
bio=(workdict['Bio']),
description=(workdict['Description']),
extra=( workdict['Extra'] )
)
# parse workpage_html # process html: img full url
tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)
imgs = tree.findall('.//img')
for img in imgs:
if img.get('id') is not 'logo':
src = img.get('src')
newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT
if newsrc:
img.set('src', newsrc)
website = tree.find('.//p[@class="hightlightSidebar"]/a')
if not website.get('href'): # if no website link.
#remove empty <a> .//p[@class="hightlightSidebar"]/a
website_parent = tree.find('.//p[@class="hightlightSidebar"]')
website_parent.remove(website)
# save workpage_html
workpage_html = ET.tostring(tree)
creator = workdict['Creator'].decode('ascii', 'ignore')
creator = creator.replace(' ','_')
work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
work_file = open(work_filename, "w")
work_file.write(workpage_html)
work_file.close()
# insert work to index
index_addwork( parent=index_container,
workid=key,
href=work_filename.replace('web/',''),
title=(workdict['Title']).decode('utf-8'),
creator=workdict['Creator'].decode('utf-8'),
date=workdict['Date'],
thumbnail=workdict['Thumbnail'],
)
print 'EXTRA', type (workdict['Extra']), workdict['Extra']
# fill template with dictionary/mw_page values
pprint.pprint(workdict)
if len(workdict['Creator'])>1 and len(workdict['Creator'])>1:
workpage_html = page_template.format(title=('Title'),#workdict['Title']),
creator=(workdict['Creator']),
date=workdict['Date'],
website=workdict['Website'],
thumbnail=workdict['Thumbnail'],
bio=(workdict['Bio']),
description=('desc'),#workdict['Description']),
extra=( 'extra')#workdict['Extra'] )
)
# parse workpage_html # process html: img full url
tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)
imgs = tree.findall('.//img')
for img in imgs:
if img.get('id') is not 'logo':
src = img.get('src')
newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT
if newsrc:
img.set('src', newsrc)
website = tree.find('.//p[@class="hightlightSidebar"]/a')
if not website.get('href'): # if no website link.
#remove empty <a> .//p[@class="hightlightSidebar"]/a
website_parent = tree.find('.//p[@class="hightlightSidebar"]')
website_parent.remove(website)
# save workpage_html
workpage_html = ET.tostring(tree)
creator = workdict['Creator'].decode('ascii', 'ignore')
creator = creator.replace(' ','_')
work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
work_file = open(work_filename, "w")
work_file.write(workpage_html)
work_file.close()
print 'DICT', type(workdict['Title'])
# insert work to index
index_addwork( parent=index_container,
workid=key,
href=work_filename.replace('web/',''),
title=(workdict['Title']),#.decode('utf-8'),
creator=workdict['Creator'].decode('utf-8'),
date=workdict['Date'],
thumbnail=workdict['Thumbnail'],
)
write_html_file(index_tree, 'web/index.html')
print

Loading…
Cancel
Save