working..

commit c8aa3bef42 (parent 99e7df5270), branch master
Castro0o, 10 years ago

@@ -7,6 +7,12 @@ Or index all the gaduation works:
 `python mmdc_wiki2web.py --category Graduation_work`
 
+## template files
+
+web/page-template.html
+
+web/index-template.html
+
 ## To Do
 * add auxiliar JSON creation
 * replace direct API calls for mw library
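For context, the two template files listed above are plain HTML with `str.format` placeholders that the script fills once per graduation work. Below is a minimal sketch of that step, assuming the placeholder names visible in the script hunk further down; the tiny inline template and the workdict values are invented stand-ins for the real web/page-template.html.

# Sketch only: how a str.format page template gets filled for one work.
# The placeholder names mirror the format() call in mmdc_wiki2web.py;
# the template string and field values here are made up for illustration.
page_template = u'''<html><body>
<h1>{title}</h1>
<p>{creator}, {date}</p>
<p class="hightlightSidebar"><a href="{website}">website</a></p>
<img src="{thumbnail}"/>
{bio}
{description}
{extra}
</body></html>'''

workdict = {
    'Title': 'Example work', 'Creator': 'Some Student', 'Date': '2014',
    'Website': 'http://example.org', 'Thumbnail': 'thumb.jpg',
    'Bio': '<p>Bio</p>', 'Description': '<p>Description</p>', 'Extra': '',
}

workpage_html = page_template.format(
    title=workdict['Title'], creator=workdict['Creator'],
    date=workdict['Date'], website=workdict['Website'],
    thumbnail=workdict['Thumbnail'], bio=workdict['Bio'],
    description=workdict['Description'], extra=workdict['Extra'])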

@@ -77,51 +77,60 @@ for member in memberpages:
         if key in ['Extra', 'Description', 'Bio'] and workdict[key]:
             workdict[key] = pandoc2html( (workdict[key].decode('utf-8')) )
-    # fill template with dictionary/mw_page values
-    workpage_html = page_template.format(title=(workdict['Title']),
-                                         creator=(workdict['Creator']),
-                                         date=workdict['Date'],
-                                         website=workdict['Website'],
-                                         thumbnail=workdict['Thumbnail'],
-                                         bio=(workdict['Bio']),
-                                         description=(workdict['Description']),
-                                         extra=( workdict['Extra'] )
-                                         )
-
-    # parse workpage_html # process html: img full url
-    tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)
-    imgs = tree.findall('.//img')
-    for img in imgs:
-        if img.get('id') is not 'logo':
-            src = img.get('src')
-            newsrc = api_file_url(src)
-            if newsrc:
-                img.set('src', newsrc)
-
-    website = tree.find('.//p[@class="hightlightSidebar"]/a')
-    if not website.get('href'): # if no website link.
-        #remove empty <a> .//p[@class="hightlightSidebar"]/a
-        website_parent = tree.find('.//p[@class="hightlightSidebar"]')
-        website_parent.remove(website)
-
-    # save workpage_html
-    workpage_html = ET.tostring(tree)
-    creator = workdict['Creator'].decode('ascii', 'ignore')
-    creator = creator.replace(' ','_')
-    work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
-    work_file = open(work_filename, "w")
-    work_file.write(workpage_html)
-    work_file.close()
-
-    # insert work to index
-    index_addwork( parent=index_container,
-                   workid=key,
-                   href=work_filename.replace('web/',''),
-                   title=(workdict['Title']).decode('utf-8'),
-                   creator=workdict['Creator'].decode('utf-8'),
-                   date=workdict['Date'],
-                   thumbnail=workdict['Thumbnail'],
-                   )
+    print 'EXTRA', type (workdict['Extra']), workdict['Extra']
+    # fill template with dictionary/mw_page values
+    pprint.pprint(workdict)
+
+    if len(workdict['Creator'])>1 and len(workdict['Creator'])>1:
+
+        workpage_html = page_template.format(title=('Title'),#workdict['Title']),
+                                             creator=(workdict['Creator']),
+                                             date=workdict['Date'],
+                                             website=workdict['Website'],
+                                             thumbnail=workdict['Thumbnail'],
+                                             bio=(workdict['Bio']),
+                                             description=('desc'),#workdict['Description']),
+                                             extra=( 'extra')#workdict['Extra'] )
+                                             )
+        ## MOVE FULL URl OPERATION TO MW CONTENT
+        # parse workpage_html # process html: img full url
+        tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)
+        imgs = tree.findall('.//img')
+        for img in imgs:
+            if img.get('id') is not 'logo':
+                src = img.get('src')
+                newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT
+                if newsrc:
+                    img.set('src', newsrc)
+
+        website = tree.find('.//p[@class="hightlightSidebar"]/a')
+        if not website.get('href'): # if no website link.
+            #remove empty <a> .//p[@class="hightlightSidebar"]/a
+            website_parent = tree.find('.//p[@class="hightlightSidebar"]')
+            website_parent.remove(website)
+
+        # save workpage_html
+        workpage_html = ET.tostring(tree)
+        creator = workdict['Creator'].decode('ascii', 'ignore')
+        creator = creator.replace(' ','_')
+        work_filename = 'web/{}-{}.html'.format(workdict['Date'], creator)
+        work_file = open(work_filename, "w")
+        work_file.write(workpage_html)
+        work_file.close()
+
+        print 'DICT', type(workdict['Title'])
+        # insert work to index
+        index_addwork( parent=index_container,
+                       workid=key,
+                       href=work_filename.replace('web/',''),
+                       title=(workdict['Title']),#.decode('utf-8'),
+                       creator=workdict['Creator'].decode('utf-8'),
+                       date=workdict['Date'],
+                       thumbnail=workdict['Thumbnail'],
+                       )
 write_html_file(index_tree, 'web/index.html')
 print
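As a reading aid for the post-processing in this hunk: the generated page is re-parsed with html5lib so that <img> tags can be rewritten to full file URLs and an empty sidebar link can be dropped before the tree is serialized with ElementTree. The following is a self-contained sketch of that logic, not the committed code; api_file_url is stubbed out (the real helper lives in mmdc_wiki2web.py) and the input page is made up.

# Illustrative sketch only: rewrite <img> sources and drop an empty sidebar <a>,
# mirroring the steps in the hunk above.
import html5lib
import xml.etree.ElementTree as ET

def api_file_url(src):
    # stand-in for the script's helper; assumed to resolve a wiki file
    # reference to a full URL
    return 'http://wiki.example.org/images/' + src.split(':')[-1]

workpage_html = '''<html><body>
<img id="logo" src="logo.png"/>
<img src="File:thumb.jpg"/>
<p class="hightlightSidebar"><a></a></p>
</body></html>'''

tree = html5lib.parse(workpage_html, namespaceHTMLElements=False)

for img in tree.findall('.//img'):
    # the committed code tests "is not 'logo'"; != is the usual equality test
    if img.get('id') != 'logo':
        newsrc = api_file_url(img.get('src'))
        if newsrc:
            img.set('src', newsrc)

website = tree.find('.//p[@class="hightlightSidebar"]/a')
if website is not None and not website.get('href'):
    # remove the empty <a> so the page shows no dead website link
    tree.find('.//p[@class="hightlightSidebar"]').remove(website)

print(ET.tostring(tree))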
