prototyping page creation: effective

master
Castro0o 10 years ago
parent 4fbfd6b88a
commit 3a79e7bfe8

@ -10,7 +10,11 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
# API MODULES # API MODULES
def api_request(action, pagename): #get page: content, metadata, images, imageinfo def api_request(action, pagename): #get page: content, metadata, images, imageinfo
url = endpoint + action.format(pagename) print 'API REQUEST'
print pagename
print 'TEST', action.format(pagename)
url = endpoint + (action.format(pagename))
print 'API REQUEST', url
request = urllib2.urlopen(url) request = urllib2.urlopen(url)
jsonp = json.loads(request.read() ) jsonp = json.loads(request.read() )
json_dic= (jsonp.get('query').get('pages')) json_dic= (jsonp.get('query').get('pages'))
@ -29,10 +33,10 @@ def api_page(pageid, query):
response = api_request('action=query&pageids={}&prop=images', pageid) response = api_request('action=query&pageids={}&prop=images', pageid)
elif query == 'file': elif query == 'file':
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid) response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',pageid)
pprint.pprint( response )
elif query == 'imageinfo': elif query == 'imageinfo':
pagename = pageid # in imageinfo titles are used instead of id pagename = pageid # in imageinfo titles are used instead of id
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidth determines width of thumbnail print 'IMAGEINFO', pagename
response = api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidth determines width of thumbnail
return response return response
def api_file_url(filename): # get full urls def api_file_url(filename): # get full urls
@ -44,13 +48,12 @@ def api_file_url(filename): # get full urls
return None return None
def api_thumb_url(filename):
    '''Return the thumbnail URL of the wiki image `filename`.

    Queries the wiki through api_page(filename, 'imageinfo') and reads the
    'thumburl' field of the first 'imageinfo' entry of the response.

    Returns None when the response carries no 'imageinfo' entry
    (presumably the case for a file name the wiki does not know --
    verify against the API) rather than raising on the missing entry.
    '''
    page_content_dict = api_page(filename, 'imageinfo')
    # Guard an empty response as well as a page record without 'imageinfo':
    # indexing [0] on the missing entry would raise TypeError.
    imageinfo = (page_content_dict or {}).get('imageinfo')
    if not imageinfo:
        return None
    return imageinfo[0].get('thumburl')
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300 # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content
@ -94,21 +97,28 @@ def parse_work_page(title, content):
return work_dict, extra return work_dict, extra
# Alternative to parse_work_page - W/out dictionary
# Alternative to parse_work_page
def parse_work(title, content):
    '''Parse the "Graduation work" template of a wiki page into a dict.

    title   -- stored unchanged under the 'Title' key.
    content -- wikitext of the page; only parsed when it starts with
               "{{Graduation work".

    Returns a dict that always holds the keys 'Title', 'Creator', 'Date',
    'Website', 'Thumbnail', 'Bio', 'Description' and 'Extra' (empty string
    when the page does not supply them). Every "|key=value" line of the
    template overrides or adds its key; 'Extra' receives the text that
    follows the closing "}}" of the template.

    When the template header is missing, or the template is never closed,
    the dict is returned with its defaults instead of raising.
    '''
    workdict = {'Title': title, 'Creator': '', 'Date': '', 'Website': '',
                'Thumbnail': '', 'Bio': '', 'Description': '', 'Extra': ''}
    # The page must open with the template header.
    if not re.match(r'\{\{Graduation work', content):
        return workdict
    found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content,
                      re.DOTALL)
    if found is None:
        # Header present but no "\n...}}" body: nothing to parse.
        return workdict
    template, extra = found.groups()
    workdict['Extra'] = extra

    # template's key/value pairs: "|key=value\n"
    for key, val in re.findall(r'\|(.*?)\=(.*?\n)', template, re.DOTALL):
        # Whitespace around a template parameter name is not significant;
        # trimming it keeps "|Date =..." from creating a separate 'Date ' key.
        key = key.strip()
        val = val.replace('\n', '')
        if 'Creator' in key:
            # NOTE(review): drops every ", " from the creator value,
            # presumably to clean a trailing separator -- confirm intent.
            val = val.replace(', ', '')
        elif 'Thumbnail' in key:
            # The template stores a file name; resolve it to a thumbnail URL.
            val = api_thumb_url(val)
        workdict[key] = val
    return workdict
@ -171,3 +181,7 @@ def img_fullurl(parent):
# fileurl = api_request(src, endpoint)# find url of file # fileurl = api_request(src, endpoint)# find url of file

@ -4,28 +4,59 @@
########### ###########
# Testing downloading and converting mw page content to html # Testing downloading and converting mw page content to html
########### ###########
# OVERVIEW:
# * creating one single html page
# request all the pages
# build index
# build all pages
import pprint
from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work_page, parse_work from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work_page, parse_work
template = open("web/page-template.html", "r") template = open("web/page-template.html", "r")
template = template.read() template = template.read()
template = template.format(title="This is My Title", creator='', date='', website='', thumbnail='', bio='',description='', extra='' )
# download # download
pageid='15965' pageid='16025'#'15965'#Qq #'15986'Jozeph
article = api_page(pageid, 'content') work = 'Mina'#'User:Joak/graduation/catalog1'
#print article
#print '----------- article -----' workpage_mw = api_page(pageid, 'content')
# parsing workpage_mw
workdict = parse_work(work, workpage_mw)
for key in workdict.keys():
if key in ['Extra', 'Description', 'Bio']:
workdict[key] = pandoc2html(workdict[key].encode('utf-8'))
# print key
# print workdict[key]
# print '--------------------'
template = template.format(title=workdict['Title'], creator=workdict['Creator'], date=workdict['Date'], website=workdict['Website'], thumbnail=workdict['Thumbnail'], bio=workdict['Bio'],description=workdict['Description'], extra=workdict['Extra'] )
work_filename = 'web/{}-{}-{}.html'.format(workdict['Date'], (workdict['Creator'].encode('ascii', 'ignore')).replace(' ','_'), pageid)
work_file = open(work_filename, "w")
work_file.write(template)
work_file.close()
#template = template.read()
#print(template)
#for section in [extra, description, bio]:
# section = pandoc2html(section.encode('utf-8'))
# print section
# print '------------'
#print template
#for key in workdict.keys():
# print key, workdict[key].encode('utf-8')
# parsing article
extra, description = parse_work('Qq', article)
# placing mw content inside dict makes it non-convertible. Why?
#print extra #work_dict['Extra']
html_extra = pandoc2html(extra.encode('utf-8'))
html_description = pandoc2html(description.encode('utf-8'))
print '----------- html -----' #print '----------- html -----'
print html_description #print html_description
#print html_extra #print html_extra
print template #print template

Loading…
Cancel
Save