From 9cc73ca1ad579f3af41119e68c67ba2dd0a15b29 Mon Sep 17 00:00:00 2001 From: Castro0o Date: Sat, 11 Apr 2015 13:27:30 +0200 Subject: [PATCH] mmdc_modules.py with modules for all the scripts --- mmdc_create_json.py | 35 +--------------------- mmdc_modules.py | 68 +++++++++++++++++++++++++++++++++++++++++++ mmdc_pages.py | 16 +--------- web/index_edited.html | 2 +- 4 files changed, 71 insertions(+), 50 deletions(-) create mode 100644 mmdc_modules.py diff --git a/mmdc_create_json.py b/mmdc_create_json.py index 396d433..afdb268 100755 --- a/mmdc_create_json.py +++ b/mmdc_create_json.py @@ -4,8 +4,8 @@ ############## # CREATE JSON DICTIONARY WITH AN ENTRY FOR EACH WORK ##### - import urllib2, json, pprint, re +from mmdc_modules import api_request, api_page, api_thumb_url sid = '1234' useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" @@ -13,42 +13,9 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" allworks = {} mainkeys = ['Thumbnail','Date','Creator'] -def api_request(action, pagename): #get page: content, metadata, images, imageifnp - url = endpoint + action.format(pagename) - request = urllib2.urlopen(url) - jsonp = json.loads(request.read() ) - json_dic= (jsonp.get('query').get('pages')) -# pprint.pprint( json_dic ) - page_id = json_dic.keys()[0] - page_content = json_dic.get(page_id) - print 'API Resquest URL:', url - return page_content - -def api_page(pageid, query): - print 'API query:', query - if query == 'content': - api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid) - response = ((api_response.get('revisions'))[0])['*'] - elif query == 'metadata': - response = api_request('action=query&pageids={}&prop=info', pageid) - elif query == 'articleimgs': - response = api_request('action=query&pageids={}&prop=images', pageid) - elif query == 'imageinfo': - pagename = pageid # in imageinfo titles are used instead of id - response = 
api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidht dermines with of thumbnail - return response - # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300 # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content - -def api_thumb_url(filename): - '''get thumbnail url of image''' - page_content_dict = api_page(filename, 'imageinfo') - if 'imageinfo' in page_content_dict.keys(): - thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl')) - return thumburl - def parse_work_page(title, content): content = content.encode('utf-8') diff --git a/mmdc_modules.py b/mmdc_modules.py new file mode 100644 index 0000000..5d29a78 --- /dev/null +++ b/mmdc_modules.py @@ -0,0 +1,68 @@ + +import urllib2, json, pprint, re +import xml.etree.ElementTree as ET +import subprocess, shlex +sid = '1234' +useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" +endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" + +# API MODULES +def api_request(action, pagename): #get page: content, metadata, images, imageifnp + url = endpoint + action.format(pagename) + request = urllib2.urlopen(url) + jsonp = json.loads(request.read() ) + json_dic= (jsonp.get('query').get('pages')) +# pprint.pprint( json_dic ) + page_id = json_dic.keys()[0] + page_content = json_dic.get(page_id) + print 'API Resquest URL:', url + return page_content + +def api_page(pageid, query): + print 'API query:', query + if query == 'content': + api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid) + response = ((api_response.get('revisions'))[0])['*'] + elif query == 'metadata': + response = 
api_request('action=query&pageids={}&prop=info', pageid) + elif query == 'articleimgs': + response = api_request('action=query&pageids={}&prop=images', pageid) + elif query == 'imageinfo': + pagename = pageid # in imageinfo titles are used instead of id + response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidth determines width of thumbnail + return response + + +def api_thumb_url(filename): + '''get thumbnail url of image''' + page_content_dict = api_page(filename, 'imageinfo') + if 'imageinfo' in page_content_dict.keys(): + thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl')) + return thumburl + + +# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300 +# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content +# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content + +# XML MODULES + +def write_html_file(html_tree, filename): + doctype = "" + html = doctype + ET.tostring(html_tree, encoding='utf-8', method='html') + edited = open(filename, 'w') #write + edited.write(html) + edited.close() + + +# Conversion Modules +def pandoc2html(mw_content): + if mw_content: + mw_content = mw_content.encode('utf-8') + # convert from mw to html + args_echo =shlex.split( ('echo "{}"'.format(mw_content)) ) + args_pandoc = shlex.split( 'pandoc -f mediawiki -t html5' ) + p1 = subprocess.Popen(args_echo, stdout=subprocess.PIPE) + p2 = subprocess.Popen(args_pandoc, stdin=p1.stdout, stdout=subprocess.PIPE) + html = (p2.communicate())[0] + return html diff --git a/mmdc_pages.py b/mmdc_pages.py index bf513c3..a4ab85d 100755 --- a/mmdc_pages.py +++ b/mmdc_pages.py @@ -6,26 +6,12 @@ ##### import xml.etree.ElementTree as ET import html5lib, urllib2, json, pprint, re -import subprocess, shlex +from 
mmdc_modules import api_thumb_url, pandoc2html #import mmdc_create_json import api_thumb_url json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks = json.loads(json_allworks_file.read()) pages_path = 'web/work' - - - -def pandoc2html(mw_content): - if mw_content: - mw_content = mw_content.encode('utf-8') - # convert from mw to html - args_echo =shlex.split( ('echo "{}"'.format(mw_content)) ) - args_pandoc = shlex.split( 'pandoc -f mediawiki -t html5' ) - p1 = subprocess.Popen(args_echo, stdout=subprocess.PIPE) - p2 = subprocess.Popen(args_pandoc, stdin=p1.stdout, stdout=subprocess.PIPE) - html = (p2.communicate())[0] - return html - #def generate_xml(): diff --git a/web/index_edited.html b/web/index_edited.html index 5b56eaa..4a24460 100644 --- a/web/index_edited.html +++ b/web/index_edited.html @@ -35,7 +35,7 @@ -

User:Joak/graduation/catalog1

Joseph Knierzinger

2015

Ahhhh

JOK

2015

The Aesthetics of Ethics

Ana Luísa Moura

2015

Qq

Luther Blisset

2015

+

User:Joak/graduation/catalog1

Joseph Knierzinger

2015

User:Max Dovey/maxgradbio

Max Dovey

2015

Ahhhh

JOK

2015

The Aesthetics of Ethics

Ana Luísa Moura

2015

Qq

Luther Blisset

2015