mmdc_modules.py with modules for all the scripts

master
Castro0o 10 years ago
parent 4ff22943b8
commit 9cc73ca1ad

@ -4,8 +4,8 @@
############## ##############
# CREATE JSON DICTIONARY WITH AN ENTRY FOR EACH WORK # CREATE JSON DICTIONARY WITH AN ENTRY FOR EACH WORK
##### #####
import urllib2, json, pprint, re import urllib2, json, pprint, re
from mmdc_modules import api_request, api_page, api_thumb_url
sid = '1234' sid = '1234'
useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101"
@ -13,43 +13,10 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
allworks = {} allworks = {}
mainkeys = ['Thumbnail','Date','Creator'] mainkeys = ['Thumbnail','Date','Creator']
def api_request(action, pagename):
    '''Query the wiki API and return the data of the first page in the reply.

    action   -- query-string template containing one "{}" placeholder, e.g.
                'action=query&pageids={}&prop=info'
    pagename -- value substituted into the placeholder (a page id or a page
                title, depending on the template). It is inserted as-is, so
                it must already be safe to place in a URL.

    Returns the dict stored under the first key of query -> pages in the
    JSON reply.
    '''
    url = endpoint + action.format(pagename)
    request = urllib2.urlopen(url)
    try:
        jsonp = json.loads(request.read())
    finally:
        # always release the HTTP connection, even if reading/parsing fails
        request.close()
    json_dic = jsonp.get('query').get('pages')
    # pprint.pprint( json_dic )
    # "pages" is keyed by page id; take the first key without relying on
    # keys() returning an indexable list
    page_id = next(iter(json_dic))
    page_content = json_dic.get(page_id)
    # single-argument print form: same output, valid with or without
    # print_function
    print('API Resquest URL: %s' % (url,))
    return page_content
def api_page(pageid, query):
    '''Fetch one aspect of a wiki page through api_request().

    pageid -- page id; for query == 'imageinfo' it is a file title instead
    query  -- one of 'content', 'metadata', 'articleimgs', 'imageinfo'

    Returns the text of the first revision for 'content', and the page dict
    returned by api_request() for the other queries.
    Raises ValueError when query is not one of the supported values.
    '''
    print('API query: %s' % (query,))
    if query == 'content':
        api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
        # the wikitext sits under the '*' key of the first revision
        response = ((api_response.get('revisions'))[0])['*']
    elif query == 'metadata':
        response = api_request('action=query&pageids={}&prop=info', pageid)
    elif query == 'articleimgs':
        response = api_request('action=query&pageids={}&prop=images', pageid)
    elif query == 'imageinfo':
        pagename = pageid  # in imageinfo titles are used instead of id
        # iiurlwidth sets the width of the requested thumbnail
        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename)
    else:
        # fail with a clear message instead of an unbound-variable error
        raise ValueError('unknown API query: %r' % (query,))
    return response
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300 # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content # http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content
def api_thumb_url(filename):
    '''Return the thumbnail URL of an image, or None if there is none.

    filename -- file title passed to api_page(filename, 'imageinfo')

    None is returned when the reply carries no (or an empty) 'imageinfo'
    entry, or when its first element has no 'thumburl' key.
    '''
    page_content_dict = api_page(filename, 'imageinfo')
    imageinfo = page_content_dict.get('imageinfo')
    if not imageinfo:
        # missing or empty imageinfo: nothing to report
        return None
    return imageinfo[0].get('thumburl')
def parse_work_page(title, content): def parse_work_page(title, content):
content = content.encode('utf-8') content = content.encode('utf-8')
if re.match('\{\{\Graduation work', content): if re.match('\{\{\Graduation work', content):

@ -0,0 +1,68 @@
import urllib2, json, pprint, re
import xml.etree.ElementTree as ET
import subprocess, shlex
sid = '1234'
useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101"
endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&"
# API MODULES
def api_request(action, pagename):
    '''Query the wiki API and return the data of the first page in the reply.

    action   -- query-string template containing one "{}" placeholder, e.g.
                'action=query&pageids={}&prop=info'
    pagename -- value substituted into the placeholder (a page id or a page
                title, depending on the template). It is inserted as-is, so
                it must already be safe to place in a URL.

    Returns the dict stored under the first key of query -> pages in the
    JSON reply.
    '''
    url = endpoint + action.format(pagename)
    request = urllib2.urlopen(url)
    try:
        jsonp = json.loads(request.read())
    finally:
        # always release the HTTP connection, even if reading/parsing fails
        request.close()
    json_dic = jsonp.get('query').get('pages')
    # pprint.pprint( json_dic )
    # "pages" is keyed by page id; take the first key without relying on
    # keys() returning an indexable list
    page_id = next(iter(json_dic))
    page_content = json_dic.get(page_id)
    # single-argument print form: same output, valid with or without
    # print_function
    print('API Resquest URL: %s' % (url,))
    return page_content
def api_page(pageid, query):
    '''Fetch one aspect of a wiki page through api_request().

    pageid -- page id; for query == 'imageinfo' it is a file title instead
    query  -- one of 'content', 'metadata', 'articleimgs', 'imageinfo'

    Returns the text of the first revision for 'content', and the page dict
    returned by api_request() for the other queries.
    Raises ValueError when query is not one of the supported values.
    '''
    print('API query: %s' % (query,))
    if query == 'content':
        api_response = api_request('action=query&pageids={}&prop=revisions&rvprop=content', pageid)
        # the wikitext sits under the '*' key of the first revision
        response = ((api_response.get('revisions'))[0])['*']
    elif query == 'metadata':
        response = api_request('action=query&pageids={}&prop=info', pageid)
    elif query == 'articleimgs':
        response = api_request('action=query&pageids={}&prop=images', pageid)
    elif query == 'imageinfo':
        pagename = pageid  # in imageinfo titles are used instead of id
        # iiurlwidth sets the width of the requested thumbnail
        response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename)
    else:
        # fail with a clear message instead of an unbound-variable error
        raise ValueError('unknown API query: %r' % (query,))
    return response
def api_thumb_url(filename):
    '''Return the thumbnail URL of an image, or None if there is none.

    filename -- file title passed to api_page(filename, 'imageinfo')

    None is returned when the reply carries no (or an empty) 'imageinfo'
    entry, or when its first element has no 'thumburl' key.
    '''
    page_content_dict = api_page(filename, 'imageinfo')
    imageinfo = page_content_dict.get('imageinfo')
    if not imageinfo:
        # missing or empty imageinfo: nothing to report
        return None
    return imageinfo[0].get('thumburl')
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=File:2x2 905.jpg&prop=imageinfo&iiprop=url&iiurlwidth=300
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&pageids=10603&prop=revisions&rvprop=content
# http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&action=query&titles=Graduation_Website_Braindump&prop=revisions&rvprop=content
# XML MODULES
def write_html_file(html_tree, filename):
    '''Serialize an ElementTree element as HTML and write it to a file.

    html_tree -- element passed to ET.tostring(); serialized with
                 encoding='utf-8' and method='html'
    filename  -- path of the file to create or overwrite

    The output is prefixed with an HTML doctype declaration.
    '''
    doctype = "<!DOCTYPE HTML>"
    html = doctype + ET.tostring(html_tree, encoding='utf-8', method='html')
    # "with" guarantees the handle is closed even if the write fails
    with open(filename, 'w') as edited:
        edited.write(html)
# Conversion Modules
def pandoc2html(mw_content):
    '''Convert MediaWiki markup to HTML5 by piping it through pandoc.

    mw_content -- wiki markup; text is encoded to UTF-8 before it is sent,
                  byte strings are sent unchanged

    Returns pandoc's standard output, or None when mw_content is empty or
    None. Requires the "pandoc" executable to be on the PATH.
    '''
    if not mw_content:
        return None
    if not isinstance(mw_content, bytes):
        mw_content = mw_content.encode('utf-8')
    # Feed the markup straight to pandoc's stdin. Building an
    # 'echo "<content>"' command line breaks on quotes and backslashes in
    # the content and lets echo interpret leading text as options.
    pandoc = subprocess.Popen(
        ['pandoc', '-f', 'mediawiki', '-t', 'html5'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # trailing newline mirrors what "echo" used to append to the input
    html = pandoc.communicate(mw_content + b'\n')[0]
    return html

@ -6,26 +6,12 @@
##### #####
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re import html5lib, urllib2, json, pprint, re
import subprocess, shlex from mmdc_modules import api_thumb_url, pandoc2html
#import mmdc_create_json import api_thumb_url #import mmdc_create_json import api_thumb_url
json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks_file = open('allworks_mmdc.json', 'r') # save json
json_allworks = json.loads(json_allworks_file.read()) json_allworks = json.loads(json_allworks_file.read())
pages_path = 'web/work' pages_path = 'web/work'
def pandoc2html(mw_content):
    '''Convert MediaWiki markup to HTML5 by piping it through pandoc.

    mw_content -- wiki markup; text is encoded to UTF-8 before it is sent,
                  byte strings are sent unchanged

    Returns pandoc's standard output, or None when mw_content is empty or
    None. Requires the "pandoc" executable to be on the PATH.
    '''
    if not mw_content:
        return None
    if not isinstance(mw_content, bytes):
        mw_content = mw_content.encode('utf-8')
    # Feed the markup straight to pandoc's stdin. Building an
    # 'echo "<content>"' command line breaks on quotes and backslashes in
    # the content and lets echo interpret leading text as options.
    pandoc = subprocess.Popen(
        ['pandoc', '-f', 'mediawiki', '-t', 'html5'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
    )
    # trailing newline mirrors what "echo" used to append to the input
    html = pandoc.communicate(mw_content + b'\n')[0]
    return html
#def generate_xml(): #def generate_xml():

@ -35,7 +35,7 @@
<!-- &lt;div class="isotope"&gt;&lt;/div&gt; --> <!-- &lt;div class="isotope"&gt;&lt;/div&gt; -->
<!-- WORKS DIV WILL BE ADDED HERE --> <!-- WORKS DIV WILL BE ADDED HERE -->
<div class="item" id="15986"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a><div class="work"><p class="work">User:Joak/graduation/catalog1</p><p class="work">Joseph Knierzinger</p><p class="work">2015</p></div></div><div class="item" id="15974"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/a/af/22xxxxx.jpg/500px-22xxxxx.jpg"></a><div class="work"><p class="work">Ahhhh</p><p class="work">JOK</p><p class="work">2015</p></div></div><div class="item" id="15982"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a><div class="work"><p class="work">The Aesthetics of Ethics</p><p class="work">Ana Luísa Moura</p><p class="work">2015</p></div></div><div class="item" id="15965"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a><div class="work"><p class="work">Qq</p><p class="work">Luther Blisset</p><p class="work">2015</p></div></div></div> <div class="item" id="15986"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a><div class="work"><p class="work">User:Joak/graduation/catalog1</p><p class="work">Joseph Knierzinger</p><p class="work">2015</p></div></div><div class="item" id="15999"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/1/16/Pointer.gif"></a><div class="work"><p class="work">User:Max Dovey/maxgradbio</p><p class="work">Max Dovey</p><p class="work">2015</p></div></div><div class="item" id="15974"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/a/af/22xxxxx.jpg/500px-22xxxxx.jpg"></a><div class="work"><p class="work">Ahhhh</p><p class="work">JOK</p><p class="work">2015</p></div></div><div class="item" 
id="15982"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/b/bd/Collage_007_thumbnail.jpg"></a><div class="work"><p class="work">The Aesthetics of Ethics</p><p class="work">Ana Luísa Moura</p><p class="work">2015</p></div></div><div class="item" id="15965"><a class="work" href="#"><img class="work" src="http://pzwiki.wdka.nl/mw-mediadesign/images/thumb/8/85/Luther-blissett-300.jpg/500px-Luther-blissett-300.jpg"></a><div class="work"><p class="work">Qq</p><p class="work">Luther Blisset</p><p class="work">2015</p></div></div></div>
<!-- &lt;script src="./js/jquery-2.1.3.min.js"&gt;&lt;/script&gt; --> <!-- &lt;script src="./js/jquery-2.1.3.min.js"&gt;&lt;/script&gt; -->
<!-- &lt;script src="./js/isotope.pkgd.min.js"&gt;&lt;/script&gt; --> <!-- &lt;script src="./js/isotope.pkgd.min.js"&gt;&lt;/script&gt; -->

Loading…
Cancel
Save