From 72a840f0686bde9e118c6226fb6b31a5bb69963c Mon Sep 17 00:00:00 2001 From: Castro0o Date: Sat, 9 May 2015 10:59:48 +0200 Subject: [PATCH] images: using mw_client --- mmdc_modules.py | 54 +++++++++++++---------------- mmdc_wiki2web.py | 56 +++++++++++++------------------ web/2015-Artyom.html | 10 +++--- web/2015-Henk-Jelle_de_Groot.html | 8 ++--- web/2015-Joseph_Knierzinger.html | 56 ++++++++++++++++--------------- web/2015-Lucia_Dossin.html | 8 ++--- web/2015-Luther_Blisset.html | 10 +++--- web/2015-Max_Dovey.html | 8 ++--- web/index.html | 2 +- web/page-template.html | 8 ++--- 10 files changed, 103 insertions(+), 117 deletions(-) diff --git a/mmdc_modules.py b/mmdc_modules.py index 4e61b44..d2b6766 100644 --- a/mmdc_modules.py +++ b/mmdc_modules.py @@ -4,17 +4,19 @@ import urllib2, json, pprint, re import xml.etree.ElementTree as ET import subprocess, shlex, urllib +from urlparse import urlparse from mwclient import Site sid = '1234' useragent = "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101" endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" +site = Site("pzwiki.wdka.nl", path="/mw-mediadesign/") # API MODULES def api_request(action, pagename): #get page: content, metadata, images, imageifnp pagename = urllib.quote(pagename.encode('utf-8')) - print pagename +# print pagename url = endpoint + (action.format(pagename)) # print 'API REQUEST', url @@ -32,21 +34,12 @@ def api_page(title, query): response = ((api_response.get('revisions'))[0])['*'] elif query == 'metadata': response = api_request('action=query&titles={}&prop=info', title) - elif query == 'articleimgs': - response = api_request('action=query&titles={}&prop=images', title) - elif query == 'file': - response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url',title) - elif query == 'imageinfo': - pagename = title # in imageinfo titles are used instead of id - print 'IMAGEINFO', pagename - response = 
api_request("action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500", pagename) # iiurlwidht dermines with of thumbnail return response ############################## -# CATEGORIES AND PAGES +# CATEGORIES, PAGES AND IMAGES ############################## -def mw_cats(args): - site = Site(args.host, path=args.path) +def mw_cats(site, args): last_names = None for cats in args.category: for ci, cname in enumerate(cats): @@ -62,32 +55,32 @@ def mw_cats(args): results = list(results) return [p.name for p in results] +def mw_singelimg_url(site, img): #find full of an img + if 'File:' not in img: + img = 'File:'+img + img_page=site.Pages[img] + img_url = (img_page.imageinfo)['url'] + return img_url + +def mw_imgsurl(site, page): #all the imgs in a page #return: list of tuples (img.name, img.fullurl) + page = site.Pages[page] + imgs = page.images() + imgs = list(imgs) + urls = [((img.name),(img.imageinfo)['url']) for img in imgs] + return urls + + -def api_file_url(filename): # get full urls - page_content_dict = api_page(filename, 'file') - if 'imageinfo' in page_content_dict.keys(): - imgurl = ((page_content_dict.get('imageinfo'))[0].get('url')) - return imgurl - else: - return None - -def api_thumb_url(filename): - thumburl = api_page(filename, 'imageinfo') - thumburl = ((thumburl.get('imageinfo'))[0].get('thumburl')) - return thumburl # PROCESSING MODULES def write_html_file(html_tree, filename): - print 'WRITE HTML FILE' doctype = "" - print 'TREE', type(html_tree) html = doctype + ET.tostring(html_tree, method='html', encoding='utf-8', ) edited = open(filename, 'w') #write edited.write(html) edited.close() -# Alternative to parse_work_page def parse_work(title, content): workdict = {'Title':title, 'Creator':'', 'Date':'', 'Website':'', 'Thumbnail':'', 'Bio':'', 'Description':'', 'Extra':''} if re.match('\{\{\Graduation work', content): @@ -103,9 +96,9 @@ def parse_work(title, content): if 'Creator' in key: val = val.replace(', ', '') elif 'Thumbnail' in 
key: - val = api_thumb_url(val) + val = mw_singelimg_url(site, val)#api_thumb_url(val) elif 'Website' in key: - val = urllib.unquote(val) + val = urllib.unquote( val) workdict[key]=val.encode('utf-8') # pprint.pprint(workdict) return workdict @@ -120,8 +113,7 @@ def pandoc2html(mw_content): p2 = subprocess.Popen(args_pandoc, stdin=p1.stdout, stdout=subprocess.PIPE) html = (p2.communicate())[0] return html - - + gallery_exp=re.compile('(.*?)', re.S) imgfile_exp=re.compile('(File:(.*?)\.(gif|jpg|jpeg|png))') diff --git a/mmdc_wiki2web.py b/mmdc_wiki2web.py index 59e2af5..5b07135 100755 --- a/mmdc_wiki2web.py +++ b/mmdc_wiki2web.py @@ -1,24 +1,11 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -########### -# prototyping downloading and converting mw page content to html -########### - -# OVERVIEW: -# * creating one single html page -# * replace {{youtube/vimeo}} with video tags -# * replace galleries with rows of images -# request all the pages - -# **BUILD INDEX** - -# build all pages - import xml.etree.ElementTree as ET import html5lib, pprint -from mmdc_modules import api_page, pandoc2html, parse_work, api_file_url, replace_gallery, replace_video, index_addwork, write_html_file, mw_cats +from mmdc_modules import api_page, pandoc2html, parse_work, replace_gallery, replace_video, index_addwork, write_html_file, mw_cats, mw_imgsurl, site from argparse import ArgumentParser +from mwclient import Site p = ArgumentParser() p.add_argument("--host", default="pzwiki.wdka.nl") @@ -26,6 +13,7 @@ p.add_argument("--path", default="/mw-mediadesign/", help="nb: should end with / p.add_argument("--category", "-c", nargs="*", default=[], action="append", help="category to query, use -c foo -c bar to intersect multiple categories") args = p.parse_args() print args + ######## # QUERY API ######## @@ -36,11 +24,10 @@ endpoint = "http://pzwiki.wdka.nl/mw-mediadesign/api.php?format=json&" ######## # CREATE INDEX ######## -memberpages=mw_cats(args) +memberpages=mw_cats(site, args) 
#memberpages['Ctrl-F Reader','As We Speak'] print 'memberpages', memberpages - ######## # Templates ######## @@ -54,6 +41,7 @@ index_container = index_tree.find(".//div[@class='isotope']") #maybe id is impor ######## for member in memberpages: print ' member', member + # download mw work page # pageid=member['pageid'] # pagetitle=(member['title'].encode('utf-8')) @@ -61,25 +49,20 @@ for member in memberpages: workpage_mw = replace_gallery(workpage_mw) workpage_mw = replace_video(workpage_mw) workdict = parse_work(member, workpage_mw) # create dictionary workpage_mw template - - # Only parse pages with Creator, Title, Thumbnail + workpage_imgs = mw_imgsurl(site, member) + print 'WORKPAGE_IMGS', workpage_imgs + # only parse pages with Creator, Title, Thumbnail if len(workdict['Creator'])>1 and len(workdict['Title'])>1 and len(workdict['Description'])>1 and len(workdict['Thumbnail'])>1: for key in workdict.keys(): # convert Extra, Description, Bio to HTML if key in ['Extra', 'Description', 'Bio'] and workdict[key]: workdict[key] = pandoc2html( (workdict[key].decode('utf-8'))) elif key in ['Creator']: workdict[key] = workdict[key].replace(',','' ) #remove comma - #replace empty dict values with ' ' # to avoid empty tags for key in workdict.keys(): - if workdict[key] is '' and key is not 'Thumbnail': + if workdict[key] is '':# and key is not 'Thumbnail': workdict[key] = ' ' - elif key is 'Thumbnail' and workdict[key]: - img = ''.format(workdict[key]) - # append img to text - workdict[key] = workdict[key] # + img - #print 'THUMB', workdict[key] if type(workdict[key]) is unicode: workdict[key]=workdict[key].encode('utf-8') @@ -108,14 +91,23 @@ for member in memberpages: page_website.text=workdict['Website'] page_thumb = page_tree.find('.//img[@id="thumbnail"]') page_thumb.set('src', workdict['Thumbnail']) + # give work page's imgs full url imgs = page_tree.findall('.//img') + print 'ALL PAGE IMGS', imgs for img in imgs: - if img.get('id') is not 'logo': - src = 
img.get('src') - newsrc = api_file_url(src) ## MOVE FULL URl OPERATION TO MW CONTENT - if newsrc: - img.set('src', newsrc) + img_class = img.get('class') + if img_class != 'template': + print 'img_class',img_class + src =unicode(img.get('src')) + print src + for pair in workpage_imgs: + if src.replace("_", " ") in pair[0]: #if img in html match img in workpage_imgs + print 'FOUND IMG', pair + img.set('src', pair[1]) + # newsrc = api_file_url(src) MOVE FULL URl OPERATION TO MW CONTENT + # if newsrc: + # img.set('src', newsrc) # save work page creator = workdict['Creator']#.decode('ascii', 'ignore') creator = creator.replace(' ','_') @@ -137,7 +129,7 @@ for member in memberpages: # print '----', workdict['Title'] # print ET.tostring(tree) -print index_tree, type(index_tree) +#print index_tree, type(index_tree) write_html_file(index_tree, 'web/index.html') print print diff --git a/web/2015-Artyom.html b/web/2015-Artyom.html index 0221d2b..7c49de1 100644 --- a/web/2015-Artyom.html +++ b/web/2015-Artyom.html @@ -15,7 +15,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,24 +42,24 @@
-
+
- +

Artyom-graduation-work

- +
- +

Photography, shortly after its invention, took on itself the responsibility of documenting history and giving us images of our world that was previously granted to painting. Photography was the first image of its kind, one that is conceived through technologies – the camera – that were products of modern science. Photography democratized the image-making process, allowing anyone to produce an image with the click of a button, as well as taking images out of the realm of aesthetics and art, which was peculiar to painting, and making them relevant to almost every aspect of culture. As technology developed, giving us television and the general media establishment, the bigger part of our world came to be seen through those images rather than by our own eyes. In today’s online world images function as a distinct communication medium and seem to be a better fit for the quick and accumulated online culture, where they serve as windows through which we can access the world far beyond our reach.

Photography as an image is known for its ability to give an objective representation of the world – it shows the world ‘as it is’. This notion, although it has been disproved by a number of studies, is still accepted by the wider culture. Because of its scientific nature, namely the fact that the subject inscribes itself on the film without the involvement of the human hand – without human subjectivity – photography came to gain its objective status. This notion went as far as making photography an image that can stand as ‘proof’ of something. Similarly, Google Images established itself as an agency that gives a precise and accurate representation of a subject. Its algorithms were developed in such a way as to display the most relevant and the most popular images of the subject. So its notion of objectivity seems to rely again on its scientific factor, namely on the algorithmic analysis that automatically chooses the content and arranges it on the page by relevance.

diff --git a/web/2015-Henk-Jelle_de_Groot.html b/web/2015-Henk-Jelle_de_Groot.html index fbc3250..fae1729 100644 --- a/web/2015-Henk-Jelle_de_Groot.html +++ b/web/2015-Henk-Jelle_de_Groot.html @@ -15,7 +15,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,17 +42,17 @@
-
+
- +

U ntitled

- +
diff --git a/web/2015-Joseph_Knierzinger.html b/web/2015-Joseph_Knierzinger.html index 977cb23..83dc999 100644 --- a/web/2015-Joseph_Knierzinger.html +++ b/web/2015-Joseph_Knierzinger.html @@ -1,12 +1,12 @@ - - - - - Title - - - - + + + + + User:Joak/graduation/catalog1 + + + + @@ -15,24 +15,24 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

- +

Joseph Knierzinger

-

Title 2015

+

User:Joak/graduation/catalog1 2015

- + -
desc
+
-
-
+
-

Title

+ + + +

User:Joak/graduation/catalog1

+
- - extra -
+ +

my free text

+
- + + + diff --git a/web/2015-Lucia_Dossin.html b/web/2015-Lucia_Dossin.html index 2aa988d..d1c7c15 100644 --- a/web/2015-Lucia_Dossin.html +++ b/web/2015-Lucia_Dossin.html @@ -15,7 +15,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,17 +42,17 @@
-
+
- +

Mina

- +
diff --git a/web/2015-Luther_Blisset.html b/web/2015-Luther_Blisset.html index ecd506f..3f5abe3 100644 --- a/web/2015-Luther_Blisset.html +++ b/web/2015-Luther_Blisset.html @@ -15,7 +15,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,24 +42,24 @@
-
+
- +

Qq

- +
- +

While the folk heroes of the early-modern period and the nineteenth century served a variety of social and political purposes, the Luther Blissett Project (LBP) were able to utilize the media and communication strategies unavailable to their predecessors. According to Marco Deseriis, the main purpose of the LBP was to create a folk hero of the information society whereby knowledge workers and immaterial workers could organize and recognize themselves.[5] Thus, rather than being understood only as a media prankster and culture jammer, Luther Blissett became a positive mythic figure that was supposed to embody the very process of community and cross-media storytelling. Roberto Bui—one of the co-founders of the LBP and Wu Ming—explains the function of Luther Blissett and other radical folk heroes as mythmaking or mythopoesis

diff --git a/web/2015-Max_Dovey.html b/web/2015-Max_Dovey.html index e4cb526..9e15dca 100644 --- a/web/2015-Max_Dovey.html +++ b/web/2015-Max_Dovey.html @@ -15,7 +15,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,17 +42,17 @@
-
+
- +

User:Max Dovey/maxgradbio

- +
diff --git a/web/index.html b/web/index.html index 7751cba..747a64b 100644 --- a/web/index.html +++ b/web/index.html @@ -102,7 +102,7 @@ <div class="item video flash"><img src="img/project_roel2.png"></div> <div class="item narrative"><img src="img/project_andre.jpeg"></div--> -
+
diff --git a/web/page-template.html b/web/page-template.html index d2fb61d..181ecf7 100644 --- a/web/page-template.html +++ b/web/page-template.html @@ -17,7 +17,7 @@
-

Tempted by Tomorrow

+

Tempted by Tomorrow

@@ -42,17 +42,17 @@
-
+
- +

- +