approach only with pandoc: failed

master
Castro0o 10 years ago
parent 5405d9861f
commit 63e8313370

File diff suppressed because one or more lines are too long

@ -77,6 +77,36 @@ def pandoc2html(mw_content):
html = (p2.communicate())[0] html = (p2.communicate())[0]
return html return html
def pandoc(filename, title, creator, date, website, thumbnail, bio, description, extra, template):
    '''pandoc: convert mediawiki syntax to html, filling a template.

    Runs `pandoc -s -f mediawiki -t html --template <template> ... -o <filename>`.
    The mediawiki text in `extra` is fed to pandoc on stdin and becomes the
    document body; the remaining fields are handed over as template variables.

    filename    -- path of the HTML file pandoc writes (-o)
    title, creator, date, website, thumbnail, bio
                -- values for the template variables of the same name
    description -- accepted for interface compatibility; it is not passed
                   to pandoc (the original command never used it either)
    extra       -- mediawiki source converted into the document body
    template    -- path of the pandoc HTML template

    Returns None: the result is written to `filename` by pandoc itself.
    '''
    def unescape(value):
        # Existing callers escape double quotes as \" because the command
        # used to be assembled as one quoted string and split with shlex.
        # Arguments are now passed as a list, so that escaping is undone
        # here and the text reaches pandoc verbatim.
        return value.replace('\\"', '"')

    # Each variable is passed exactly once. The original passed `website`
    # twice; per the pandoc manual a repeated variable becomes a list.
    variables = (
        ('title', title),
        ('creator', creator),
        ('date', date),
        ('website', website),
        ('thumbnail', thumbnail),
        ('bio', bio),
    )

    # Build the argument vector directly instead of formatting a string and
    # splitting it: quotes or spaces inside a value can no longer break or
    # re-split the command line.
    args_pandoc = ['pandoc', '-s', '-f', 'mediawiki', '-t', 'html',
                   '--template', template]
    for name, value in variables:
        args_pandoc += ['--variable', name + '=' + unescape(value)]
    args_pandoc += ['-o', filename]
    print(args_pandoc)

    # The body used to be produced by a separate `echo` process; write it
    # to pandoc's stdin directly (keeping echo's trailing newline).
    body = unescape(extra) + '\n'
    if not isinstance(body, bytes):
        # Text (unicode) input must be encoded before it is written to the pipe.
        body = body.encode('utf-8')

    p = subprocess.Popen(args_pandoc, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() writes the body, closes stdin and waits for pandoc to
    # exit. stdout is drained but empty, since -o redirects output to a file.
    p.communicate(body)
    # return html
# pandoc reads its input either from stdin or from an input file
# pandoc DOES NOT convert the values passed with --variable (they are not run through the mediawiki reader)
# pandoc = 'pandoc -s -f mediawiki -t html5 \
# --template template_article.html \
# --variable title="{title}" \
# --variable section="{section}" \
# --variable topics="{topics}" \
# --variable issueName="{iname}" \
# --variable issueNumber="{inum}" \
# "articles/tmp_content.mw" -o "{articlepath}/{htmlfile}.html"'.format(articlepath=path, title=(pagename).replace("_"," "), section=in_section, topics=in_topic, iname=in_issuename, inum=in_issue, htmlfile=pagename)
# subprocess.call(pandoc, shell=True) # saved in tmp_content.html html
# html = open('tmp_content.html', 'r') #write mediawiki content to html in tmp_content.html
# html = html.read()
# return html
def img_fullurl(parent): def img_fullurl(parent):
imgs = parent.findall('.//img') imgs = parent.findall('.//img')
print 'len IMG', len(imgs) print 'len IMG', len(imgs)

@ -6,7 +6,7 @@
##### #####
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import html5lib, urllib2, json, pprint, re import html5lib, urllib2, json, pprint, re
from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file from mmdc_modules import api_thumb_url, pandoc2html, img_fullurl, api_file_url, write_html_file, pandoc
#import mmdc_create_json import api_thumb_url #import mmdc_create_json import api_thumb_url
json_allworks_file = open('allworks_mmdc.json', 'r') # save json json_allworks_file = open('allworks_mmdc.json', 'r') # save json
json_allworks = json.loads(json_allworks_file.read()) json_allworks = json.loads(json_allworks_file.read())
@ -100,19 +100,11 @@ def create_workpage( work, work_key, tree): # replace text content in dict with
# HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ??? # HOW TO INSERT EXISTING HTML CHUNCK INTO TEMPLATE ???
# test_sub = ET.SubElement(test_el, 'span') # test_sub = ET.SubElement(test_el, 'span')
# test_sub.text = content # test_sub.text = content
# sub = ET.SubElement(parent, test_el) # sub = ET.SubElement(parent, test_el)
# mw_content = work[key]
# mw_content = work[key]
# # if re.search(gallery_exp, mw_content): # # if re.search(gallery_exp, mw_content):
# # # replace_gallery must replace the gallery inline # # # replace_gallery must replace the gallery inline
# # mw_content, gallery_imgs = replace_gallery(mw_content) # # mw_content, gallery_imgs = replace_gallery(mw_content)
@ -141,20 +133,12 @@ def create_workpage( work, work_key, tree): # replace text content in dict with
# for el in all_el: # for el in all_el:
# print 'el', ET.tostring(el) # print 'el', ET.tostring(el)
# div_body.append(el) # div_body.append(el)
# imgs = document_el.findall('.//img') # imgs = document_el.findall('.//img')
# # if imgs: # # if imgs:
# # for img in imgs: # # for img in imgs:
# # src = api_file_url(img.get('src')) # # src = api_file_url(img.get('src'))
# # img.set('src', src) # # img.set('src', src)
# # print 'IMG', img, src # # print 'IMG', img, src
# print "****************************" # print "****************************"
# print ET.tostring(div_body) # print ET.tostring(div_body)
# print "****************************" # print "****************************"
@ -184,17 +168,42 @@ def edit_index(filepath, json_allworks_dict):
return tree return tree
worktemplate = open('web/work-template.html', 'r') #worktemplate = open('web/work-template.html', 'r')
for key in json_allworks.keys(): for key in json_allworks.keys():
work=json_allworks[key] work=json_allworks[key]
title = work['Title'] creator = work['Creator'].encode('utf-8') if 'Creator' in work else ''
date = work['Date'] date = work['Date'] if 'Date' in work else ''
creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_') website=work['Website'] if 'Website' in work else ''
work_file = 'web/{}-{}-{}.html'.format(date, key, creator) thumbnail=work['Thumbnail_url'] if 'Thumbnail_url' in work else ''
bio=(work['Bio'].encode('utf-8')).replace('"','\\"') if 'Bio' in work else ''
description=(work['Description']).replace('"','\\"').encode('utf-8') if 'Description' in work else ''
extra=(work['Extra'].encode('utf-8')).replace('"','\\"') if 'Extra' in work else ''
work_file = 'web/{}-{}-{}.html'.format(work['Date'], (work['Creator'].encode('ascii', 'ignore')).replace(' ','_'), key)
title = work['Title']
print work_file, website
# if key in work.keys() else ''
pandoc( filename=work_file, \
template='web/work-template.html', \
title=title, \
creator=creator, \
date=date, \
website=website, \
thumbnail=thumbnail, \
bio=bio, \
description=description, \
extra=extra ) # convert to HTML
# print '= = = = = = ='
# print html_content
# print '= = = = = = ='
# creator = (work['Creator'].encode('ascii', 'ignore')).replace(' ','_')
# description = work['Description']
# print work_file # print work_file
work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False) # work_tree = html5lib.parse(worktemplate, namespaceHTMLElements=False)
create_workpage(work, key, work_tree ) # create_workpage(work, key, work_tree )
write_html_file(work_tree, work_file) # write_html_file(work_tree, work_file)
### ISSSUES ### ISSSUES

@ -1,16 +0,0 @@
<!DOCTYPE HTML><html><head>
<meta charset="UTF-8">
<title></title> <!-- Work title will go to here -->
<link href="css/style.css" rel="stylesheet">
</head>
<body>
<div id="work">
<div class="header"><a href="https://en.wikipedia.org/wiki/Luther_Blissett_%28nom_de_plume%29" id="Website">https://en.wikipedia.org/wiki/Luther_Blissett_%28nom_de_plume%29</a><p id="Bio">Luther Blissett is a multiple-use name, an "open pop star" informally adopted and shared by hundreds of artists and activists all over Europe and the Americas since 1994. The pseudonym first appeared in Bologna, Italy, in mid-1994, when a number of cultural activists began using it for staging a series of urban and media pranks and to experiment with new forms of authorship and identity. From Bologna the multiple-use name spread to other European cities, such as Rome and London, as well as countries such as Germany, Spain, and Slovenia.[1] Sporadic appearances of Luther Blissett have been also noted in Canada, the United States, and Brazil.</p><h1 id="Title">Qq</h1><p id="Creator">Luther Blisset</p><p id="Date">2015</p></div><!--title, Creator, Date, Bio will go here -->
<div class="body"><p>The novel Q was written by four Bologna-based members of the LBP, as a final contribution to the project, and published in Italy in 1999. So far, it has been translated into English (British and American), Spanish, German, Dutch, French, Portuguese (Brazilian), Danish, Polish, Greek, Czech, Russian, Turkish, Basque and Korean. In August 2003 the book was nominated for the Guardian First Book Prize.</p>
<img alt="This is possibly a screen shot" src="http://pzwiki.wdka.nl/mw-mediadesign/images/3/38/Screen_Shot_2015-03-31_at_11.07.35.png" title="fig:This is possibly a screen shot"><img alt="Quipu - a hybrid between octopus and rope " src="http://pzwiki.wdka.nl/mw-mediadesign/images/c/cc/Quipu.png" title="fig:Quipu - a hybrid between octopus and rope "><p>While the folk heroes of the early-modern period and the nineteenth century served a variety of social and political purposes, the Luther Blissett Project (LBP) were able to utilize the media and communication strategies unavailable to their predecessors. According to Marco Deseriis, the main purpose of the LBP was to create a folk hero of the information society whereby knowledge workers and immaterial workers could organize and recognize themselves.[5] Thus, rather than being understood only as a media prankster and culture jammer, Luther Blissett became a positive mythic figure that was supposed to embody the very process of community and cross-media storytelling. Roberto Bui—one of the co-founders of the LBP and Wu Ming—explains the function of Luther Blissett and other radical folk heroes as mythmaking or mythopoesis<img alt="Quipu - a hybrid between octopus and rope " src="http://pzwiki.wdka.nl/mw-mediadesign/images/c/cc/Quipu.png" title="fig:Quipu - a hybrid between octopus and rope "><img alt="This is possibly a screen shot" src="http://pzwiki.wdka.nl/mw-mediadesign/images/3/38/Screen_Shot_2015-03-31_at_11.07.35.png" title="fig:This is possibly a screen shot"></p>
</div> <!-- Description, Extra will go here -->
<div class="av"><img src="http://pzwiki.wdka.nl/mw-mediadesign/images/8/85/Luther-blissett-300.jpg"><iframe height="450px" src="https://www.youtube.com/embed/WUXsLyX4u3M" width="600px"></iframe><iframe height="450px" src="https://www.youtube.com/embed/fg-IyA0jX6w" width="600px"></iframe></div> <!-- AV material (Thumbanail, gallery, video )will go here-->
</div>
</body></html>

@ -2,23 +2,22 @@
<html> <html>
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<title></title> <!-- Work title will go to here --> <title>$title$</title> <!-- Work title will go to here -->
<link rel="stylesheet" href="css/style.css"> <link rel="stylesheet" href="css/style.css">
</head> </head>
<body> <body>
<div id="header"> <div id="header">
<div id="title"></div> <div id="title"><h1>$title$</h1></div>
<div id="creator"></div> <div id="creator"><h2>$creator$</h2></div>
<div id="date"></div> <div id="date">$date$</div>
<div id="website"></div> <div id="website"><a href="$website$">$website$</a></div>
<div id="thumbnail"></div> <div id="thumbnail"><img src="$thumbnail$" class="" alt="" /></div>
<div id="bio"></div> <div id="bio">$bio$</div>
</div> </div>
<div id="body"> <div id="body">
<div id="description"></div> $body$
<div id="extra"></div> <!--div id="description">$description$</div>
</div> <!-- Description, Extra will go here --> <div id="extra">$extra$</div-->
</div>
</body> </body>
</html> </html>

Loading…
Cancel
Save