rewriting the whole mechanism from scratch - w/out json

master
Castro0o 10 years ago
parent 63e8313370
commit 4fbfd6b88a

File diff suppressed because one or more lines are too long

@ -64,11 +64,59 @@ def write_html_file(html_tree, filename):
edited.write(html) edited.write(html)
edited.close() edited.close()
# mw article modules
def parse_work_page(title, content):
    """Parse a ``{{Graduation work ...}}`` wiki page into a dictionary.

    Parameters:
        title: page title; stored under the ``'Title'`` key.
        content: raw MediaWiki text of the page.

    Returns:
        ``None`` when ``content`` does not start with the Graduation work
        template or the template body cannot be located. Otherwise a tuple
        ``(work_dict, extra)`` where ``work_dict`` maps every template key to
        its value (newlines removed) and ``extra`` is ``('Extra', text)`` for
        the text following the template, or the empty string when there is
        none.
    """
    # The original header pattern contained a stray backslash ("\Graduation"),
    # which newer versions of the re module reject as a bad escape.
    if not re.match(r'\{\{Graduation work', content):
        return None

    found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL)
    if found is None:
        # Header present but no "{{Graduation work\n...}}" body: treat the
        # page like a non-work page instead of failing with IndexError.
        return None
    template, extra = found.groups()

    # template's key/value pairs: "|key=value\n"
    keyval = re.findall(r'\|(.*?)=(.*?\n)', template, re.DOTALL)

    work_dict = {'Title': title}
    if extra:
        # NOTE(review): extra is rebound to a (key, value) tuple and returned
        # in that form; kept as-is because callers may rely on it.
        extra = ('Extra', extra)
        keyval.append(extra)

    for key, val in keyval:
        val = val.replace('\n', '')
        if 'Creator' in key:
            val = val.replace(', ', '')
        elif 'Thumbnail' in key:
            thumburl = api_thumb_url(val)
            work_dict['Thumbnail_url'] = thumburl
            # Single-argument form prints the same text on Python 2 and 3.
            print('THUMB: %s' % (thumburl,))
        work_dict[key] = val
    return work_dict, extra
# Alternative to parse_work_page - W/out dictionary
def parse_work(title, content):
    """Extract the description and trailing text of a Graduation work page.

    Dictionary-free alternative to ``parse_work_page``.

    Parameters:
        title: page title (currently unused; kept for a parallel signature).
        content: raw MediaWiki text of the page.

    Returns:
        ``None`` when ``content`` does not start with the Graduation work
        template or the template body cannot be located. Otherwise a tuple
        ``(extra, description)``: ``extra`` is the text after the closing
        ``}}`` and ``description`` is the value of the last template key
        containing ``'Description'`` (trailing newline kept), or ``''`` when
        the template has no such key.
    """
    # The original header pattern contained a stray backslash ("\Graduation"),
    # which newer versions of the re module reject as a bad escape.
    if not re.match(r'\{\{Graduation work', content):
        return None

    found = re.search(r'\{\{Graduation work\n(.*?)\}\}(.*)', content, re.DOTALL)
    if found is None:
        # Header present but no "{{Graduation work\n...}}" body.
        return None
    template, extra = found.groups()

    # template's key/value pairs: "|key=value\n"
    keyval = re.findall(r'\|(.*?)=(.*?\n)', template, re.DOTALL)

    # Default avoids an UnboundLocalError when no Description field exists.
    description = ''
    for key, val in keyval:
        if 'Description' in key:
            description = val

    # Debug output kept from the original; valid on Python 2 and 3.
    print(keyval)
    return extra, description
# Conversion Modules # Conversion Modules
def pandoc2html(mw_content): def pandoc2html(mw_content):
if mw_content: if mw_content:
mw_content = mw_content.encode('utf-8') mw_content = mw_content#.encode('utf-8')
# convert from mw to html # convert from mw to html
args_echo =shlex.split( ('echo "{}"'.format(mw_content)) ) args_echo =shlex.split( ('echo "{}"'.format(mw_content)) )
args_pandoc = shlex.split( 'pandoc -f mediawiki -t html5' ) args_pandoc = shlex.split( 'pandoc -f mediawiki -t html5' )
@ -90,7 +138,11 @@ def pandoc(filename, title, creator, date, website, thumbnail, bio, description,
# return html # return html
# pandoc either reades input from stdin or through input file # pandoc either reades input from stdin or through input file
# pandoc DOES NOT convert variables # pandoc DOES NOT convert variables; it has to receive the input from stdin.
# to create html, convert bio, description, extra, one at a time
# insert them into HTML template by:
## generating html in python? and inserting sub elements ?
## ??
# pandoc = 'pandoc -s -f mediawiki -t html5 \ # pandoc = 'pandoc -s -f mediawiki -t html5 \
# --template template_article.html \ # --template template_article.html \

@ -183,16 +183,21 @@ for key in json_allworks.keys():
print work_file, website print work_file, website
# if key in work.keys() else '' # if key in work.keys() else ''
pandoc( filename=work_file, \
template='web/work-template.html', \ if extra:
title=title, \ extra_html = pandoc2html(extra)
creator=creator, \ print (extra_html)
date=date, \
website=website, \ # pandoc( filename=work_file, \
thumbnail=thumbnail, \ # template='web/work-template.html', \
bio=bio, \ # title=title, \
description=description, \ # creator=creator, \
extra=extra ) # convert to HTML # date=date, \
# website=website, \
# thumbnail=thumbnail, \
# bio=bio, \
# description=description, \
# extra=extra ) # convert to HTML
# print '= = = = = = =' # print '= = = = = = ='
# print html_content # print html_content
# print '= = = = = = =' # print '= = = = = = ='

@ -0,0 +1,31 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
###########
# Testing downloading and converting mw page content to html
###########
from mmdc_modules import api_request, api_page, api_thumb_url, pandoc2html, parse_work_page, parse_work
# Load the page template and fill its placeholders with test values.
# The with-block closes the file handle, which the original left open.
with open("web/page-template.html", "r") as template_file:
    template = template_file.read()
template = template.format(title="This is My Title", creator='', date='', website='', thumbnail='', bio='', description='', extra='')

# download
pageid = '15965'
article = api_page(pageid, 'content')
#print article
#print '----------- article -----'

# parsing article
parsed = parse_work('Qq', article)
if parsed is None:
    # parse_work yields None for pages that do not start with the
    # Graduation work template; stop with a readable message rather than
    # an unpacking TypeError.
    raise SystemExit('page %s is not a Graduation work page' % pageid)
extra, description = parsed

# placing mw content inside dict makes it non convertable. Why?
#print extra #work_dict['Extra']
html_extra = pandoc2html(extra.encode('utf-8'))
html_description = pandoc2html(description.encode('utf-8'))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('----------- html -----')
print(html_description)
#print html_extra
print(template)

@ -0,0 +1,22 @@
<!DOCTYPE html>
<!-- Page template for a single work. Names wrapped in curly braces are placeholders; presumably filled in by Python string formatting (TODO confirm against the caller), so comments here must not contain literal braces. -->
<html>
<head>
<meta charset="UTF-8">
<title>{title}</title> <!-- Work title goes here -->
<link rel="stylesheet" href="css/style.css">
</head>
<body>
<!-- Header: metadata fields of the work -->
<div id="header">
<div id="title"><h1>{title}</h1></div>
<div id="creator"><h2>{creator}</h2></div>
<div id="date">{date}</div>
<div id="website"><a href="{website}">{website}</a></div>
<div id="thumbnail"><img src="{thumbnail}" class="" alt="" /></div>
<div id="bio">{bio}</div>
</div>
<!-- Body: description and extra slots; presumably receive already-converted HTML (TODO confirm) -->
<div id="body">
<div id="description">{description}</div>
<div id="extra">{extra}</div>
</div>
</body>
</html>
Loading…
Cancel
Save