Merge branch 'master' of ssh://git.xpub.nl:2501/XPUB/special-issue-11-wiki2html

pull/6/head
Castro0o 5 years ago
commit 88bf0a0b83

1
.gitignore vendored

@ -17,3 +17,4 @@ share/
__pycache__/ __pycache__/
*.jpg *.jpg
*.jpeg *.jpeg
secrets.py

@ -0,0 +1,67 @@
import os, json, sys, urllib
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import unpack_response, clean_dir, remove_nonwords
from functions import Colors
import argparse
# --- Command-line interface --------------------------------------------------
# Options select the wiki to read from and the directory the HTML is written to.
p = argparse.ArgumentParser(description="Dump wiki files to html",
                            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
p.add_argument("--host", metavar='', default="hub.xpub.nl/sandbox", help='wiki host')
p.add_argument("--path", metavar='', default="/itchwiki/", help="Wiki path. Should end with /")
p.add_argument("--output", default="/var/www/html/archive", help="Output path for pages")
# The query-related options below are disabled; they are kept verbatim for reference.
# p.add_argument("--conditions", "-c", metavar='',
# default='[[File:+]][[Title::+]][[Part::+]][[Date::+]]',
# help='The query conditions')
# p.add_argument("--printouts", "-p", metavar='',
# default='?Title|?Date|?Part|?Partof|?Creator|?Organization|?Format|?Event|?Topic|?Language',
# help='Selection of properties to printout')
# p.add_argument("--sort", "-s", metavar='',
# default='Date,Title,Part',
# help='Sorting according to conditions')
# p.add_argument("--order", "-o", metavar='',
# default='asc,asc,asc',
# help='Order of sorting conditions. Should same amount as the --sort properties')
# p.add_argument('--limit', '-l', help='(optional) Limit the number of returned '
# 'items')
# # TODO: GET limit to work.Perhaps with a site.raw_api method
# p.add_argument('--dry', '-d', action='store_true',
# help='dry-run: will only show the query but not run it')
args = p.parse_args()

# --- Site and login ----------------------------------------------------------
site = Site(host=args.host, path=args.path)
# login.txt carries the user name on its first line and the password on its second.
with open('login.txt', 'r') as login:  # read login user & pwd
    loginlines = login.read()
# splitlines() instead of split('\n'): a trailing newline or CRLF line endings
# must not add an extra field (or a stray '\r') to the credentials.
credentials = loginlines.splitlines()
if len(credentials) < 2:
    raise ValueError('login.txt must hold the user name on line 1 and the password on line 2')
user, pwd = credentials[:2]
site.login(username=user, password=pwd)  # login to wiki

# --- Templates ---------------------------------------------------------------
# read template files
with open('templates/title.html') as title_html:
    title_template = Template(title_html.read())
# U+2044 FRACTION SLASH stands in for "/" in page names when building file
# names (presumably so sub-page titles such as "A/B" stay a single file name
# instead of becoming a directory path -- confirm with the consumers of the dump).
SLASH = "\u2044"


def filenameforpage(p):
    """Return the HTML file name for wiki page *p*.

    Reads ``p.name``; every space becomes "_", every "/" becomes ``SLASH``,
    and ".html" is appended.
    """
    return p.name.replace(' ', '_').replace('/', SLASH) + '.html'
# Dump the wiki-rendered HTML of every member of the "Title" category into
# args.output, one file per page.
os.makedirs(args.output, exist_ok=True)  # the target directory may not exist yet
title = site.Categories['Title']
for page in title.members():  # "page", not "p": do not rebind the argument parser's name
    print(page)
    # The parse API returns the rendered HTML under ['text']['*'].
    htmlsrc = site.parse(page=page.name)['text']['*']
    # Explicit UTF-8: wiki content is not limited to ASCII, and the platform's
    # default encoding (e.g. under a C/POSIX locale) may be unable to encode it.
    with open(os.path.join(args.output, filenameforpage(page)), 'w', encoding='utf-8') as f:
        print(htmlsrc, file=f)
    # break

@ -1,76 +0,0 @@
import os, json
from mwclient import Site
from pprint import pprint
from jinja2 import Template
from functions import pandoc, page_props
# Wiki connection and the local paths used by this script; everything is
# resolved relative to the directory that contains the script itself.
site = Site(host='hub.xpub.nl/sandbox', path='/itchwiki/')
wd = os.path.dirname(os.path.abspath(__file__))  # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file:
    images_info = json.load(imgsjson_file)
static_html = os.path.join(wd, 'static_html')
os.makedirs(static_html, exist_ok=True)  # create static_html/ dir
# login.txt carries the user name on its first line and the password on its second.
with open(os.path.join(wd, 'login.txt'), 'r') as login:  # read login user & pwd
    loginlines = login.read()
# splitlines() instead of split('\n'): a trailing newline or CRLF line endings
# must not add an extra field (or a stray '\r') to the credentials.
credentials = loginlines.splitlines()
if len(credentials) < 2:
    raise ValueError('login.txt must hold the user name on line 1 and the password on line 2')
user, pwd = credentials[:2]
site.login(username=user, password=pwd)  # login to wiki
# Jinja2 source for one static page. Placeholders filled in at render time:
# title, date, imgsrc, content, part, partof.
page_html_template = '''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="../static/style.css" />
<title>{{title}}</title>
</head>
<body>
<h1>{{ title }}</h1>
<p><time datetime="{{date}}">{{date}}</time></p>
<div id="img">
<img src="{{ imgsrc }}" />
</div>
<div id="content">
{{ content }}
</div>
<footer>
Part {{part}} of {{partof}}
</footer>
</body>
</html>
'''
# Compile once; the same Template object is rendered for every page.
page_template = Template(page_html_template)
# Render one static HTML page for every image entry loaded from images.json.
for img_info in images_info.values():
    print(img_info)
    page_name = img_info['name']
    page = site.pages[page_name]
    pagetext = page.text()
    pageproperties = page_props(wikicontent=pagetext)
    print(pageproperties)

    title = pageproperties.get('Title')
    if not title:
        continue  # only pages that carry a Title property are rendered

    part = pageproperties.get('Part')
    pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
    page_html = page_template.render(title=title,
                                     date=pageproperties.get('Date'),
                                     imgsrc=os.path.join(imgdir, img_info.get('filename')),
                                     content=pagetext_html,
                                     part=part,
                                     partof=pageproperties.get('Partof'))
    # File name: title without spaces + zero-padded part number. A missing or
    # non-string Part must not crash the whole run, so it is coerced to text
    # first (a missing Part yields "000").
    htmlpage_fn = "{}_{}.html".format(
        title.replace(" ", ""),
        str(part if part is not None else '').zfill(3)
    )
    print(htmlpage_fn)
    # The page declares charset="utf-8", so write it as UTF-8 regardless of
    # the platform's default encoding.
    with open(os.path.join(static_html, htmlpage_fn), 'w', encoding='utf-8') as htmlfile:
        htmlfile.write(page_html)

@ -2,7 +2,7 @@ import os, json, sys, urllib
from mwclient import Site from mwclient import Site
from pprint import pprint from pprint import pprint
from jinja2 import Template from jinja2 import Template
from functions import pandoc, unpack_response, clean_dir, remove_nonwords from functions import unpack_response, clean_dir, remove_nonwords
from functions import Colors from functions import Colors
import argparse import argparse
@ -54,7 +54,6 @@ if args.dry is True:
site = Site(host=args.host, path=args.path) site = Site(host=args.host, path=args.path)
wd = os.path.dirname(os.path.abspath(__file__)) # working directory wd = os.path.dirname(os.path.abspath(__file__)) # working directory
imgdir = os.path.join(wd, 'images')
imgsjson_fn = os.path.join(wd, 'images.json') imgsjson_fn = os.path.join(wd, 'images.json')
with open(imgsjson_fn, 'r') as imgsjson_file: with open(imgsjson_fn, 'r') as imgsjson_file:
images_info = json.load(imgsjson_file) images_info = json.load(imgsjson_file)
@ -74,11 +73,10 @@ with open(os.path.join(wd, 'templates/index.html')) as document_html:
with open(os.path.join(wd, 'templates/document.html')) as document_html: with open(os.path.join(wd, 'templates/document.html')) as document_html:
document_template = Template(document_html.read()) document_template = Template(document_html.read())
with open(os.path.join(wd, 'templates/document_part.html')) as document_html:
document_part_template = Template(document_html.read())
all_document_parts = '' # to append all content all_document_parts = '' # to append all content
documentslist = [] documentslist = []
doc_imgs = []
for answer in site.ask(query): for answer in site.ask(query):
# publication_title = '' # publication_title = ''
# print(answer, answer.keys()) # print(answer, answer.keys())
@ -91,37 +89,42 @@ for answer in site.ask(query):
print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC) print(Colors.GREEN, 'run python3 download_imgs.py to fix the issue', Colors.ENDC)
sys.exit() sys.exit()
page = site.pages[[printout_dict['page']]] # request that page from wiki img_local = os.path.join('../images', img_info.get('filename'))
pagetext = page.text()
pagetext_html = pandoc(pwd=wd, content=pagetext, format_in='mediawiki', format_out='html')
img_local = os.path.join(imgdir, img_info.get('filename'))
# Todo: Create list of all images from document
# TODO: join document_part + document
# TODO: look into the template structure of images : columns and rows # TODO: look into the template structure of images : columns and rows
doc_imgs.append(img_local)
# RENDER document part # on last File(Part) of Document
document_part_html = document_part_template.render(
printout_dict=printout_dict,
imgsrc=os.path.join(imgdir, img_info.get('filename')),
text=pagetext_html,
fullurl=fullurl,)
all_document_parts += document_part_html # append resulting html from document part to the previous parts
if printout_dict['Part'] == printout_dict['Partof']: if printout_dict['Part'] == printout_dict['Partof']:
print(doc_imgs)
# organize doc_imgs into a matrix (list of lists) for the template;
# each sublist holds at most 100 image paths (the chunk size used below),
# e.g. 250 images -> sublists of 100, 100 and 50
doc_imgs_matrix = [doc_imgs[i:i+100] for i in range(0, len(doc_imgs), 100)]
# RENDER DOCUMENT # RENDER DOCUMENT
# by passing all_document_parts html to document_template content # by passing all_document_parts html to document_template content
document_html = document_template.render( document_html = document_template.render(
title=printout_dict.get('Title'), title=printout_dict.get('Title'),
date=printout_dict.get('Date'), date=printout_dict.get('Date'),
organization=printout_dict.get('Organization'),
creator=printout_dict.get('Creator'),
format=printout_dict.get('Format'),
topic=printout_dict.get('Topic'),
imgsmatrix=doc_imgs_matrix,
content=all_document_parts) # render document template content=all_document_parts) # render document template
# print(document_html)
htmlpage_fn = "{}.html".format( htmlpage_fn = "{}.html".format(
remove_nonwords(printout_dict.get('Title')[0]) remove_nonwords(printout_dict.get('Title')[0])
) )
with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile: with open(os.path.join(static_html, htmlpage_fn), 'w') as htmlfile:
htmlfile.write(document_html) htmlfile.write(document_html)
all_document_parts = '' # Reset all_document_parts
# add info to documentslist for index creation # add info to documentslist for index creation
documentslist.append({'file': htmlpage_fn, documentslist.append({'file': htmlpage_fn,
@ -130,6 +133,11 @@ for answer in site.ask(query):
'creator': printout_dict.get('Creator') 'creator': printout_dict.get('Creator')
}) })
# reset document_images list
doc_imgs = []
# RENDER index.html from documentslist # RENDER index.html from documentslist
index_html = index_template.render(index='Index', index_html = index_template.render(index='Index',
query=query, query=query,

@ -1,14 +1,196 @@
body{font-size: 12pt;}
div.part {border: 1px solid #e5e5e5;
margin-bottom: 10px; body {
font-family: Arial, Helvetica, sans-serif;
background-color: #aaa4a0;
}
/*div.row {display: inline; }
div.column { display: inline; }
div.column img{ width:24%; }*/
/* Image grid: four equal-width columns with a 10px gap, absolutely
   positioned at 60% from the top and spanning 96% of the width. */
.grid-container {
  display: grid;
  grid-template-columns: repeat(4, 1fr);
  grid-gap: 10px;
  position: absolute;
  top: 60%;
  width: 96%;
  height: auto;
  left: 2%;
  /*background-color: blue;*/
  /* was "text-align: : center;" -- the doubled colon made the declaration invalid */
  text-align: center;
  z-index: 5;
}
.grid-container > div {
position: relative;
/*background-color: green;*/
text-align: center;
/*padding-bottom: 1%;*/
z-index: 5;
}
img {
position: relative;
max-width: 100%;
max-height: 100%;
/*align-content: center;*/
}
/*links*/
a:link {
text-decoration: none;
color: black;
}
a:visited {
text-decoration: none;
color: blue;
}
a:hover {
text-decoration: none;
color: blue;
}
h1 {
position: fixed;
right:0%;
top:3%;
text-align: right;
background-color: grey;
padding: 3px 30px 3px 10px;
z-index: 10;
color: black;
font-size: 28px;
}
h2{
position: fixed;
right: 0%;
top: 15%;
text-align: left;
background-color: grey;
color: black;
padding: 3px 3px 3px 30px;
z-index: 10;
font-size: 20px;
}
.indexlist {
position: absolute;
top:20%;
}
/* Grid buttons */
.header {
position: absolute;
text-align: center;
padding: 32px;
top: 30%;
right: 0%;
z-index: 9;
width: auto;
height: auto;
}
p {
display: inline-block;
position: relative;
}
.btn {
border: none;
outline: none;
width: 110px;
text-align: center;
padding: 12px 0px;
background-color: grey;
cursor: pointer;
font-size: 18px;
margin-right: 5px;
z-index: 9;
}
.btn:hover {
background-color: black;
color: blue;
}
.btn:active {
background-color: black;
color: white;
}
/* Metadata panel: fixed to the top-left corner, 40% wide, semi-transparent. */
.metadata {
  position: fixed;
  top: 5%;
  left: 0%;
  /* was "background-color: ;" -- an empty value is an invalid declaration;
     "transparent" is the property's initial value, so rendering is unchanged */
  background-color: transparent;
  width: 40%;
  z-index: 10;
  opacity: 0.7;
}
.collapsible {
background-color: #777;
color: white;
cursor: pointer;
padding: 18px;
width: 10%;
height: auto;
border: none;
text-align: right;
outline: none;
font-size: 15px;
z-index: 10;
margin-top: 10px; margin-top: 10px;
padding: 20px;} }
.active, .collapsible:hover {
background-color: #555;
}
.content {
position: relative;
padding: 0px 18px;
max-height: 0;
overflow: hidden;
width: 90%;
transition: max-height 0.2s ease-out;
background-color: grey;
color: white;
z-index: 10;
text-align: right;
}
.metadata_links {
display: block;
width: 100%;
position: relative;
right: 0%;
padding: 10px;
text-align: left;
/*background-color: blue;*/
font-size: 18px;
}
.metadata_organization {
font-size: 14px;
}
div#content img {width: 50%;}
div.metadata span.key {color: red;
font-weight: bold;}
#orc {color:blue;} .fbtn {
font-style: italic;
}

@ -3,14 +3,85 @@
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<link rel="stylesheet" href="../static/style.css" /> <link rel="stylesheet" href="../static/style.css" />
<script type="text/javascript" src="../static/script.js"></script>
<title>{{ title[0] }}</title> <title>{{ title[0] }}</title>
</head> </head>
<body> <body>
<h1>{{ title[0] }}</h1> <h1>{{ title[0] }} &#8629;</h1>
<p><time datetime="{{date}}">{{ date.year }}.{{ date.month }}.{{ date.day }} </time></p> <h2><time datetime="{{date}}">{{ date.year }}.{{ date.month }}.{{ date.day }}</time></h2>
<div id="content">
{{ content }} <div class="metadata">
<button class="collapsible cbtn">C</button>
<div class="content">
<!-- metadata creator / format / topic -->
<div class="metadata_creator">{{ creator }}</div>
<a class="metadata_links" href="allcreators.html">All Creators</a>
</div>
<button class="collapsible fbtn">f</button>
<div class="content">
<div class="metadata_format">{{ format }}</div>
<a class="metadata_links" href="allformats.html">All Formats</a>
</div>
<button class="collapsible orgbtn">Org</button>
<div class="content">
<div class="metadata_organization">{{ organization }}</div>
<a class="metadata_links" href="allorganizations.html">All Organizations</a>
</div>
<button class="collapsible tbtn">T</button>
<div class="content">
<div class="metadata_topic">{{ topic }}</div>
<a class="metadata_links" href="alltopics.html">All Topics</a>
</div>
</div>
<!-- nav -->
<!-- <div id="rightnav" class="rightnav">
<a href="index.html" id="titlenav">Title</a>
</div>
<div id="leftnav" class="leftnav">
<a href="timeline.html" id="datenav">Date</a>
</div> -->
<!-- square -->
<!-- <div class="square"></div> -->
<!-- 2 btn grid switch for images -->
<div class="header" id="myHeader">
<p><button class="btn" onclick="myFunction()">100%</button></p>
<p><button class="btn" onclick="myFunction2()">overview</button></p>
</div>
<!-- images -->
{% for row in imgsmatrix %}
<div class="grid-container" id="myDIV">
{% for img in row %}
<div class="column">
<img src="{{ img }}">
</div>
{% endfor %}
</div> </div>
{% endfor %}
<script>
// Accordion behaviour: every element with class "collapsible" toggles the
// element that immediately follows it when clicked.
var coll = document.getElementsByClassName("collapsible");
var i;
for (i = 0; i < coll.length; i++) {
coll[i].addEventListener("click", function() {
// mark / unmark the clicked button
this.classList.toggle("active");
// the panel is the button's next sibling element
var content = this.nextElementSibling;
// an inline max-height means the panel is currently open: clear it to close;
// otherwise open the panel to the full height of its content
if (content.style.maxHeight){
content.style.maxHeight = null;
} else {
content.style.maxHeight = content.scrollHeight + "px";
}
});
}
</script>
</body> </body>
</html> </html>

@ -1,31 +0,0 @@
<div class="part">
<div class="img">
<img src="{{ imgsrc }}" />
<a href="https:{{ fullurl }}">{{ fullurl }}</a>
</div>
<div class="text">
{{ text | safe }}
</div>
<div class="metadata">
<h3>Metadata</h3>
{% for key, valuelist in printout_dict.items() %}
<div class="metadata_{{key}}">
{% if key == 'Date' %}
<span class="key">{{key}}</span>
<span class="value">{{valuelist.year}} {{valuelist.month}} {{valuelist.day}}</span>
{% elif key == 'page' %}
<span class="key">{{key|upper}}</span>
<span class="value">{{valuelist}}</span>
{% else %}
{% if valuelist|length > 0 %}
<span class="key">{{key|upper}}</span>
<span class="value">{{valuelist | join(", ")}}</span>
{% endif %}
{% endif %}
</div>
{% endfor %}
</div>
</div>

@ -3,6 +3,7 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<title>{{title}}</title> <title>{{title}}</title>
<link rel="stylesheet" href="../static/style.css" />
</head> </head>
<body> <body>
<h3>Resultsss from query:<br/><code>{{query}}</code></h3> <h3>Resultsss from query:<br/><code>{{query}}</code></h3>

@ -0,0 +1,87 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="../static/style.css" />
<script type="text/javascript" src="../static/script.js"></script>
<title>{{ title[0] }}</title>
</head>
<body>
<h1>{{ title[0] }} &#8629;</h1>
<h2><time datetime="{{date}}">{{ date.year }}.{{ date.month }}.{{ date.day }}</time></h2>
<div class="metadata">
<button class="collapsible cbtn">C</button>
<div class="content">
<!-- metadata creator / format / topic -->
<div class="metadata_creator">{{ creator }}</div>
<a class="metadata_links" href="allcreators.html">All Creators</a>
</div>
<button class="collapsible fbtn">f</button>
<div class="content">
<div class="metadata_format">{{ format }}</div>
<a class="metadata_links" href="allformats.html">All Formats</a>
</div>
<button class="collapsible orgbtn">Org</button>
<div class="content">
<div class="metadata_organization">{{ organization }}</div>
<a class="metadata_links" href="allorganizations.html">All Organizations</a>
</div>
<button class="collapsible tbtn">T</button>
<div class="content">
<div class="metadata_topic">{{ topic }}</div>
<a class="metadata_links" href="alltopics.html">All Topics</a>
</div>
</div>
<!-- nav -->
<!-- <div id="rightnav" class="rightnav">
<a href="index.html" id="titlenav">Title</a>
</div>
<div id="leftnav" class="leftnav">
<a href="timeline.html" id="datenav">Date</a>
</div> -->
<!-- square -->
<!-- <div class="square"></div> -->
<!-- 2 btn grid switch for images -->
<div class="header" id="myHeader">
<p><button class="btn" onclick="myFunction()">100%</button></p>
<p><button class="btn" onclick="myFunction2()">overview</button></p>
</div>
<!-- images -->
{% for row in imgsmatrix %}
<div class="grid-container" id="myDIV">
{% for img in row %}
<div class="column">
<img src="{{ img }}">
</div>
{% endfor %}
</div>
{% endfor %}
<script>
// Accordion behaviour: every element with class "collapsible" toggles the
// element that immediately follows it when clicked.
var coll = document.getElementsByClassName("collapsible");
var i;
for (i = 0; i < coll.length; i++) {
coll[i].addEventListener("click", function() {
// mark / unmark the clicked button
this.classList.toggle("active");
// the panel is the button's next sibling element
var content = this.nextElementSibling;
// an inline max-height means the panel is currently open: clear it to close;
// otherwise open the panel to the full height of its content
if (content.style.maxHeight){
content.style.maxHeight = null;
} else {
content.style.maxHeight = content.scrollHeight + "px";
}
});
}
</script>
</body>
</html>
Loading…
Cancel
Save