master
Michael Murtaugh 11 months ago
parent 87352c6eed
commit 7de5d3a231

@ -404,7 +404,7 @@
<div class="subrow">
<p class="subcl1">Clara Noseda</p>
<p class="subcl2"><a href="desire_revolution/" target="_blank" class="ext">CONSIDER DISASTER, DESIRE REVOLUTION: A repository for astropolitical research</a></p>
<p class="subcl3"><a href="desire_revolution/pdf/A SAILORS GUIDE TO EARTH_Clara Noseda.pdf" target="_blank" class="ext">PDF</a></p>
<p class="subcl3"><a href="desire_revolution/pdf/A%20SAILORS%20GUIDE%20TO%20EARTH_Clara%20Noseda.pdf" target="_blank" class="ext">PDF</a></p>
</div>
<div class="subrow">
<p class="subcl1">Mika Motskobili</p>

@ -1,4 +1,3 @@
<!DOCTYPE html>
<html lang="en"><head>
<meta charset="utf-8">
@ -404,7 +403,7 @@
<div class="subrow" typeof="xpub:project">
<p class="subcl1" property="xpub:student">Clara Noseda</p>
<p class="subcl2" property="dc:title"><a href="desire_revolution/" target="_blank" class="ext" property="xpub:project">CONSIDER DISASTER, DESIRE REVOLUTION: A repository for astropolitical research</a></p>
<p class="subcl3"><a href="desire_revolution/pdf/A SAILORS GUIDE TO EARTH_Clara Noseda.pdf" target="_blank" class="ext" property="xpub:thesis">PDF</a></p>
<p class="subcl3"><a href="desire_revolution/pdf/A%20SAILORS%20GUIDE%20TO%20EARTH_Clara%20Noseda.pdf" target="_blank" class="ext" property="xpub:thesis">PDF</a></p>
</div>
<div class="subrow" typeof="xpub:project">
<p class="subcl1" property="xpub:student">Mika Motskobili</p>

@ -0,0 +1,16 @@
from rdflib import Graph
from rdflib import RDF, URIRef, Namespace
XPUB = Namespace("http://xpub.nl/terms/")
import sys
# Fetch the published, RDFa-annotated index page, extract its triples into an
# rdflib graph and print the graph's default serialization to stdout.
#
# NOTE: an earlier, disabled variant of this script read a local
# "index.rdfa.html" file and parsed its contents with
# g.parse(publicID="index.html", data=data, format="rdfa") instead of
# fetching the page over HTTP.
source_url = "https://project.xpub.nl/index.rdfa.html"

g = Graph()
g.parse(source_url, format="rdfa")

serialized = g.serialize()
print(serialized)

@ -0,0 +1,41 @@
import html5lib
from xml.etree import ElementTree as ET
def _annotate_row(div):
    """Add RDFa attributes to one ``<div class="subrow">`` element, in place.

    The direct ``<p>`` children of the row are interpreted by position:

    * 0 -- student name, tagged ``xpub:student``
    * 1 -- project title, tagged ``dc:title``; its first ``<a>`` child is
      tagged ``xpub:project``
    * 2 -- links column; the first ``<a>`` is tagged ``xpub:thesis`` and an
      optional second ``<a>`` is tagged ``xpub:thesis_image``

    Raises:
        Exception: if the title column contains no link, if the links column
            holds more than two links, or if the row has more than three
            ``<p>`` columns.
    """
    div.attrib['typeof'] = "xpub:project"
    student, title = None, None
    for i, p in enumerate(div.findall("./p")):
        if i == 0:
            p.attrib['property'] = "xpub:student"
            student = p.text
        elif i == 1:
            p.attrib['property'] = "dc:title"
            project_link = p.find("./a")
            # find() returns None when the column has no <a>; report which
            # row is malformed instead of failing with an AttributeError.
            if project_link is None:
                raise Exception(f"{student}: no project link in title column")
            project_link.attrib['property'] = "xpub:project"
            title = project_link.text
        elif i == 2:
            for ai, pdf_link in enumerate(p.findall("./a")):
                if ai == 0:
                    pdf_link.attrib['property'] = "xpub:thesis"
                elif ai == 1:
                    pdf_link.attrib['property'] = "xpub:thesis_image"
                else:
                    raise Exception (f"{title}: too many links in PDF columns")
        else:
            raise Exception(f"{title}: too many p columns")


# Read index.html, annotate every project row with RDFa attributes and write
# the result to index.rdfa.html.
# NOTE: assumes index.html is UTF-8 encoded; the encoding is passed explicitly
# so the result does not depend on the platform's locale default.
with open("index.html", encoding="utf-8") as fin:
    t = html5lib.parse(fin.read(), namespaceHTMLElements=False)

# Declare the prefixes used by the property/typeof values on <body>.
body = t.find(".//body")
body.attrib['xmlns:xpub'] = "http://xpub.nl/terms/"
body.attrib['xmlns:dc'] = "http://purl.org/dc/terms/"

count = 0
for div in t.findall('.//div[@class="subrow"]'):
    count += 1
    _annotate_row(div)
print (f"count: {count}")

# ET.tostring(..., encoding="unicode") returns text, so the on-disk encoding
# is decided here; print() appends the trailing newline.
with open("index.rdfa.html", "w", encoding="utf-8") as fout:
    print (ET.tostring(t, method="html", encoding="unicode"), file=fout)
Loading…
Cancel
Save