# Provenance: contents of query_materials.py as added by git patch
#   From 63d42b78507df7e5a651c689527ddfc384d55e76 Mon Sep 17 00:00:00 2001
#   From: Michael Murtaugh
#   Date: Thu, 10 Nov 2022 09:35:33 +0100
#   Subject: [PATCH] sparql query materials
"""Print "A <item> made of <material>" sentences from Wikidata.

The script runs a SPARQL query for P186 statements against the Wikidata
query endpoint, then fetches the entity JSON of each item and material to
look up their English labels.
"""

# import rdflib
# g = rdflib.Graph()
# https://stackoverflow.com/questions/68824808/query-wikidata-rest-api-with-related-identifier

from urllib.request import urlopen
from urllib.parse import urlencode
import json
import re
from time import sleep


# Matches http(s)://www.wikidata.org/wiki/Q123 and .../entity/Q123; the
# named group "q" captures the Q-identifier.
_WIKIDATA_ENTITY_URL = re.compile(
    r"^https?://www\.wikidata\.org/(?:wiki|entity)/(?P<q>Q\d+)$"
)


def _extract_qid(wdurl):
    """Return the Q-identifier embedded in *wdurl*, or None if it has none."""
    m = _WIKIDATA_ENTITY_URL.search(wdurl)
    return m.group("q") if m is not None else None


def get_info(wdurl):
    """Fetch the entity record for a Wikidata item URL.

    Args:
        wdurl: URL of the form ``http(s)://www.wikidata.org/wiki/Q...`` or
            ``http(s)://www.wikidata.org/entity/Q...``.

    Returns:
        The ``entities[<qid>]`` object of the Special:EntityData JSON
        response, or None (after printing "unrecognized url") when *wdurl*
        does not have the expected form.
    """
    qid = _extract_qid(wdurl)
    if qid is None:
        print("unrecognized url")
        return None
    # print ("qid", qid)
    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json?flavor=simple"
    # Close the HTTP response as soon as the JSON has been parsed.
    with urlopen(url) as response:
        return json.load(response)['entities'][qid]


def query(q, format="json", endpoint="https://query.wikidata.org/sparql"):
    """Run SPARQL query *q* and return its result bindings.

    Args:
        q: SPARQL query text.
        format: value sent as the ``format`` request parameter.
        endpoint: URL of the SPARQL endpoint.

    Returns:
        The ``results.bindings`` list of the JSON response.
    """
    params = urlencode({'format': format, 'query': q})
    with urlopen(endpoint + "?" + params) as response:
        return json.load(response)['results']['bindings']


# Up to ten (item, statement, material) rows linked through property P186.
q1 = """
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>

SELECT DISTINCT ?item ?statement0 ?material WHERE
  {
    ?item p:P186 ?statement0.
    ?statement0 ps:P186 ?material.
  }
LIMIT 10
"""


def main():
    """Print the query, then one sentence per result row with English labels."""
    print(q1)
    print()

    for result in query(q1):
        statement = result['statement0']['value']
        value = get_info(result['material']['value'])
        item = get_info(result['item']['value'])
        # print ("value", value)
        # print ("item", item)
        if value is None or item is None:
            # get_info already reported the unrecognized URL; skip the row
            # instead of failing on a None subscript below.
            continue
        try:
            material_label = value['labels']['en']['value']
            item_label = item['labels']['en']['value']
        except KeyError:
            print("no english label?")
            continue
        print(f"A {item_label} made of {material_label}. \nSee {statement}")
        print()
        sleep(3)


if __name__ == "__main__":
    main()