"""
Query the itchwiki Semantic MediaWiki for pages on a topic and print them.

Let's say you're interested in documents related to the topic
"US Foreign Policy".

You can start with an "ask" (first just from the "Semantic Ask" interface)
and use the condition:

    [[Topic::US Foreign Policy]]

Press "Find results" and you get a lot of pages as a result.

Then add the info you're interested in to the "printout selection" box,
and also make sure to sort the results by the part number so that the
document pages are in order.

Finally, click on the "Code" view and copy/paste the code -- but be careful
NOT to copy the "{{#ask:" at the start and the "}}" at the end.

You can then paste this into the API Sandbox to test it if you want...
OR just paste it into the code below as the "ask_query" variable.

https://hub.xpub.nl/sandbox/itchwiki/index.php/Special:ApiSandbox#action=ask
"""

# https://www.mediawiki.org/wiki/API:Main_page
# https://mwclient.readthedocs.io/en/latest/

import datetime
import json

import mwclient
from mwclient import Site

# NOTE(review): the standard library's "secrets" module has no BOTPASSWORD,
# so this presumably resolves to a local secrets.py sitting next to this
# script -- confirm, and keep that file out of version control.
from secrets import BOTPASSWORD

# Reference point for turning Unix timestamps into dates (see the loop below).
EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)

site = Site("hub.xpub.nl", path="/sandbox/itchwiki/")
site.login("Bot", BOTPASSWORD)

ask_query = """
[[Topic::US Foreign Policy]]
 |?Date
 |?Part
 |?Partof
 |?Title
 |format=broadtable
 |limit=50
 |offset=0
 |link=all
 |sort=Date,Title,Part
 |order=asc,asc,asc
 |headers=show
 |searchlabel=... further results
 |class=sortable wikitable smwtable
"""

# Tested in the APISandbox
# See: https://hub.xpub.nl/sandbox/itchwiki/index.php/Special:ApiSandbox#action=ask&format=json&query=%5B%5BTopic%3A%3AUS%20Foreign%20Policy%5D%5D%20%20%7C%3FPart%20%20%7C%3FPartof%20%20%7C%3FTitle%20%20%7C%3FDate%20%20%7Cformat%3Dbroadtable%20%20%7Climit%3D50%20%20%7Coffset%3D0%20%20%7Clink%3Dall%20%20%7Csort%3D%20%20%7Corder%3Dasc%20%20%7Cheaders%3Dshow%20%20%7Csearchlabel%3D...%20further%20results%20%20%7Cclass%3Dsortable%20wikitable%20smwtable&formatversion=2
# copy/paste the URL above and press "Make request" to see the results

response = site.api("ask", query=ask_query, format="json")
results = response['query']['results']

# Results is a dictionary where the keys are wiki names
# like "File:KSP Kilusan Vol 2 Nos 2 and 3-26.jpg"
# results[wikiname] is then another dictionary with all the results
# specific to that item
for wikiname in results:
    item = results[wikiname]
    # FOR DEBUGGING uncomment the print
    # print(wikiname, json.dumps(item, indent=2))
    printouts = item["printouts"]

    # Convert the timestamp with explicit UTC arithmetic instead of
    # datetime.date.fromtimestamp(): that function uses the *local* time
    # zone of the machine running the script, so the printed day could be
    # off by one depending on where the script runs, and it can fail for
    # dates before 1970 on some platforms.
    # (assumes the "timestamp" value is seconds since the Unix epoch --
    # TODO confirm against the Semantic MediaWiki API documentation)
    timestamp = int(printouts["Date"][0]['timestamp'])
    date = (EPOCH + datetime.timedelta(seconds=timestamp)).date()
    date_raw = printouts["Date"][0]['raw']

    title = printouts["Title"][0]["fulltext"]
    part = int(printouts["Part"][0])
    partof = int(printouts["Partof"][0])

    # Ask the wiki for the image URLs that belong to this page:
    # the full-size file, an 80 pixel wide thumbnail and the file page.
    r = site.api(
        "query",
        prop="imageinfo",
        titles=wikiname,
        iiprop="url",
        iiurlwidth="80",
        formatversion=2,
    )
    iinfo = r['query']['pages'][0]['imageinfo'][0]
    thumburl = iinfo['thumburl']
    fullsizeurl = iinfo['url']
    filepageurl = iinfo['descriptionurl']

    # NOTE(review): the template below has only five {} placeholders, so
    # str.format() silently ignores the last two arguments (filepageurl and
    # thumburl). Presumably the template once contained markup that used
    # them -- confirm and restore it if so.

    # nb: the code lines *need* to be indented
    # to stay INSIDE the loop
    # the output text between the """ can break this rule
    print("""

{} ({})

{}, Part {} of {}

""".format(date, date_raw, title, part, partof, filepageurl, thumburl))