@@ -11,10 +11,33 @@ from xml.etree import ElementTree as ET
# from wiki_get_html import page_html
from mwclient import Site
from mwclient.page import Page
from mwclient.errors import APIError
from leaflet import tiles_wrapper, recursiverender, gridrender, html
from imagetile2 import tile_image
from urllib import quote as urlquote
def wget(url, path, blocksize=4 * 1000):
    """ Download url to path, returning the number of bytes written """
    if type(url) == unicode:
        url = url.encode("utf-8")
    count = 0
    with open(path, "wb") as fout:
        fin = urlopen(url)
        while True:
            data = fin.read(blocksize)
            if not data:
                break
            fout.write(data)
            count += len(data)
    return count
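# A minimal usage sketch for wget (the URL and local path are illustrative,
# not taken from a real run):
#   n = wget("https://pzwiki.wdka.nl/mw-mediadesign/images/a/ab/Example.png",
#            "wiki/a/ab/Example.png")
#   print("wrote {0} bytes".format(n), file=sys.stderr)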
def page_url(site, page):
    # print("[page_url]", page.name, file=sys.stderr)
    base = os.path.split(site.site['base'])[0]
    uret = os.path.join(base, urlquote(page.normalize_title(page.name)))
    # assert type(uret) == str
    return uret

def wiki_url_to_title(url):
    return urllib.unquote(url.split("/")[-1])
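# Round-trip sketch for the URL helpers above (the page URL is hypothetical;
# only the split-and-unquote behaviour is being illustrated):
#   wiki_url_to_title("https://pzwiki.wdka.nl/mediadesign/Some_Page")
#   # -> "Some_Page"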
@@ -269,9 +292,162 @@ def make_gallery(args):
    else:
        print(json.dumps(root_node, indent=2))
from time import sleep
def testwiki(args):
    site = Site((args.wikiprotocol, args.wikihost), path=args.wikipath)
    return site
USER_NS = 2  # MediaWiki namespace id for User: pages
def imageinfo_with_thumbnail(site, name):
    d = site.api(
        "query",
        titles=name,
        prop="imageinfo",
        iiprop="url|mime",
        iiurlwidth=1024
    )
    pp = d['query']['pages']
    # return the imageinfo of the first (only) page in the response
    for key in pp:
        return pp[key]['imageinfo'][0]
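# The dict returned above follows the MediaWiki imageinfo response shape,
# roughly like this (field values are illustrative only):
#   {"url": "https://.../images/a/ab/Example.png",
#    "descriptionurl": "https://.../index.php/File:Example.png",
#    "mime": "image/png",
#    "thumburl": "https://.../thumb/a/ab/Example.png/1024px-Example.png"}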
def recentfiles(args):
    # open connection to wiki
    wiki = Site((args.wikiprotocol, args.wikihost), path=args.wikipath)
    # Prepare user list to filter (if args.usercategory)
    filter_by_users = None
    if args.usercategory:
        filter_by_users = set()
        usercategory = wiki.categories.get(args.usercategory)
        for p in usercategory.members():
            if p.namespace == USER_NS:
                filter_by_users.add(p.page_title)
    # Load args.json for the last (most recent) timestamp
    last_date = None
    if args.json:
        try:
            with open(args.json) as f:
                print("Reading {0}".format(args.json), file=sys.stderr)
                for line in f:
                    data = json.loads(line)
                    if 'date' in data:
                        last_date = data['date']
        except IOError:
            pass
    # Prepare the query arguments
    qargs = {
        'list': "allimages",
        'ailimit': 50,
        'aisort': 'timestamp',
        'aidir': 'descending',
        'aiprop': "timestamp|url|user|userid"
    }
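    # For reference, this is roughly equivalent to a raw api.php request along
    # the lines of (URL shown for illustration only):
    #   /api.php?action=query&list=allimages&ailimit=50&aisort=timestamp
    #       &aidir=descending&aiprop=timestamp%7Curl%7Cuser%7Cuserid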
    if args.oldest:
        qargs['aiend'] = args.oldest
    if last_date:
        print("Using aiend {0}".format(last_date), file=sys.stderr)
        qargs['aiend'] = last_date

    count = 0  # used to satisfy --limit when given
    skipped_users = set()  # nicety for outputting names only once when skipped
    items_to_output = []
    # LOOP for continuing queries as needed
    while True:
        qq = wiki.api('query', **qargs)
        # print("Got {0} results".format(len(qq['query']['allimages'])), file=sys.stderr)
        results = qq['query']['allimages']
        for r in results:
            # Filter on user
            if filter_by_users is not None:
                if r['user'] not in filter_by_users:
                    if r['user'] not in skipped_users:
                        print("Skipping user {0}".format(r['user']), file=sys.stderr)
                        skipped_users.add(r['user'])
                    continue
            try:
                # Filter on mime type (image/*)
                filepage = wiki.pages.get(r['title'])
                # mwclient's imageinfo doesn't have mime (or thumbnail info)
                # imageinfo = filepage.imageinfo
                imageinfo = imageinfo_with_thumbnail(wiki, r['title'])
                if not imageinfo['mime'].startswith("image/"):
                    print("Skipping non-image ({0}) {1}".format(imageinfo['mime'], r['title']), file=sys.stderr)
                    continue
                # Deal with the edge case where items equal to aiend are returned
                if last_date and r['timestamp'] == last_date:
                    print("SKIPPING AIEND item", file=sys.stderr)
                    break
                # Construct an item for output
                print("[{0}], date: {1}".format(filepage.page_title, r['timestamp']), file=sys.stderr)
                usagepage = None
                for usagepage in filepage.imageusage():
                    break  # just grab the first usage page
                # url: local path to file
                imageurl = imageinfo['url']
                localpath = imageurl.replace("https://pzwiki.wdka.nl/mw-mediadesign/images/", "wiki/")
                # wget image from wiki to local folder
                if not os.path.exists(localpath):
                    try:
                        os.makedirs(os.path.split(localpath)[0])
                    except OSError:
                        pass
                    print("downloading {0} to {1}".format(imageurl, localpath), file=sys.stderr)
                    wget(imageurl, localpath)
                item = {}
                item['url'] = localpath
                item['date'] = r['timestamp']
                userpage = wiki.pages.get('User:' + r['user'])
                if usagepage:
                    item['text'] = '<a href="{0}">{1}</a><br>Uploaded by <a href="{2}">{3}</a>'.format(
                        page_url(wiki, usagepage),
                        usagepage.page_title,
                        page_url(wiki, userpage),
                        r['user'])
                else:
                    item['text'] = '<a href="{0}">{1}</a><br>Uploaded by <a href="{2}">{3}</a>'.format(
                        page_url(wiki, filepage),
                        filepage.page_title,
                        page_url(wiki, userpage),
                        r['user'])
                # print(json.dumps(item))
                items_to_output.append(item)
                # honor --limit
                count += 1
                if args.limit and count == args.limit:
                    break
            except APIError as e:
                print("Error {0}, skipping".format(e), file=sys.stderr)

        if args.limit and count == args.limit:
            break
        # continue the query if possible (pre-loop)...
        if 'continue' in qq:
            qargs['aicontinue'] = qq['continue']['aicontinue']
        else:
            # we've reached the end of the query data
            break
    # OUTPUT RESULTS
    # reverse to be chronological
    items_to_output.reverse()
    if args.json:
        with open(args.json, "a") as f:
            for x in items_to_output:
                print(json.dumps(x), file=f)
    else:
        for x in items_to_output:
            print(json.dumps(x))
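# Each output line is a self-contained JSON object with the keys built above;
# a line might look like (values illustrative):
#   {"url": "wiki/a/ab/Example.png", "date": "2018-01-01T12:00:00Z",
#    "text": "<a href=\"...\">Some Page</a><br>Uploaded by <a href=\"...\">SomeUser</a>"}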
if __name__ == "__main__":
@@ -312,6 +488,13 @@ if __name__ == "__main__":
    ap_article.add_argument("--html", default=False, action="store_true")
    ap_article.set_defaults(func=make_category)

    ap_recentfiles = subparsers.add_parser('recentfiles', help='Incrementally update a json file with information about recent files')
    ap_recentfiles.add_argument("--usercategory", help="limit to activity by users that are members of this category")
    ap_recentfiles.add_argument("--limit", type=int, help="limit")
    ap_recentfiles.add_argument("--oldest", default=None, help="No results earlier than this timestamp (e.g. 2018-01-01T00:00:00Z)")
    ap_recentfiles.add_argument("--json", default=None, help="Use this json file as both input (to check the last timestamp) and output -- append results chronologically as a json-stream.")
    ap_recentfiles.set_defaults(func=recentfiles)

    args = ap.parse_args()
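    # Example invocation (the script name is hypothetical; the wiki connection
    # arguments are defined on the top-level parser, outside this hunk):
    #   python wikitools.py recentfiles --usercategory Students \
    #       --json recentfiles.json --limit 10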