refined gallery regex

master
Castro0o
parent 4e5dd55718
commit 4e5303110c

@ -33,9 +33,15 @@ def api_page(pageid, query):
response = api_request('action=query&titles=File:{}&prop=imageinfo&iiprop=url&iiurlwidth=500', pagename) # iiurlwidth determines width of thumbnail
return response
def api_img_url(filename):  # get full-size image's url
    '''Return the URL of the full-size image `filename`, or None.

    Queries the wiki through api_page(filename, 'fullimage') and reads the
    first entry of the 'imageinfo' list in the result.

    filename -- image file name, passed unchanged to api_page.

    Returns the entry's 'url' value; if that is missing, falls back to
    'thumburl' (the only key the previous implementation read). Returns
    None when the response has no usable 'imageinfo' entry.
    '''
    # NOTE(review): assumes api_page returns a dict (or None) -- confirm
    # against api_page, whose 'fullimage' branch is not visible here.
    page_content_dict = api_page(filename, 'fullimage') or {}
    imageinfo = page_content_dict.get('imageinfo')
    if not imageinfo:
        # Key absent or list empty: no image data to read.
        return None
    info = imageinfo[0]
    # In the MediaWiki imageinfo API 'url' points at the original file, while
    # 'thumburl' is only present when a scaled width/height was requested.
    return info.get('url') or info.get('thumburl')
def api_thumb_url(filename):
'''get thumbnail url of image'''
'''get thumbnail url of image'''
page_content_dict = api_page(filename, 'imageinfo')
if 'imageinfo' in page_content_dict.keys():
thumburl = ((page_content_dict.get('imageinfo'))[0].get('thumburl'))

@ -15,7 +15,8 @@ pages_path = 'web/work'
#def generate_xml():
# Pre-compiled patterns for pulling image names out of wiki <gallery> markup.
# Raw strings keep the regex escapes (\. \| \/) from being parsed as (invalid)
# string escapes; the compiled patterns are unchanged.

# A whole <gallery>...</gallery> element, matched non-greedily. re.DOTALL is
# not set, so '.' does not match newlines.
gallery_exp = re.compile(r'<gallery>.*?</gallery>')

# Everything after a 'File:' prefix, up to the next 'File:' or the closing tag.
file_exp = re.compile(r'File:(.*?)(?=File:|<\/gallery>)')

# An image name after 'File:', ending at a '|', the next 'File:' or the closing
# tag. Case-insensitive. Groups: (full file name, stem, extension).
img_exp = re.compile(
    r'File:((.*?)\.(gif|jpg|jpeg|png))(?=\||File:|<\/gallery>)', re.I)
def replace_gallery(content):
# from <gallery>.*</gallery> imgs, return list of img ET elements
@ -26,17 +27,16 @@ def replace_gallery(content):
print 'gallery_found', gallery_found
for gallery in gallery_found: # in case there is more than 1 <gallery>
print 'GALLERY', gallery
allfiles =re.findall(file_exp, gallery)
allfiles =re.findall(img_exp, gallery)
print 'ALLFILES', allfiles
for imgfile in allfiles:
img = ((re.search(img_exp, imgfile)).group(0))#.decode('utf-8')
imgsrc = api_thumb_url(img)
imgfile = imgfile[0]
imgsrc = api_thumb_url(imgfile) # search for original image
print imgfile, imgsrc
img_el = ET.Element('img', attrib={'src': imgsrc})
gallery_imgs.append(img_el)
print 'gallery img', img, ET.tostring(img_el)
# print 'gallery img', imgfile, ET.tostring(img_el)
return content, gallery_imgs
# need to return a list of images that is the gallery
# need to replace <gallery>.*</gallery> with ''
def create_workpage( allworks_dict, work_key): # replace text content in dict with html nodes, holding the content
for key in allworks_dict.keys():

Loading…
Cancel
Save