"""Save consecutive page scans of one JSTOR item as numbered image files.

The script asks for a filename prefix, then walks the ``seq`` query parameter
of a hard-coded jstor.org viewer URL starting at 1.  For every page it opens a
fresh Firefox session, reads the ``src`` data URI of the page-scan element,
base64-decodes it and writes the bytes to ``<prefix><seq>.gif`` in the current
working directory.  The loop stops at the first page that cannot be saved.
"""

# import libraries
import base64
import datetime
import multiprocessing
import os
import sys
import time
from pprint import pprint

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Viewer URL up to (and including) the trailing "&"; the per-page
# "seq=<n>#..." suffix is appended by _page_url().  The "%XX" escapes are
# literal URL text, not format placeholders.
BASE_URL = (
    "https://www.jstor.org/stable/23267102"
    "?Search=yes&resultItemClick=true&searchText=tea"
    "&searchUri=%2Faction%2FdoBasicSearch%3FQuery%3Dtea%26amp%3Bacc%3Don"
    "%26amp%3Bfc%3Doff%26amp%3Bwc%3Don%26amp%3Bgroup%3Dnone"
    "&ab_segments=0%2Fl2b-basic-1%2Frelevance_config_with_tbsub_l2b"
    "&refreqid=search%3A9fd0deff8d3258de87d3b54d6dfad664&"
)

# geckodriver is expected to sit next to this script.
GECKODRIVER_PATH = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "geckodriver"
)

# CSS selector of the element whose ``src`` attribute holds the page image.
PAGE_SCAN_SELECTOR = '#page-scan-container.page-scan-container'


def _page_url(seq):
    """Return the viewer URL for page number *seq*."""
    return BASE_URL + "seq=%i#metadata_info_tab_contents" % seq


def _base64_payload(src):
    """Return the part of data URI *src* after its last comma.

    Raises:
        ValueError: if *src* is ``None`` (the element had no ``src``).
    """
    if src is None:
        raise ValueError("page-scan element has no 'src' attribute")
    # Drops the "data:<type>;base64" prefix; a string without a comma is
    # returned unchanged.
    return src.rpartition(',')[2]


i = 1
name = (input("How do you want to call you files? "))

while True:
    driver = None
    try:
        # Tell Selenium to open a new Firefox session
        # and specify the path to the driver.
        # NOTE(review): ``executable_path`` is the Selenium 3 spelling; newer
        # Selenium releases expect a ``Service`` object instead.
        driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
        # Implicit wait tells Selenium how long it should wait
        # before it throws an exception
        driver.implicitly_wait(10)
        # URL iterates through the sequence number
        driver.get(_page_url(i))
        # Fixed pause before reading the element -- presumably gives the page
        # time to fill in the image data.
        time.sleep(3)

        # get the image base64 code
        img = driver.find_element(By.CSS_SELECTOR, PAGE_SCAN_SELECTOR)
        base64String = _base64_payload(img.get_attribute('src'))
        pprint(base64String)

        # decode base64 string and save the image
        imgdata = base64.b64decode(base64String)
        filename = name + '%i.gif' % i
        with open(filename, 'wb') as f:
            f.write(imgdata)
    except Exception as exc:
        # Any failure ends the run (this is also how the loop terminates).
        # Ctrl-C / SystemExit are deliberately not caught here.
        print("Impossible to print image")
        print("Reason: %r" % (exc,), file=sys.stderr)
        break
    finally:
        # Runs on success, failure and interruption alike, and only if a
        # browser was actually started; quit() ends the whole session.
        if driver is not None:
            driver.quit()

    i += 1
    print("DONE! Closing Window")
    time.sleep(1)