You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

58 lines
1.8 KiB
Python

# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
import datetime
from pprint import pprint
import requests
import multiprocessing
import base64
# Download consecutive page scans of one JSTOR document and save each one
# as "<name><seq>.gif" in the current working directory.
#
# For every sequence number, starting at 1, a fresh Firefox session opens the
# page, the `src` attribute of the page-scan element is read, the part after
# the last comma is base64-decoded, and the bytes are written to disk.
# The loop stops at the first page that cannot be fetched or decoded.

# Sequence number of the page to fetch next.
i = 1
name = input("How do you want to call you files? ")

# The geckodriver binary is looked up next to this script; the path does not
# change between pages, so it is computed once.
gecko_path = os.path.dirname(os.path.realpath(__file__)) + '/geckodriver'

# Fixed part of the page URL. It is kept separate from the "seq=" suffix so
# that its percent-encoded characters (%2F, %3F, ...) never go through
# %-formatting.
base_url = "https://www.jstor.org/stable/23267102?Search=yes&resultItemClick=true&searchText=tea&searchUri=%2Faction%2FdoBasicSearch%3FQuery%3Dtea%26amp%3Bacc%3Don%26amp%3Bfc%3Doff%26amp%3Bwc%3Don%26amp%3Bgroup%3Dnone&ab_segments=0%2Fl2b-basic-1%2Frelevance_config_with_tbsub_l2b&refreqid=search%3A9fd0deff8d3258de87d3b54d6dfad664&"

while True:
    # Bound before the try so the cleanup in `finally` can tell whether a
    # browser was actually started for this page.
    driver = None
    try:
        # URL iterates through the sequence number.
        # `%` binds tighter than `+`, so only the suffix is formatted.
        url = base_url + "seq=%i#metadata_info_tab_contents" % i
        # Tell Selenium to open a new Firefox session
        # and specify the path to the driver.
        # NOTE(review): `executable_path=` and `find_element_by_css_selector`
        # are Selenium 3-era APIs that later Selenium 4 releases removed;
        # pin the Selenium version or migrate both calls together.
        driver = webdriver.Firefox(executable_path=gecko_path)
        # Implicit wait tells Selenium how long it should wait for an element
        # before it throws an exception.
        driver.implicitly_wait(10)
        driver.get(url)
        # Fixed pause after navigation before reading the element.
        time.sleep(3)
        # Get the image base64 code.
        img = driver.find_element_by_css_selector('#page-scan-container.page-scan-container')
        src = img.get_attribute('src')
        # Keep only what follows the last comma, dropping any prefix before
        # the base64 payload. If the element has no `src`, this raises and
        # the page is treated as a failure below.
        base64String = src.split(',').pop()
        pprint(base64String)
        # Decode the base64 string.
        imgdata = base64.b64decode(base64String)
        # Save the image.
        filename = name + '%i.gif' % i
        with open(filename, 'wb') as f:
            f.write(imgdata)
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate. The first failing page ends the run.
        print("Impossible to print image")
        print("Reason: %r" % (exc,))
        break
    else:
        i += 1
        print("DONE! Closing Window")
    finally:
        # Runs on success and on failure (including the `break` above).
        # quit() ends the whole WebDriver session rather than only closing
        # the current window.
        if driver is not None:
            driver.quit()
    # Short pause before requesting the next page.
    # NOTE(review): the original indentation was lost; this sleep is assumed
    # to belong to the loop body.
    time.sleep(1)