You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Jsort_scrape/selenium_jstor_terminal_inp...

58 lines
1.6 KiB
Python

# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
import datetime
from pprint import pprint
import requests
import multiprocessing
import base64
i = 1
while True:
try:
# get the url from the terminal
url = (input("Enter instance.social url (include 'https://' AND exlude 'seq=%i#metadata_info_tab_contents'") + ("seq=%i#metadata_info_tab_contents"%i))
name = (input("How do you want to call you files? "))
# Tell Selenium to open a new Firefox session
# and specify the path to the driver
driver = webdriver.Firefox(executable_path=os.path.dirname(os.path.realpath(__file__)) + '/geckodriver')
# Implicit wait tells Selenium how long it should wait before it throws an exception
driver.implicitly_wait(10)
driver.get(url)
time.sleep(1)
# get the image bay64 code
img = driver.find_element_by_css_selector('#page-scan-container.page-scan-container')
src = img.get_attribute('src')
# check if source is correct
# pprint(src)
# strip type from Javascript to base64 string only
base64String = src.split(',').pop();
pprint(base64String)
# decode base64 string
imgdata = base64.b64decode(base64String)
# save the image
filename = (name + '%i.gif'%i)
with open(filename, 'wb') as f:
f.write(imgdata)
driver.close()
i+=1
print("DONE! Closing Window")
except:
print("Impossible to print image")
driver.close()
break
time.sleep(0.2)