Prompt the user for the name to give the downloaded files

master
Pedro Sá Couto 5 years ago
parent e63d61d584
commit 7c138f6612

BIN
.DS_Store vendored

Binary file not shown.

@ -4331,3 +4331,24 @@ console.error: BroadcastService:
1558945422969 Marionette INFO Listening on port 50122
1558945422985 Marionette WARN TLS certificate errors will be ignored for this session
1558945439310 Marionette INFO Stopped listening on port 50122
1558946707664 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "-marionette" "-foreground" "-no-remote" "-profile" "/var/folders/bc/txjl5mmd7cb1ngc8vvs687sc0000gn/T/rust_mozprofile.ery0MVS6ILyy"
1558946708065 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons
1558946708065 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/
1558946708066 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader*
1558946709426 Marionette INFO Listening on port 50419
1558946709507 Marionette WARN TLS certificate errors will be ignored for this session
1558946766652 Marionette INFO Stopped listening on port 50419
1558956988513 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "-marionette" "-foreground" "-no-remote" "-profile" "/var/folders/bc/txjl5mmd7cb1ngc8vvs687sc0000gn/T/rust_mozprofile.olBMzOkfktV9"
1558956989017 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons
1558956989017 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/
1558956989017 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader*
1558956990377 Marionette INFO Listening on port 54084
1558956990448 Marionette WARN TLS certificate errors will be ignored for this session
1558956997384 Marionette INFO Stopped listening on port 54084
1558957316716 mozrunner::runner INFO Running command: "/Applications/Firefox.app/Contents/MacOS/firefox-bin" "-marionette" "-foreground" "-no-remote" "-profile" "/var/folders/bc/txjl5mmd7cb1ngc8vvs687sc0000gn/T/rust_mozprofile.uK9ffHirXJAk"
1558957317129 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: mozillaAddons
1558957317130 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: resource://pdf.js/
1558957317130 addons.webextension.screenshots@mozilla.org WARN Loading extension 'screenshots@mozilla.org': Reading manifest: Invalid extension permission: about:reader*
1558957318490 Marionette INFO Listening on port 54200
1558957318545 Marionette WARN TLS certificate errors will be ignored for this session
1558957323861 Marionette INFO Stopped listening on port 54200

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 21 KiB

@ -13,9 +13,6 @@ i = 1
while True:
try:
# get the url from the terminal
# url = input("Enter instance.social url (include https:// + exlude "seq=%i#metadata_info_tab_contents"): ")
# URL iterates through the sequence number
url = ("https://www.jstor.org/stable/23267102?Search=yes&resultItemClick=true&searchText=tea&searchUri=%2Faction%2FdoBasicSearch%3FQuery%3Dtea%26amp%3Bacc%3Don%26amp%3Bfc%3Doff%26amp%3Bwc%3Don%26amp%3Bgroup%3Dnone&ab_segments=0%2Fl2b-basic-1%2Frelevance_config_with_tbsub_l2b&refreqid=search%3A9fd0deff8d3258de87d3b54d6dfad664&" + "seq=%i#metadata_info_tab_contents"%i)

@ -0,0 +1,57 @@
# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
import datetime
from pprint import pprint
import requests
import multiprocessing
import base64
# Download consecutive page scans as GIF files.
#
# Page number `i` is addressed by appending
# "seq=<i>#metadata_info_tab_contents" to a base URL typed by the user.
# Each page is opened in its own Firefox session, the base64-encoded image
# is read from the page-scan element's `src` attribute, decoded and written
# to "<name><i>.gif". The loop stops at the first page that fails.
i = 1

# Ask once, before the loop: the base URL and file-name prefix are the same
# for every page, only the sequence number `i` changes.
base_url = input("Enter instance.social url (include 'https://' AND exlude 'seq=%i#metadata_info_tab_contents'")
name = input("How do you want to call you files? ")

# The geckodriver binary is looked up next to this script.
driver_path = os.path.dirname(os.path.realpath(__file__)) + '/geckodriver'

while True:
    # Reset per iteration so the cleanup below never touches a stale or
    # never-created session.
    driver = None
    try:
        url = base_url + ("seq=%i#metadata_info_tab_contents" % i)

        # Tell Selenium to open a new Firefox session
        # and specify the path to the driver
        driver = webdriver.Firefox(executable_path=driver_path)
        # Implicit wait tells Selenium how long it should wait before it
        # throws an exception
        driver.implicitly_wait(10)
        driver.get(url)
        time.sleep(1)

        # get the image base64 code
        img = driver.find_element_by_css_selector('#page-scan-container.page-scan-container')
        src = img.get_attribute('src')

        # Keep only what follows the last comma (presumably `src` is a data
        # URI, so this is the bare base64 payload -- TODO confirm).
        base64String = src.split(',').pop()
        pprint(base64String)

        # decode base64 string
        imgdata = base64.b64decode(base64String)

        # save the image
        filename = name + '%i.gif' % i
        with open(filename, 'wb') as f:
            f.write(imgdata)

        i += 1
        print("DONE! Closing Window")
    except Exception:
        # Any failure (page missing, element not found, `src` absent, bad
        # base64, ...) ends the run. `Exception` rather than a bare
        # `except` so that Ctrl-C / SystemExit still propagate.
        print("Impossible to print image")
        break
    finally:
        # Runs on success, on failure (before the `break` takes effect) and
        # on Ctrl-C. quit() ends the whole session instead of only closing
        # the window.
        if driver is not None:
            driver.quit()
    time.sleep(0.2)
Loading…
Cancel
Save