"""Scrape instance names, descriptions and landing-page images from instances.social.

The script opens the filtered instance list in Firefox, clicks every list
entry in turn, appends the instance name and description shown in the info
modal to ``results.txt``, opens the instance in a new tab, and saves the
landing-page hero image as ``image<N>.jpg`` in the current directory.
When a row cannot be processed it clicks "load more" and resumes from the
next known start row; it stops once no further page can be loaded.
"""

# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import sys
import time
import datetime
from pprint import pprint
import requests
import multiprocessing

# nth-child index at which scraping resumes after each click on "load more".
PAGE_START_ROWS = [52, 102, 152, 202, 252, 302, 352, 402]

# Modifier that, combined with ENTER on a link, opens it in a new tab.
# The original script hard-coded COMMAND (macOS); CONTROL is the usual
# Firefox equivalent on other platforms.
NEW_TAB_MODIFIER = Keys.COMMAND if sys.platform == "darwin" else Keys.CONTROL

# Seconds to wait for the image download before giving up on it.
IMAGE_REQUEST_TIMEOUT = 30


def _download_hero_image(driver, image_index):
    """Save the hero image of the page currently shown by *driver*.

    The image is written to ``image<image_index>.jpg`` only when the HTTP
    request answers with status 200. Any lookup, network or file error
    propagates to the caller.
    """
    # get the image source
    img = driver.find_element_by_css_selector('.landing-page__hero > img:nth-child(1)')
    src = img.get_attribute('src')

    # download the image
    picture_request = requests.get(src, timeout=IMAGE_REQUEST_TIMEOUT)
    if picture_request.status_code == 200:
        # A dedicated name for the file handle: the original reused ``f``,
        # clobbering the page counter that indexes PAGE_START_ROWS.
        with open("image%i.jpg" % image_index, 'wb') as image_file:
            image_file.write(picture_request.content)
            print("Printed Image")


def _scrape_instance(driver, text_file, row, image_index, main_window):
    """Process list entry number *row*: log its details and fetch its image.

    Writes the instance name and description to *text_file*, opens the
    instance in a new tab to download its hero image, then returns to
    *main_window* and closes the info modal. Raises whatever Selenium raises
    when the row (or any required element) is missing.
    """
    driver.find_element_by_css_selector('a.list-group-item:nth-child(%d)' % row).click()
    instance_name = driver.find_element_by_css_selector('#modalInstanceInfoLabel').text
    description = driver.find_element_by_id('modalInstanceInfo-description').text

    print('Instance:')
    print(instance_name)
    text_file.write("Instance: " + "\n" + instance_name + "\n")
    print('Description:')
    print(description)
    text_file.write("Description: " + "\n" + description + "\n" + "\n")
    time.sleep(0.5)

    # open instance in new tab
    handles_before = set(driver.window_handles)
    go_button = driver.find_element_by_css_selector('#modalInstanceInfo-btn-go')
    go_button.send_keys(NEW_TAB_MODIFIER + Keys.ENTER)
    time.sleep(0.5)

    # Only switch/close when a tab really appeared; otherwise the close()
    # below would shut the main window.
    new_handles = [h for h in driver.window_handles if h not in handles_before]
    if new_handles:
        # go to new tab
        driver.switch_to.window(new_handles[-1])
        try:
            time.sleep(1)
            try:
                _download_hero_image(driver, image_index)
            except Exception:
                # Best effort: a missing or unreachable image must not stop the run.
                print("Impossible to print image")
            time.sleep(0.5)

            # close new tab
            driver.close()
            print("Closing Window")
        finally:
            # back to original tab, even if closing the new one failed
            driver.switch_to.window(main_window)
    else:
        print("No new tab was opened; skipping image")

    # closes pop up
    driver.find_element_by_css_selector('.btn.btn-secondary').click()
    time.sleep(1)


today = datetime.date.today()

# get the url from the terminal
# url = input("Enter instance.social url (include https:// ): ")
url = (
    "https://instances.social/list#lang=en"
    "&allowed=nudity_nocw,nudity_all,pornography_nocw,pornography_all,illegalContentLinks"
    "&prohibited=spam,advertising,spoilers_nocw&users="
)

# The context manager guarantees results.txt is flushed and closed however
# the run ends.
with open("results.txt", "a+") as text_file:
    text_file.write("Data collected on : " + str(today) + "\n" + "\n")

    # Tell Selenium to open a new Firefox session
    # and specify the path to the driver (expected next to this script)
    driver = webdriver.Firefox(
        executable_path=os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'geckodriver'
        )
    )
    try:
        # Implicit wait tells Selenium how long it should wait before it throws an exception
        driver.implicitly_wait(10)
        driver.get(url)
        time.sleep(3)

        main_window = driver.current_window_handle
        row = 1          # nth-child index of the list entry to process next
        page = 0         # how many times "load more" has been clicked
        image_index = 0  # suffix for the next image file name

        while True:
            try:
                _scrape_instance(driver, text_file, row, image_index, main_window)
            except Exception as exc:
                # Any failure on a row is treated as "this page is exhausted".
                # Exception (not a bare except) keeps Ctrl+C working.
                print("This is an exception")
                print("Reason: %s" % type(exc).__name__)
                if page >= len(PAGE_START_ROWS):
                    # No known start row left to resume from.
                    break
                try:
                    driver.find_element_by_css_selector(
                        '#load-more-instances > a:nth-child(1)'
                    ).click()
                except Exception:
                    # No "load more" link left: the whole list has been processed.
                    break
                row = PAGE_START_ROWS[page]
                page += 1
            else:
                row += 1
                image_index += 1
    finally:
        # close the browser and end the WebDriver session
        driver.quit()