You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

101 lines
2.9 KiB
Python

# import libraries
import datetime
import multiprocessing
import os
import time
from pprint import pprint

import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.keys import Keys
# Scrape the instances.social listing: for every entry, append its name and
# description to results.txt and try to save the hero image of its landing
# page as image<N>.jpg in the current directory.

today = datetime.date.today()
# get the url from the terminal
# url = input("Enter instance.social url (include https:// ): ")
url = "https://instances.social/list#lang=en&allowed=nudity_nocw,nudity_all,pornography_nocw,pornography_all,illegalContentLinks&prohibited=spam,advertising,spoilers_nocw&users="

# nth-child index at which scraping resumes after each click on "load more".
# One value per extra page, so at most len(PAGE_START_INDICES) extra pages
# are loaded.
# NOTE(review): presumably each page adds 50 entries plus one non-entry
# child, hence 52, 102, ... -- confirm against the live page markup.
PAGE_START_INDICES = [52, 102, 152, 202, 252, 302, 352, 402]

# Seconds to wait for an image download before giving up on it.
IMAGE_TIMEOUT = 30


def _download_hero_image(driver, image_index):
    """Save the hero image of the page in the focused tab as image<N>.jpg.

    Best effort: any failure (no hero image, network error, write error) is
    reported on stdout and otherwise ignored, as in the original script.
    """
    try:
        # get the image source
        img = driver.find_element_by_css_selector('.landing-page__hero > img:nth-child(1)')
        src = img.get_attribute('src')
        # download the image; the timeout keeps one dead host from hanging
        # the whole run
        picture_request = requests.get(src, timeout=IMAGE_TIMEOUT)
        if picture_request.status_code == 200:
            # A distinct name for the file handle: the original reused `f`,
            # which clobbered the page counter and crashed the paging code.
            with open("image%i.jpg" % image_index, 'wb') as image_file:
                image_file.write(picture_request.content)
            print("Printed Image")
    except Exception:
        print("Impossible to print image")


def _recover(driver, main_window):
    """Best-effort return to the list page after an entry failed midway."""
    try:
        # Close any tab the failed entry left open, then refocus the list.
        for handle in driver.window_handles:
            if handle != main_window:
                driver.switch_to.window(handle)
                driver.close()
        driver.switch_to.window(main_window)
        # Dismiss the info pop-up in case it is still showing.
        driver.find_element_by_css_selector('.btn.btn-secondary').click()
    except WebDriverException:
        # Nothing to dismiss, or the browser is unusable; in the latter case
        # the next lookup in the main loop raises and ends the run.
        pass


def _scrape_instance(driver, entry, text_file, image_index, main_window):
    """Record one list entry's name and description and fetch its image.

    Raises whatever Selenium raises when the pop-up cannot be opened, read
    or closed; the caller decides how to recover.
    """
    entry.click()
    instance_url = driver.find_element_by_css_selector('#modalInstanceInfoLabel')
    description = driver.find_element_by_id('modalInstanceInfo-description')
    print('Instance:')
    print(instance_url.text)
    text_file.write("Instance: " + "\n" + instance_url.text + "\n")
    print('Description:')
    print(description.text)
    text_file.write("Description: " + "\n" + description.text + "\n" + "\n")
    time.sleep(0.5)
    # open instance in new tab
    # NOTE: Keys.COMMAND is the macOS modifier; on other platforms no new tab
    # may appear, which the check below tolerates.
    driver.find_element_by_css_selector('#modalInstanceInfo-btn-go').send_keys(Keys.COMMAND + Keys.ENTER)
    time.sleep(0.5)
    # Identify the new tab by handle rather than by position: the order of
    # window_handles is not guaranteed, and if no tab opened, [-1] would be
    # the list window itself (which the original then closed).
    new_tabs = [handle for handle in driver.window_handles if handle != main_window]
    if new_tabs:
        # go to new tab
        driver.switch_to.window(new_tabs[-1])
        time.sleep(1)
        _download_hero_image(driver, image_index)
        time.sleep(0.5)
        # close new tab
        driver.close()
        print("Closing Window")
        # back to original tab
        driver.switch_to.window(main_window)
    else:
        print("Impossible to print image")
    # closes pop up
    driver.find_element_by_css_selector('.btn.btn-secondary').click()
    time.sleep(1)


def _scrape_listing(driver, text_file):
    """Walk the instance list page by page and scrape every entry.

    Returns when the pages in PAGE_START_INDICES are used up or the
    "load more" link can no longer be clicked.
    """
    main_window = driver.current_window_handle
    item_index = 1    # nth-child position of the next list entry
    pages_loaded = 0  # number of "load more" clicks so far
    image_index = 0   # suffix of the next image file
    while True:
        try:
            entry = driver.find_element_by_css_selector('a.list-group-item:nth-child(%d)' % item_index)
        except NoSuchElementException:
            # No entry at this position: the current page is used up.
            print("This is an exception")
            if pages_loaded >= len(PAGE_START_INDICES):
                break
            try:
                driver.find_element_by_css_selector('#load-more-instances > a:nth-child(1)').click()
            except WebDriverException:
                # The link is missing or not clickable: nothing more to load.
                break
            item_index = PAGE_START_INDICES[pages_loaded]
            pages_loaded += 1
            continue

        try:
            _scrape_instance(driver, entry, text_file, image_index, main_window)
        except Exception as exc:
            # One broken entry must not cost the rest of the page (the
            # original jumped straight to the next page on any error).
            print("Skipping entry %d: %r" % (item_index, exc))
            _recover(driver, main_window)
        item_index += 1
        image_index += 1


# The context manager and try/finally guarantee that results.txt is flushed
# and the browser session is shut down however the run ends.
with open("results.txt", "a+", encoding="utf-8") as text_file:
    text_file.write("Data collected on : " + str(today) + "\n" + "\n")
    # Tell Selenium to open a new Firefox session
    # and specify the path to the driver
    driver = webdriver.Firefox(executable_path=os.path.dirname(os.path.realpath(__file__)) + '/geckodriver')
    try:
        # Implicit wait tells Selenium how long it should wait before it throws an exception
        driver.implicitly_wait(10)
        driver.get(url)
        time.sleep(3)
        _scrape_listing(driver, text_file)
    finally:
        # close the browser; quit() ends the whole session, not just the
        # current window
        driver.quit()