|
|
@ -4,11 +4,16 @@ from selenium.webdriver.common.keys import Keys
|
|
|
|
import os
|
|
|
|
import os
|
|
|
|
import time
|
|
|
|
import time
|
|
|
|
import datetime
|
|
|
|
import datetime
|
|
|
|
|
|
|
|
from pprint import pprint
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
# Date stamp written as the header of this run's section in the results file.
today = datetime.date.today()

# Opened in append mode so earlier runs are kept; the handle stays open
# because the scraping code further down writes to it and closes it.
# UTF-8 is set explicitly because scraped text is written to this file.
text_file = open("results.txt", "a+", encoding="utf-8")
text_file.write("Data collected on : " + str(today) + "\n" + "\n")

# The listing URL is hard-coded. The earlier interactive prompt was dropped:
# its answer was overwritten by this assignment, so it only blocked on stdin.
url = "https://instances.social/list#lang=en&allowed=nudity_nocw,nudity_all,spam&prohibited=&users="
|
|
|
|
|
|
|
|
|
|
|
|
# Tell Selenium to open a new Firefox session
|
|
|
|
# Tell Selenium to open a new Firefox session
|
|
|
|
# and specify the path to the driver
|
|
|
|
# and specify the path to the driver
|
|
|
@ -20,27 +25,79 @@ driver.get(url)
|
|
|
|
# Pause after loading the page (presumably to let the list render).
time.sleep(3)

# Imported here because only this section needs the exception types.
from selenium.common.exceptions import NoSuchElementException, WebDriverException

d = 1   # 1-based :nth-child index of the list entry to open next
i = 0   # numeric suffix of the next downloaded image file
p = -1  # index into driver.window_handles for the newly opened tab; decremented each pass

# Collected text, written to text_file once the run ends.
results = []

try:
    while True:
        try:
            entry = driver.find_element_by_css_selector('a.list-group-item:nth-child(%d)' % d)
        except NoSuchElementException:
            # No entry at this position: stop instead of crashing.
            break
        entry.click()

        instance_url = driver.find_element_by_xpath('//h5')
        description = driver.find_element_by_id('modalInstanceInfo-description')

        print('Instance:')
        print(instance_url.text)
        results.append("Instance: " + "\n" + instance_url.text + "\n")

        print('Description:')
        print(description.text)
        results.append("Description: " + "\n" + description.text + "\n" + "\n")

        time.sleep(1)

        # Open the instance in a new tab (COMMAND+ENTER is the macOS shortcut).
        driver.find_element_by_css_selector('#modalInstanceInfo-btn-go').send_keys(Keys.COMMAND + Keys.ENTER)
        time.sleep(2)

        # Switch to the new tab. Tabs are never closed, so the index moves
        # one step further from the end on every pass.
        print([p])
        driver.switch_to.window(driver.window_handles[p])
        p = p - 1

        time.sleep(3)

        # Best effort: save the landing-page hero image if the page has one.
        try:
            img = driver.find_element_by_css_selector('.landing-page__hero > img:nth-child(1)')
            src = img.get_attribute('src')
            picture_request = requests.get(src, timeout=30)
            if picture_request.status_code == 200:
                with open("image%i.jpg" % i, 'wb') as f:
                    f.write(picture_request.content)
        except (WebDriverException, requests.RequestException, OSError) as exc:
            # A missing or unreachable image must not end the run.
            print("Skipping image %i: %s" % (i, exc))

        time.sleep(1)

        # Back to the original tab, whether or not the download worked.
        driver.switch_to.window(driver.window_handles[0])

        # Close the pop-up.
        driver.find_element_by_css_selector('.btn.btn-secondary').click()
        time.sleep(1)

        # Reset the scroll position near the top of the page.
        # NOTE(review): the purpose of this scroll is not evident here - confirm.
        driver.execute_script('window.scrollTo(1, 40);')
        time.sleep(0.5)

        d += 1
        i += 1
finally:
    # Always save what was collected and release the file and the browser,
    # even if the loop stopped on an unexpected error.
    try:
        text_file.write(str(results))
        text_file.close()
    finally:
        # quit() ends the whole session; close() would only close the
        # current window and leave the tabs opened above behind.
        driver.quit()
|
|
|
|