You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

78 lines
2.5 KiB
Python

# import libraries
import datetime
import multiprocessing
import os
import time
from pprint import pprint

import requests
from mastodon import Mastodon
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.keys import Keys
# Scrape the "/about/more" page of the first MAX_PEERS peers reported by the
# home instance. For every peer the page text is appended to
# AboutMore/results.txt and the page's image is saved as AboutMore/<peer>.jpg.

# Upper bound on how many peers (in the order returned by the API) are visited.
MAX_PEERS = 200
# geckodriver is expected to sit next to this script; computed once, not per peer.
GECKODRIVER_PATH = os.path.dirname(os.path.realpath(__file__)) + '/geckodriver'


def _save_about_image(driver, peer):
    """Download the image found on the loaded page to AboutMore/<peer>.jpg.

    Returns True when the image was written and False when the image URL
    answered with a status other than 200. Element-lookup, network and file
    errors propagate to the caller.
    """
    # get the image source
    img = driver.find_element_by_xpath('/html/body/div/div[2]/div/div[1]/div/div/img')
    src = img.get_attribute('src')
    # download the image; the timeout keeps one dead host from stalling the run
    picture_request = requests.get(src, timeout=30)
    if picture_request.status_code != 200:
        return False
    with open("AboutMore/{}.jpg".format(peer), 'wb') as f:
        f.write(picture_request.content)
    return True


def _scrape_peer(driver, peer, text_file):
    """Load one peer's about page and record its text and image.

    Writes an "Instance:" header to text_file, then either the page text
    (followed by an attempt to save the image) or a "No about more" marker
    when the '.rich-formatting' element cannot be read.
    """
    url = "https://" + str(peer) + "/about/more"
    # Implicit wait tells Selenium how long it should wait before it throws an exception
    driver.implicitly_wait(5)
    driver.get(url)
    time.sleep(3)
    print('Instance: ', "\n", peer)
    text_file.write("Instance:" + "\n" + peer + "\n")

    try:
        about_text = driver.find_element_by_css_selector('.rich-formatting').text
    except WebDriverException:
        # Only Selenium failures count as "no about page"; Ctrl-C and
        # programming errors are no longer swallowed here.
        text_file.write("No about more" + "\n" + "\n")
        print("Status:" + "\n" + "No about more")
        return

    print('About more:')
    print(about_text)
    text_file.write("About more:" + "\n" + about_text + "\n" + "\n")
    time.sleep(1)

    try:
        if _save_about_image(driver, peer):
            print("Printed Image")
    except (WebDriverException, requests.RequestException, OSError):
        # Saving the image is best-effort: note the failure and carry on.
        print("Impossible to print image")
        text_file.write("Impossible to print image" + "\n" + "\n")
    time.sleep(0.5)


# Read the token exactly once. A second read() on the same handle returns ''
# (the file position is already at EOF), which previously handed Mastodon an
# empty access token. The token is a secret, so it is not echoed to stdout.
with open('token.txt', 'r') as token:
    access_token = token.read().strip()
mastodon = Mastodon(access_token=access_token, api_base_url="https://todon.nl")
peers = mastodon.instance_peers()
today = datetime.date.today()

# The context manager closes the results file even if a peer raises; UTF-8 so
# non-ASCII page text does not depend on the locale's default encoding.
with open("AboutMore/results.txt", "a+", encoding="utf-8") as text_file:
    text_file.write("Data collected on : " + str(today) + "\n" + "\n")
    for peer in peers[:MAX_PEERS]:
        time.sleep(0.5)
        print(peer)
        # Tell Selenium to open a new Firefox session
        # and specify the path to the driver
        driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
        try:
            _scrape_peer(driver, peer, text_file)
            time.sleep(1)
        finally:
            # Always end this peer's browser session, including on errors.
            # No further close is issued after the loop: the last session is
            # already gone by then, and no driver exists if there were no peers.
            driver.quit()
            print("Closing Window")