You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
2.5 KiB
Python

# import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
import datetime
from pprint import pprint
import requests
import multiprocessing
from mastodon import Mastodon
# Scrape the landing page of the first MAX_INSTANCES peers known to the
# Mastodon instance at API_BASE_URL.  For every peer the short description
# is appended to OUTPUT_DIR/results.txt and, when available, the landing
# page image is saved as OUTPUT_DIR/<peer>.jpg.

TOKEN_PATH = 'token.txt'
API_BASE_URL = "https://todon.nl"
OUTPUT_DIR = "scrape"
MAX_INSTANCES = 200
GECKODRIVER_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'geckodriver')
IMAGE_TIMEOUT_SECONDS = 30


def _read_token(path):
    """Return the access token stored in *path*, stripped of whitespace.

    The file is read exactly once: a second ``read()`` on the same handle
    returns an empty string, which is how the token used to get lost.
    The token is deliberately not printed because it is a credential.
    """
    with open(path, 'r') as token_file:
        return token_file.read().strip()


def _save_image(driver, peer, log):
    """Download the landing-page image of *peer* (best effort).

    Any failure (element missing, download error, unwritable file) is
    reported on stdout and in *log* instead of being raised, so one broken
    instance never stops the run.
    """
    try:
        # get the image source
        img = driver.find_element_by_xpath('/html/body/div[1]/div/div/div[3]/div[1]/img')
        src = img.get_attribute('src')
        # download the image; the timeout keeps a stalled server from
        # blocking the whole scrape
        picture_request = requests.get(src, timeout=IMAGE_TIMEOUT_SECONDS)
        if picture_request.status_code == 200:
            # peer names come from a remote API: drop path separators so
            # the file can only ever land inside OUTPUT_DIR
            safe_name = peer.replace("/", "_").replace(os.sep, "_")
            with open(os.path.join(OUTPUT_DIR, "{}.jpg".format(safe_name)), 'wb') as f:
                f.write(picture_request.content)
            print("Printed Image")
    except Exception:
        print("Impossible to print image")
        log.write("Impossible to print image"+"\n"+"\n")


def _scrape_instance(peer, log):
    """Open *peer* in a fresh Firefox session and record what is found.

    Writes an "Instance:" header to *log*, followed by either the "About:"
    text of the landing page or "Impossible to check instance".  The
    browser session is always shut down, even when scraping fails.
    """
    peer = str(peer)
    url = "https://" + peer
    print(peer)
    # Tell Selenium to open a new Firefox session
    # and specify the path to the driver
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        # Implicit wait tells Selenium how long it should wait before it throws an exception
        driver.implicitly_wait(5)
        # The header is written before loading the page so that a failed
        # load is still attributed to the right instance in the log.
        print('Instance: ', "\n", peer)
        log.write("Instance:"+"\n"+peer+"\n")
        try:
            # Loading happens inside the try: unreachable peers are common
            # and must be logged, not crash the run.
            driver.get(url)
            time.sleep(3)
            about = driver.find_element_by_css_selector('.landing-page__short-description')
            print('About:')
            print(about.text)
            log.write("About:"+"\n"+about.text+"\n"+"\n")
        except Exception:
            log.write("Impossible to check instance"+"\n"+"\n")
            print("Status:"+"\n"+"Impossible to check instance")
        else:
            time.sleep(1)
            _save_image(driver, peer, log)
            time.sleep(0.5)
        time.sleep(1)
    finally:
        # quit() ends the session and the driver process; it runs exactly
        # once per session, so no second close is needed after the loop.
        driver.quit()
        print("Closing Window")


mastodon = Mastodon(access_token=_read_token(TOKEN_PATH), api_base_url=API_BASE_URL)
peers = mastodon.instance_peers()
today = datetime.date.today()

# The results file and the images both live in OUTPUT_DIR.
if not os.path.isdir(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# The context manager guarantees the results file is flushed and closed
# even if creating a browser session fails part-way through.
with open(os.path.join(OUTPUT_DIR, "results.txt"), "a+") as text_file:
    text_file.write("Data collected on : "+str(today)+"\n"+"\n")
    for peer in peers[:MAX_INSTANCES]:
        time.sleep(0.5)
        _scrape_instance(peer, text_file)