"""Scrape the landing pages of Mastodon instances federated with todon.nl.

The script asks the todon.nl API for its list of peer instances, opens the
first ``MAX_PEERS`` of them in Firefox through Selenium, and appends what it
finds to ``scrape/results.txt``.  For each instance it records the text of the
``.landing-page__short-description`` element and, when an image is found at a
fixed XPath, saves that image as ``scrape/<peer>.jpg``.

Requirements visible from the code: a ``token.txt`` file holding the API
access token in the working directory, and a ``geckodriver`` binary next to
this script.
"""

# import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import os
import time
import datetime
from pprint import pprint
import requests
import multiprocessing
from mastodon import Mastodon

# Instance whose peer list is scraped.
API_BASE_URL = "https://todon.nl"
# File containing the Mastodon API access token.
TOKEN_PATH = 'token.txt'
# Directory receiving the text report and the downloaded images.
OUTPUT_DIR = "scrape"
RESULTS_PATH = "scrape/results.txt"
# Only the first MAX_PEERS peers returned by the API are visited.
MAX_PEERS = 200
# Seconds to wait for an image download before giving up on it.
REQUEST_TIMEOUT = 30
# The geckodriver binary is expected to sit next to this script.
GECKODRIVER_PATH = os.path.dirname(os.path.realpath(__file__)) + '/geckodriver'


def read_access_token(path=TOKEN_PATH):
    """Return the access token stored in *path*.

    The file is read exactly once: a second ``read()`` on the same handle
    returns an empty string, which is what previously reached the Mastodon
    client.  The token is deliberately not printed, and surrounding
    whitespace (e.g. a trailing newline) is stripped.
    """
    with open(path, 'r') as token:
        return token.read().strip()


def save_instance_image(driver, peer, text_file):
    """Download the landing-page image of *peer*, best effort.

    The image is located through a fixed XPath on the page currently loaded
    in *driver* and written to ``scrape/<peer>.jpg`` when the download
    answers with HTTP 200.  Any failure (element missing, download error,
    file error) is reported on stdout and in *text_file* instead of being
    raised, so a missing image never stops the scrape.
    """
    try:
        # get the image source
        img = driver.find_element(
            By.XPATH, '/html/body/div[1]/div/div/div[3]/div[1]/img')
        src = img.get_attribute('src')
        # download the image; the timeout keeps one slow host from
        # blocking the whole run
        picture_request = requests.get(src, timeout=REQUEST_TIMEOUT)
        if picture_request.status_code == 200:
            with open("scrape/{}.jpg".format(peer), 'wb') as f:
                f.write(picture_request.content)
            print("Printed Image")
    except Exception:
        # Deliberate best effort; ``Exception`` (unlike a bare ``except``)
        # still lets Ctrl-C and SystemExit through.
        print("Impossible to print image")
        text_file.write("Impossible to print image"+"\n"+"\n")
        time.sleep(0.5)


def scrape_instance(peer, text_file):
    """Visit the landing page of *peer* and append the findings to *text_file*.

    A fresh Firefox session is started for the instance and is always shut
    down again, even when the page cannot be loaded or parsed.  Page-level
    failures are logged as "Impossible to check instance"; a failure to
    start Firefox itself is not caught and propagates to the caller.
    """
    url = "https://"+str(peer)
    print(peer)

    # Tell Selenium to open a new Firefox session
    # and specify the path to the driver.
    # NOTE(review): ``executable_path`` is the Selenium 3 style of passing the
    # driver location; newer Selenium releases expect a ``Service`` object
    # instead -- confirm against the installed version.
    driver = webdriver.Firefox(executable_path=GECKODRIVER_PATH)
    try:
        print('Instance: ', "\n", peer)
        text_file.write("Instance:"+"\n"+str(peer)+"\n")

        # Implicit wait tells Selenium how long it should wait
        # before it throws an exception
        driver.implicitly_wait(5)
        # Loading the page happens inside the ``try`` so that an unreachable
        # peer is logged and skipped instead of aborting the whole run.
        driver.get(url)
        time.sleep(3)

        about = driver.find_element(
            By.CSS_SELECTOR, '.landing-page__short-description')
        print('About:')
        print(about.text)
        text_file.write("About:"+"\n"+about.text+"\n"+"\n")
        time.sleep(1)

        save_instance_image(driver, peer, text_file)
    except Exception:
        text_file.write("Impossible to check instance"+"\n"+"\n")
        print("Status:"+"\n"+"Impossible to check instance")
        time.sleep(1)
    finally:
        # ``quit`` ends the whole browser session; doing it in ``finally``
        # guarantees no Firefox process is left behind on errors.
        driver.quit()
        print("Closing Window")


def main():
    """Fetch the peer list and scrape the first ``MAX_PEERS`` instances."""
    mastodon = Mastodon(access_token=read_access_token(),
                        api_base_url=API_BASE_URL)
    peers = mastodon.instance_peers()
    today = datetime.date.today()

    # The report and the images both live in OUTPUT_DIR; create it up front
    # so the first write does not fail on a fresh checkout.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The context manager closes the report even if a scrape step raises;
    # UTF-8 is explicit because instance descriptions are free-form text.
    with open(RESULTS_PATH, "a+", encoding="utf-8") as text_file:
        text_file.write("Data collected on : "+str(today)+"\n"+"\n")

        for n, peer in enumerate(peers):
            if n >= MAX_PEERS:
                # Stop instead of idling through the rest of the peer list.
                break
            time.sleep(0.5)
            scrape_instance(peer, text_file)


if __name__ == "__main__":
    main()