diff --git a/DynamicallyLoadedContentDownloader.ipynb b/DynamicallyLoadedContentDownloader.ipynb new file mode 100644 index 0000000..e78d2d7 --- /dev/null +++ b/DynamicallyLoadedContentDownloader.ipynb @@ -0,0 +1,145 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Dynamically loaded content downloader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selenium + BeautifulSoup " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://dvenkatsagar.github.io/tutorials/python/2015/10/26/ddlv/" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# The standard library modules\n", + "import os\n", + "import sys\n", + "\n", + "# The wget module\n", + "import wget\n", + "\n", + "# The BeautifulSoup module\n", + "from bs4 import BeautifulSoup\n", + "\n", + "# The selenium module\n", + "from selenium import webdriver\n", + "from selenium.webdriver.common.keys import Keys\n", + "from selenium.webdriver.support.ui import WebDriverWait\n", + "from selenium.webdriver.support import expected_conditions as EC\n", + "from selenium.webdriver.common.by import By" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "driver = webdriver.Firefox(\"/Users/pnofrc/\")\n", + "#driver = webdriver.Chrome(\"/Users/pnofrc/\")\n", + "driver.get(\"https://mubi.com/it/films/music-and-apocalypse/watch\") # load the web page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# for websites that require you to log in to access the information\n", + "elem = driver.find_element_by_id(\"email\") # Find the email input field of the login form\n", + "elem.send_keys(\"user@example.com\") # Send the user's email\n", + "elem = driver.find_element_by_id(\"pwd\") # Find the password field of the login form\n", + 
"elem.send_keys(\"userpwd\") # send the user's password\n", + "elem.send_keys(Keys.RETURN) # press the enter key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "driver.get(\"http://www.example.com/path/of/video/page.html\") # load the page that has the video" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "WebDriverWait(driver, 50).until(EC.visibility_of_element_located((By.ID, \"the-element-id\"))) # waits until the element with the specific id appears\n", + "src = driver.page_source # gets the html source of the page" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "parser = BeautifulSoup(src,\"lxml\") # initialize the parser and parse the source \"src\"\n", + "list_of_attributes = {\"class\" : \"some-class\", \"name\" : \"some-name\"} # A dict of attributes that you want to check in a tag\n", + "tag = parser.findAll('video',attrs=list_of_attributes) # Get all matching video tags from the source" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 0 # Specify the index of the video element in the web page\n", + "url = tag[n]['src'] # get the src attribute of the video\n", + "wget.download(url,out=\"path/to/output/file\") # download the video" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "driver.close() # closes the driver" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff 
--git a/cucinasenegalese.ipynb b/cucinasenegalese.ipynb new file mode 100644 index 0000000..d3f714c --- /dev/null +++ b/cucinasenegalese.ipynb @@ -0,0 +1,112 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "cereale = ['riso','miglio','fonio','cous cous']\n", + "rc = " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "carne = ['pesce','scimmia','agnello','mucca','pecora']" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "contorno = [' e patatine.','.',' e insalata.']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "r" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'patatine'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "menu = f\"Il menù di oggi:\\nPiatto di {cereale[r]} e {carne[r]} con cipolle{contorno[r]}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + 
"nbformat_minor": 4 +}