new patches

master
pnofrc 4 years ago
parent 7273f06894
commit 0aebb6b53d

@ -0,0 +1,145 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Dynamically loaded content downloader"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selenium + BeautifulSoup "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Reference tutorial: <https://dvenkatsagar.github.io/tutorials/python/2015/10/26/ddlv/>"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# The standard library modules\n",
"import os\n",
"import sys\n",
"\n",
"# The wget module\n",
"import wget\n",
"\n",
"# The BeautifulSoup module\n",
"from bs4 import BeautifulSoup\n",
"\n",
"# The selenium module\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.common.by import By"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start a Firefox session. The driver binary (geckodriver) is expected to be discoverable,\n",
"# e.g. on PATH, so no machine-specific directory is passed to the constructor\n",
"# (its first positional parameter is not a driver location).\n",
"driver = webdriver.Firefox()\n",
"#driver = webdriver.Chrome()\n",
"driver.get(\"https://mubi.com/it/films/music-and-apocalypse/watch\") # load the web page"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# For websites that require a login before the content is accessible.\n",
"# Credentials are read from the environment so real ones never get saved in the notebook;\n",
"# the placeholder values remain the defaults.\n",
"email_field = driver.find_element(By.ID, \"email\") # email input field of the login form\n",
"email_field.send_keys(os.environ.get(\"LOGIN_EMAIL\", \"user@example.com\")) # type the user's email\n",
"password_field = driver.find_element(By.ID, \"pwd\") # password field of the login form\n",
"password_field.send_keys(os.environ.get(\"LOGIN_PASSWORD\", \"userpwd\")) # type the user's password\n",
"password_field.send_keys(Keys.RETURN) # press the Enter key"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Navigate to the page that embeds the video\n",
"video_page_url = \"http://www.example.com/path/of/video/page.html\"\n",
"driver.get(video_page_url)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wait (timeout: 50) until the element with the given id is visible, then read the rendered HTML\n",
"element_is_visible = EC.visibility_of_element_located((By.ID, \"the-element-id\"))\n",
"WebDriverWait(driver, 50).until(element_is_visible)\n",
"src = driver.page_source"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse the HTML held in \"src\" using the lxml parser\n",
"parser = BeautifulSoup(src, \"lxml\")\n",
"# Attribute name -> required value; a tag has to match every entry to be returned\n",
"list_of_attributes = {\"class\": \"some-class\", \"name\": \"some-name\"}\n",
"# find_all is the Beautiful Soup 4 method name; findAll is only a legacy alias\n",
"tag = parser.find_all(\"video\", attrs=list_of_attributes) # all matching <video> tags"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n = 0 # index of the wanted video among the matched <video> tags\n",
"# Explain the failure when nothing matched (or n is too large) instead of a bare \"list index out of range\"\n",
"if n >= len(tag):\n",
"    raise IndexError(\"video index {} requested but only {} matching <video> tag(s) were found\".format(n, len(tag)))\n",
"url = tag[n]['src'] # get the src attribute of the video\n",
"wget.download(url,out=\"path/to/output/file\") # download the video"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"driver.quit() # ends the whole WebDriver session; close() would only close the current window"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

@ -0,0 +1,112 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import random"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"cereale = ['riso','miglio','fonio','cous cous']\n",
"# Random pick from cereale -- assumed intent of `rc` (the assignment had no right-hand side); TODO confirm\n",
"rc = random.choice(cereale)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"carne = ['pesce','scimmia','agnello','mucca','pecora']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"contorno = [' e patatine.','.',' e insalata.']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Random index that is valid for cereale, carne and contorno alike (bounded by the shortest list)\n",
"r = random.randrange(min(len(cereale), len(carne), len(contorno)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'patatine'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"menu = f\"Il menù di oggi:\\nPiatto di {cereale[r]} e {carne[r]} con cipolle{contorno[r]}\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading…
Cancel
Save