You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
56 lines
1.8 KiB
Python
56 lines
1.8 KiB
Python
import logging
|
|
# Emit flask_assistant's DEBUG-level log records while this script runs.
_assistant_logger = logging.getLogger('flask_assistant')
_assistant_logger.setLevel(logging.DEBUG)
|
|
|
|
from flask import Flask
|
|
from flask_assistant import Assistant, ask, tell, event, build_item
|
|
|
|
# This script scrapes the text elements of a web page that contain a keyword
|
|
# Based on a tutorial by Michael Murtaugh at the Piet Zwart Institute
|
|
# https://pad.xpub.nl/p/prototyping_02102018
|
|
import sys
|
|
import html5lib
|
|
import xml.etree.ElementTree as ET
|
|
from urllib.request import urlopen
|
|
|
|
# Flask application object; it is started from the __main__ guard below.
app = Flask(__name__)

# Attach the assistant to the app, using '/' as its route.
assist = Assistant(app, route='/')
|
|
|
|
|
|
@assist.action('greeting')
def greet_and_start():
    """Handle the 'greeting' action by asking what to look up.

    Returns:
        The result of ``ask`` for the opening prompt.
    """
    return ask("Hey! What should I check for you at NYTimes.com?")
|
|
|
|
@assist.action('give-keyword')
def read_headlines(keyword):
    """Collect the texts on the NYTimes.com front page that mention ``keyword``.

    Downloads https://nytimes.com, parses it with html5lib and gathers the
    text of every element, except ``<script>`` elements, whose text contains
    the keyword. The comparison is case-insensitive.

    Args:
        keyword: The search term passed to the 'give-keyword' action.

    Returns:
        The result of ``ask``: an introduction followed by the matching
        texts (one per line), or a "nothing found" message when no element
        matches.
    """
    # Open the page and parse it into an ElementTree-style tree.
    url = 'https://nytimes.com'
    with urlopen(url) as f:
        t = html5lib.parse(f, namespaceHTMLElements=False)

    # The element text is lower-cased before the comparison, so the keyword
    # has to be lower-cased as well; otherwise a capitalised keyword such as
    # "Trump" could never match.
    needle = keyword.lower()

    # Walk the whole tree and keep the text of every element mentioning the
    # keyword. <script> elements are skipped so that JavaScript source which
    # happens to contain the word is not read out.
    text_list = [
        x.text
        for x in t.iter()
        if x.text is not None
        and x.tag != 'script'
        and needle in x.text.lower()
    ]

    # Do not announce "juicy" news when nothing was found.
    if not text_list:
        return ask('Sorry, I could not find any news about ' + keyword + '.')

    # Join the matches into one string, one match per line.
    text_list_strings = "\n".join(text_list)
    speech = 'Ok, I found the following news about ' + keyword + '. It is juicy!'

    return ask('{} {}'.format(speech, text_list_strings))
|
|
|
|
# Start the Flask server only when this file is run as a script.
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode; presumably this
    # is meant for local development only -- confirm before deploying.
    app.run(debug=True)