# grad_prototypes/voice_interface_scraping_sc.../webhook.py

import logging
logging.getLogger('flask_assistant').setLevel(logging.DEBUG)

from flask import Flask
from flask_assistant import Assistant, ask, tell, event, build_item

# This script scrapes the text of page elements that contain a keyword.
# Based on a tutorial by Michael Murtaugh at the Piet Zwart Institute:
# https://pad.xpub.nl/p/prototyping_02102018
import sys
import html5lib
import xml.etree.ElementTree as ET
from urllib.request import urlopen

# Flask application object; the __main__ guard at the bottom of the file runs it.
app = Flask(__name__)
# flask_assistant wrapper around the app, registered on the '/' route.
# The @assist.action(...) decorators below attach handlers to it.
assist = Assistant(app, route='/')


@assist.action('greeting')
def greet_and_start():
    """Handle the 'greeting' action by asking what to look up on NYTimes.com."""
    return ask("Hey! What should I check for you at NYTimes.com?")

@assist.action('give-keyword')
def read_headlines(keyword):
    """Answer with the NYTimes.com front-page text that mentions ``keyword``.

    Downloads https://nytimes.com, parses it with html5lib and collects the
    leading text (``element.text``) of every element, except ``<script>``
    elements, whose text contains the keyword. Matching is case-insensitive.

    Args:
        keyword: Search term passed in for the 'give-keyword' action.

    Returns:
        An ``ask`` response. Normally a short introduction followed by the
        matching texts, one per line. When the keyword is missing/blank or
        nothing on the page matches, a re-prompt is returned instead.
    """
    # Both sides of the substring test are lower-cased so that a keyword such
    # as "Trump" matches; the page text was already lower-cased, the keyword
    # was not.
    needle = (keyword or '').strip().lower()

    # A missing keyword would raise TypeError in the ``in`` test, and an empty
    # one is a substring of every text (the whole page would be read out).
    if not needle:
        return ask("Sorry, I did not catch that. "
                   "What should I check for you at NYTimes.com?")

    # Open the page and parse it into an ElementTree.
    url = 'https://nytimes.com'
    with urlopen(url) as f:
        t = html5lib.parse(f, namespaceHTMLElements=False)

    # Walk the whole tree and keep every text that contains the keyword.
    # <script> elements are skipped so JavaScript source that happens to
    # contain the word is not read out.
    text_list = [
        x.text
        for x in t.iter()
        if x.tag != 'script' and x.text is not None and needle in x.text.lower()
    ]

    # Do not announce results when there are none.
    if not text_list:
        return ask("Sorry, I could not find any news about " + keyword +
                   ". What else should I check for you at NYTimes.com?")

    # Join the matches into one newline-separated string and put the
    # introduction in front of it.
    text_list_strings = "\n".join(text_list)
    speech = 'Ok, I found the following news about ' + keyword + '. It is juicy!'

    return ask('{} {}'.format(speech, text_list_strings))

# Start the Flask server only when this file is executed directly.
# NOTE(review): debug=True turns on Flask's debug mode — confirm this entry
# point is never used for a publicly reachable deployment.
if __name__ == '__main__':
    app.run(debug=True)