# bo-graduation/nltk-book/pattern-master/examples/01-web/04-twitter.py

from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int
from builtins import range

import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.web import Twitter, hashtags
from pattern.db import Datasheet, pprint, pd

# This example retrieves tweets containing given keywords from Twitter.

# We'll store tweets in a Datasheet.
# A Datasheet is a table of rows and columns that can be exported as a CSV-file.
# In the first column, we'll store a unique id for each tweet.
# We only want to add the latest tweets, i.e., those we haven't seen yet.
# With a set built from the first column we can quickly check if an id already exists.
# The pd() function returns the parent directory of this script + any given path.
try:
    table = Datasheet.load(pd("cool.csv"))
    index = set(table.columns[0])
except Exception:
    # Best effort: if cool.csv cannot be loaded (e.g., on the first run),
    # start from an empty table and an empty index.
    # Catching Exception instead of using a bare "except:" keeps this fallback
    # while letting KeyboardInterrupt (Ctrl-C) and SystemExit propagate.
    table = Datasheet()
    index = set()

# Search engine restricted to language="en".
engine = Twitter(language="en")

# Passing cached=False to Twitter.search() sends a "live" request to Twitter,
# so we get the most recent results rather than those in the local cache.
# (A local cache is still handy, e.g., while testing: a query that is
# executed a second time returns instantly.)
prev = None
for i in range(2):
    print(i)
    results = engine.search("is cooler than", start=prev, count=25, cached=False)
    for tweet in results:
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        # Keywords in tweets start with a "#".
        tags = hashtags(tweet.text)
        print(tags)
        print("")
        # A tweet counts as seen only when the table has rows and its id is indexed.
        seen = len(table) > 0 and tweet.id in index
        if not seen:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Remember this id so the next iteration continues mining older tweets.
        prev = tweet.id

# Write the table to cool.csv in pattern/examples/01-web/
table.save(pd("cool.csv"))

# Report how many rows the table holds after this run.
total = len(table)
print("Total results: %s" % total)
print("")

# Pretty-print all the rows in the table.
# Because the table is saved as a CSV-file, it grows each time the script runs,
# and it can be opened again later: in other scripts, for further analysis, ...

pprint(table, truncate=100)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
thrid updates 5 years ago			`from __future__ import print_function`
			`from __future__ import unicode_literals`

			`from builtins import str, bytes, dict, int`
			`from builtins import range`

			`import os`
			`import sys`
			`sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))`

			`from pattern.web import Twitter, hashtags`
			`from pattern.db import Datasheet, pprint, pd`

			`# This example retrieves tweets containing given keywords from Twitter.`

			`try:`
			`# We'll store tweets in a Datasheet.`
			`# A Datasheet is a table of rows and columns that can be exported as a CSV-file.`
			`# In the first column, we'll store a unique id for each tweet.`
			`# We only want to add the latest tweets, i.e., those we haven't seen yet.`
			`# With an index on the first column we can quickly check if an id already exists.`
			`# The pd() function returns the parent directory of this script + any given path.`
			`table = Datasheet.load(pd("cool.csv"))`
			`index = set(table.columns[0])`
			`except:`
			`table = Datasheet()`
			`index = set()`

			`engine = Twitter(language="en")`

			`# With Twitter.search(cached=False), a "live" request is sent to Twitter:`
			`# we get the most recent results instead of those in the local cache.`
			`# Keeping a local cache can also be useful (e.g., while testing)`
			`# because a query is instant when it is executed the second time.`
			`prev = None`
			`for i in range(2):`
			`print(i)`
			`for tweet in engine.search("is cooler than", start=prev, count=25, cached=False):`
			`print("")`
			`print(tweet.text)`
			`print(tweet.author)`
			`print(tweet.date)`
			`print(hashtags(tweet.text)) # Keywords in tweets start with a "#".`
			`print("")`
			`# Only add the tweet to the table if it doesn't already exists.`
			`if len(table) == 0 or tweet.id not in index:`
			`table.append([tweet.id, tweet.text])`
			`index.add(tweet.id)`
			`# Continue mining older tweets in next iteration.`
			`prev = tweet.id`

			`# Create a .csv in pattern/examples/01-web/`
			`table.save(pd("cool.csv"))`

			`print("Total results: %s" % len(table))`
			`print("")`

			`# Print all the rows in the table.`
			`# Since it is stored as a CSV-file it grows comfortably each time the script runs.`
			`# We can also open the table later on: in other scripts, for further analysis, ...`

			`pprint(table, truncate=100)`

			`# Note: you can also search tweets by author:`
			`# Twitter().search("from:tom_de_smedt")`