from __future__ import print_function
from __future__ import unicode_literals

from builtins import str, bytes, dict, int
from builtins import range

import os
import sys

# Make the local pattern package importable when this script is run from the examples folder.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))

from pattern.web import Twitter, hashtags
from pattern.db import Datasheet, pprint, pd

# This example retrieves tweets containing given keywords from Twitter.

try:
    # We'll store tweets in a Datasheet.
    # A Datasheet is a table of rows and columns that can be exported as a CSV-file.
    # In the first column, we'll store a unique id for each tweet.
    # We only want to add the latest tweets, i.e., those we haven't seen yet.
    # With an index on the first column we can quickly check if an id already exists.
    # The pd() function returns the parent directory of this script + any given path.
    table = Datasheet.load(pd("cool.csv"))
    index = set(table.columns[0])
except Exception:
    # No cool.csv yet (or it failed to load): start with an empty table.
    table = Datasheet()
    index = set()
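
# A minimal sketch of the Datasheet + index idea (illustrative only, not executed here):
# >>> t = Datasheet(rows=[[1, "a"], [2, "b"]])
# >>> 1 in set(t.columns[0])
# True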
engine = Twitter(language="en")
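# Note: depending on your pattern version, Twitter() may also accept license
# (your API credentials) and throttle (seconds between requests) parameters;
# check pattern.web before relying on them.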

# With Twitter.search(cached=False), a "live" request is sent to Twitter:
# we get the most recent results instead of those in the local cache.
# Keeping a local cache can also be useful (e.g., while testing)
# because a query is instant when it is executed the second time.
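# For example, a cached query against the same engine (a sketch, not run here):
# engine.search("is cooler than", cached=True)  # instant the second time around.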
prev = None
for i in range(2):
    print(i)
    for tweet in engine.search("is cooler than", start=prev, count=25, cached=False):
        print("")
        print(tweet.text)
        print(tweet.author)
        print(tweet.date)
        print(hashtags(tweet.text))  # Keywords in tweets start with a "#".
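        # For example (illustrative sketch; exact output may vary by version):
        # hashtags("I love #python") should return something like ["#python"].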
        print("")
        # Only add the tweet to the table if it doesn't already exist.
        if len(table) == 0 or tweet.id not in index:
            table.append([tweet.id, tweet.text])
            index.add(tweet.id)
        # Continue mining older tweets in the next iteration.
        prev = tweet.id

# Create a .csv in pattern/examples/01-web/
table.save(pd("cool.csv"))

print("Total results: %s" % len(table))
print("")

# Print all the rows in the table.
# Since it is stored as a CSV-file it grows comfortably each time the script runs.
# We can also open the table later on: in other scripts, for further analysis, ...

pprint(table, truncate=100)

# Note: you can also search tweets by author:
# Twitter().search("from:tom_de_smedt")
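# A few other query styles that should work the same way (sketches, untested):
# Twitter().search("#python")            # tweets containing a given hashtag.
# Twitter().search("pattern OR python")  # Twitter's OR operator for boolean queries.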