You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.5 KiB
Python
50 lines
1.5 KiB
Python
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
from builtins import str, bytes, dict, int
|
|
from builtins import range
|
|
|
|
import os
|
|
import sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
from pattern.web import Twitter
|
|
from pattern.graph import Graph
|
|
|
|
# This example demonstrates a simple Twitter miner + visualizer.
|
|
# We collect tweets containing "A is the new B",
|
|
# mine A and B and use them as connected nodes in a graph.
|
|
# Then we export the graph as a browser visualization.
|
|
|
|
# Punctuation trimmed from both ends of a mined word.
_PUNCTUATION = "?!.:;,#@\"'"
# Left-hand words that mark a generic phrase such as "this is the new thing".
_GENERIC_SUBJECTS = ("it", "this", "here", "what", "why", "where")


def _mine_comparison(text):
    """Return the (A, B) pair mined from a text containing "A is the new B".

    The text is flattened to one line and lower-cased, then the phrase
    "is the new" is collapsed into the single marker token "NEW". A is the
    token right before the first marker and B the token right after it,
    both stripped of surrounding punctuation.

    Returns None when:
    - the text is empty,
    - no standalone marker token is found,
    - the marker has no token before it or no token after it,
    - A or B is empty after stripping punctuation,
    - A is one of the generic subjects ("it", "this", ...).
    """
    if not text:
        return None
    flattened = text.replace("\n", " ").lower()
    # Everything else is lower-case at this point, so the upper-case marker
    # cannot collide with a word that was already in the text.
    tokens = flattened.replace("is the new", "NEW").split(" ")
    if "NEW" not in tokens:
        return None
    marker = tokens.index("NEW")
    # Require a real neighbour on both sides. Without the lower bound,
    # tokens[marker - 1] would be tokens[-1] and wrap around to the last
    # word of the text, producing a bogus pair.
    if marker == 0 or marker + 1 >= len(tokens):
        return None
    A = tokens[marker - 1].strip(_PUNCTUATION)
    B = tokens[marker + 1].strip(_PUNCTUATION)
    # Exclude common phrases such as "this is the new thing".
    if not A or not B or A in _GENERIC_SUBJECTS:
        return None
    return (A, B)


# All mined (A, B) pairs, in the order the search results were returned.
comparisons = []

# Request result pages 1 through 9; the page counter has its own name so that
# nothing inside the loop body can shadow it.
for page in range(1, 10):
    # Set cached=False for live results:
    for result in Twitter(language="en").search("\"is the new\"", start=page, count=100, cached=True):
        pair = _mine_comparison(result.text)
        if pair is not None:
            comparisons.append(pair)
|
|
|
|
# Turn every mined comparison into a directed, weighted edge.
g = Graph()
for newer, older in comparisons:
    # "A is the new B" is drawn as an edge from B (older) to A (newer).
    # NOTE(review): presumably add_edge() hands back the existing edge when
    # the same pair shows up again, so repeats accumulate weight - confirm
    # against the pattern.graph documentation.
    link = g.add_edge(older, newer)
    link.weight += 0.1
    print(older, "=>", newer)
|
|
|
|
# Not all nodes will be connected, there will be multiple subgraphs.
# Simply take the first subgraph returned by split() for our visualization
# (presumably the largest one - confirm against the pattern.graph docs).
subgraphs = g.split()
if subgraphs:
    g = subgraphs[0]
    g.export("trends", weighted=True, directed=True)
else:
    # No comparisons were mined, so there is no subgraph to pick; indexing
    # [0] here would raise IndexError. Report it and skip the export.
    print("No comparisons found; nothing to export.")
|