You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
2.0 KiB
Python
51 lines
2.0 KiB
Python
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
from builtins import str, bytes, dict, int
|
|
from builtins import range
|
|
|
|
import os
|
|
import sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
from pattern.web import Google, plaintext
|
|
from pattern.web import SEARCH
|
|
|
|
# The pattern.web module has a SearchEngine class,
|
|
# with a SearchEngine.search() method that yields a list of Result objects.
|
|
# Each Result has url, title, text, language, author and date and properties.
|
|
# Subclasses of SearchEngine include:
|
|
# Google, Bing, Yahoo, Twitter, Facebook, Wikipedia, Wiktionary, Flickr, ...
|
|
|
|
# This example retrieves results from Google based on a given query.
|
|
# The Google search engine can handle SEARCH type searches.
|
|
# Other search engines may also handle IMAGE, NEWS, ...
|
|
|
|
# Google's "Custom Search API" is a paid service.
|
|
# The pattern.web module uses a test account by default,
|
|
# with a 100 free queries per day shared by all Pattern users.
|
|
# If this limit is exceeded, SearchEngineLimitError is raised.
|
|
# You should obtain your own license key at:
|
|
# https://code.google.com/apis/console/
|
|
# Activate "Custom Search API" under "Services" and get the key under "API Access".
|
|
# Then use Google(license=[YOUR_KEY]).search().
|
|
# This will give you 100 personal free queries, or 5$ per 1000 queries.
|
|
engine = Google(license=None, language="en")
|
|
|
|
# Veale & Hao's method for finding similes using wildcards (*):
|
|
# http://afflatus.ucd.ie/Papers/LearningFigurative_CogSci07.pdf
|
|
# This will match results such as:
|
|
# - "as light as a feather",
|
|
# - "as cute as a cupcake",
|
|
# - "as drunk as a lord",
|
|
# - "as snug as a bug", etc.
|
|
q = "as * as a *"
|
|
|
|
# Google is very fast but you can only get up to 100 (10x10) results per query.
|
|
for i in range(1, 2):
|
|
for result in engine.search(q, start=i, count=10, type=SEARCH, cached=True):
|
|
print(plaintext(result.text)) # plaintext() removes all HTML formatting.
|
|
print(result.url)
|
|
print(result.date)
|
|
print("")
|