You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

51 lines
2.0 KiB
Python

from __future__ import print_function
from __future__ import unicode_literals
from builtins import str, bytes, dict, int
from builtins import range
import os
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
from pattern.web import Google, plaintext
from pattern.web import SEARCH
# The pattern.web module has a SearchEngine class,
# with a SearchEngine.search() method that yields a list of Result objects.
# Each Result has url, title, text, language, author and date and properties.
# Subclasses of SearchEngine include:
# Google, Bing, Yahoo, Twitter, Facebook, Wikipedia, Wiktionary, Flickr, ...
# This example retrieves results from Google based on a given query.
# The Google search engine can handle SEARCH type searches.
# Other search engines may also handle IMAGE, NEWS, ...
# Google's "Custom Search API" is a paid service.
# The pattern.web module uses a test account by default,
# with a 100 free queries per day shared by all Pattern users.
# If this limit is exceeded, SearchEngineLimitError is raised.
# You should obtain your own license key at:
# https://code.google.com/apis/console/
# Activate "Custom Search API" under "Services" and get the key under "API Access".
# Then use Google(license=[YOUR_KEY]).search().
# This will give you 100 personal free queries, or 5$ per 1000 queries.
engine = Google(license=None, language="en")
# Veale & Hao's method for finding similes using wildcards (*):
# http://afflatus.ucd.ie/Papers/LearningFigurative_CogSci07.pdf
# This will match results such as:
# - "as light as a feather",
# - "as cute as a cupcake",
# - "as drunk as a lord",
# - "as snug as a bug", etc.
q = "as * as a *"
# Google is very fast but you can only get up to 100 (10x10) results per query.
for i in range(1, 2):
for result in engine.search(q, start=i, count=10, type=SEARCH, cached=True):
print(plaintext(result.text)) # plaintext() removes all HTML formatting.
print(result.url)
print(result.date)
print("")