You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
5 years ago
|
from __future__ import print_function
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
from builtins import str, bytes, dict, int
|
||
|
|
||
|
import os
|
||
|
import sys
|
||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||
|
|
||
|
from pattern.db import Datasheet, INTEGER, STRING
|
||
|
from pattern.db import uid, pprint
|
||
|
|
||
|
# The main purpose of the pattern module is to facilitate automated processes
|
||
|
# for (text) data acquisition and (linguistical) data mining.
|
||
|
# Often, this involves a tangle of messy text files and custom formats to store the data.
|
||
|
# The Datasheet class offers a useful matrix (cfr. MS Excel) in Python code.
|
||
|
# It can be saved as a CSV text file that is both human/machine readable.
|
||
|
# See also: examples/01-web/03-twitter.py
|
||
|
|
||
|
# A Datasheet can have headers: a (name, type)-tuple for each column.
|
||
|
# In this case, imported columns will automatically map values to the defined type.
|
||
|
# Supported values that are imported and exported correctly:
|
||
|
# str, unicode, int, float, bool, Date, None
|
||
|
# For other data types, custom encoder and decoder functions can be used.
|
||
|
|
||
|
ds = Datasheet(rows=[
|
||
|
[uid(), "broccoli", "vegetable"],
|
||
|
[uid(), "turnip", "vegetable"],
|
||
|
[uid(), "asparagus", "vegetable"],
|
||
|
[uid(), "banana", "fruit"],
|
||
|
], fields=[
|
||
|
("id", INTEGER), # Define the column headers.
|
||
|
("name", STRING),
|
||
|
("type", STRING)
|
||
|
])
|
||
|
|
||
|
print(ds.rows[0]) # A list of rows.
|
||
|
print(ds.columns[1]) # A list of columns, where each column is a list of values.
|
||
|
print(ds.name)
|
||
|
print("")
|
||
|
|
||
|
# Columns can be manipulated directly like any other Python list.
|
||
|
# This can be slow for large tables. If you need a fast way to do matrix math,
|
||
|
# use numpy (http://numpy.scipy.org/) instead.
|
||
|
# The purpose of Table is data storage.
|
||
|
ds.columns.append([
|
||
|
"green",
|
||
|
"purple",
|
||
|
"white",
|
||
|
"yellow"
|
||
|
], field=("color", STRING))
|
||
|
|
||
|
# Save as a comma-separated (unicode) text file.
|
||
|
ds.save("food.txt", headers=True)
|
||
|
|
||
|
# Load a table from file.
|
||
|
ds = Datasheet.load("food.txt", headers=True)
|
||
|
|
||
|
pprint(ds, truncate=50, padding=" ", fill=".")
|
||
|
print("")
|
||
|
print(ds.fields)
|