You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
2.4 KiB
Python

5 years ago
import itertools
class InvalidCallback(ValueError):
    """Raised when a chunker is constructed with a non-callable callback."""
class _Chunker(object):
def __init__(self, table, chunksize, callback):
self.queue = []
self.table = table
self.chunksize = chunksize
if callback and not callable(callback):
raise InvalidCallback
self.callback = callback
def flush(self):
self.queue.clear()
def _queue_add(self, item):
self.queue.append(item)
if len(self.queue) >= self.chunksize:
self.flush()
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.flush()
class ChunkedInsert(_Chunker):
    """Batch up insert operations.

        with ChunkedInsert(my_table) as inserter:
            inserter.insert(row)

    Rows will be inserted in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the insert.
    This callback takes one parameter which is the queue which is about to be
    inserted into the database.

    Before each insert every queued row is padded, in place, with ``None``
    for any field it lacks, so that all rows handed to ``insert_many`` carry
    the same fields.  The set of known fields grows over the lifetime of the
    inserter; it is not reset between chunks.
    """

    def __init__(self, table, chunksize=1000, callback=None):
        # Must exist before the base initialiser runs so that insert() and
        # flush() can always rely on it.
        self.fields = set()
        super().__init__(table, chunksize, callback)

    def insert(self, item):
        """Queue ``item`` (a dict) and remember the fields it uses."""
        self.fields.update(item.keys())
        super()._queue_add(item)

    def flush(self):
        """Pad the queued rows, run the callback, and insert the chunk."""
        for item in self.queue:
            for field in self.fields:
                # Adds the field with value None only when it is missing;
                # existing values are left untouched.
                item.setdefault(field)
        if self.callback is not None:
            self.callback(self.queue)
        self.table.insert_many(self.queue)
        super().flush()
class ChunkedUpdate(_Chunker):
    """Batch up update operations.

        with ChunkedUpdate(my_table, keys) as updater:
            updater.update(row)

    Rows will be updated in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the update.
    This callback takes one parameter which is the queue which is about to be
    updated into the database.

    Within a chunk, rows are grouped by the exact set of fields they carry
    and each group is passed to ``update_many`` in one call, together with
    `keys`.  Rows keep their queue order inside a group, but rows with
    different field sets may be applied in a different order than they were
    queued.
    """

    def __init__(self, table, keys, chunksize=1000, callback=None):
        self.keys = keys
        super().__init__(table, chunksize, callback)

    def update(self, item):
        """Queue ``item`` (a dict) for a later batched update."""
        super()._queue_add(item)

    def flush(self):
        """Run the callback, then update the queued rows grouped by fields."""
        if self.callback is not None:
            self.callback(self.queue)
        # Group by the set of field names.  Sorting on ``dict.keys`` is not
        # usable for this: key views compare by subset relation, which is
        # only a partial order, so equal field sets would not reliably end
        # up adjacent.  A dict keyed by frozenset groups them exactly and
        # keeps first-seen order.
        batches = {}
        for item in self.queue:
            batches.setdefault(frozenset(item), []).append(item)
        for items in batches.values():
            self.table.update_many(items, self.keys)
        super().flush()