You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
86 lines
2.4 KiB
Python
86 lines
2.4 KiB
Python
import itertools
|
|
|
|
|
|
class InvalidCallback(ValueError):
    """Raised when a chunker is given a callback that cannot be called."""
|
|
|
|
|
|
class _Chunker(object):
    """Queue items and flush them once `chunksize` items have accumulated.

    This base class only manages the queue; subclasses override `flush` to
    do the actual work with `self.table` and then call `super().flush()` to
    empty the queue.

    Instances are context managers: leaving the `with` block flushes
    whatever is still queued, whether or not the block raised.

    Raises:
        InvalidCallback: if `callback` is neither None nor callable.
    """

    def __init__(self, table, chunksize, callback):
        # Only None means "no callback". Falsy non-callables such as 0 or ""
        # must be rejected here, because subclasses test
        # `self.callback is not None` before calling it and would otherwise
        # fail with a TypeError at flush time.
        if callback is not None and not callable(callback):
            raise InvalidCallback("callback must be callable or None")
        self.queue = []
        self.table = table
        self.chunksize = chunksize
        self.callback = callback

    def flush(self):
        """Drop every queued item."""
        self.queue.clear()

    def _queue_add(self, item):
        """Append `item` and flush as soon as the queue holds a full chunk."""
        self.queue.append(item)
        if len(self.queue) >= self.chunksize:
            self.flush()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Flush the final, possibly partial, chunk. Returns None, so an
        # exception raised inside the with-block still propagates.
        self.flush()
|
|
|
|
|
|
class ChunkedInsert(_Chunker):
    """Batch up insert operations.

        with ChunkedInsert(my_table) as inserter:
            inserter.insert(row)

    Rows will be inserted in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the insert.
    This callback takes one parameter which is the queue which is about to be
    inserted into the database.
    """

    def __init__(self, table, chunksize=1000, callback=None):
        # Every field name seen so far. It is never reset, so later chunks
        # are padded with fields that only appeared in earlier chunks.
        self.fields = set()
        super().__init__(table, chunksize, callback)

    def insert(self, item):
        """Queue the mapping `item`, flushing when the chunk is full."""
        # Record the field names before queueing: adding the item may
        # trigger a flush, which must already know about its fields.
        self.fields.update(item.keys())
        super()._queue_add(item)

    def flush(self):
        """Pad the queued rows, run the callback and insert the chunk.

        Each queued row is modified in place so that it has a value (None
        when missing) for every field seen so far. The callback, if any,
        receives the padded queue before `table.insert_many` is called.
        """
        for item in self.queue:
            for field in self.fields:
                item[field] = item.get(field)
        if self.callback is not None:
            self.callback(self.queue)
        self.table.insert_many(self.queue)
        super().flush()
|
|
|
|
|
|
class ChunkedUpdate(_Chunker):
    """Batch up update operations.

        with ChunkedUpdate(my_table, keys) as updater:
            updater.update(row)

    Rows will be updated in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the update.
    This callback takes one parameter which is the queue which is about to be
    updated into the database.

    `keys` is passed unchanged to `table.update_many` together with each
    group of rows.
    """

    def __init__(self, table, keys, chunksize=1000, callback=None):
        self.keys = keys
        super().__init__(table, chunksize, callback)

    def update(self, item):
        """Queue the mapping `item`, flushing when the chunk is full."""
        super()._queue_add(item)

    def flush(self):
        """Run the callback, then update the queued rows grouped by fields.

        The callback, if any, receives the queue in the order rows were
        added. Rows are then bucketed by their exact set of field names and
        `table.update_many` is called once per bucket.
        """
        if self.callback is not None:
            self.callback(self.queue)
        # Bucket by frozenset of field names instead of sort + groupby on
        # dict.keys: key views compare with `<` as a subset test, which is
        # not a total order, so sorting could leave rows with identical
        # fields non-adjacent and split them across several update_many
        # calls. Dicts keep insertion order, so buckets are processed in
        # first-seen order and rows keep their relative order.
        groups = {}
        for item in self.queue:
            groups.setdefault(frozenset(item), []).append(item)
        for items in groups.values():
            self.table.update_many(items, self.keys)
        super().flush()
|