You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
86 lines
2.4 KiB
Python
86 lines
2.4 KiB
Python
5 years ago
|
import itertools
|
||
|
|
||
|
|
||
|
class InvalidCallback(ValueError):
    """Error signalling that a supplied callback cannot be called."""
|
||
|
|
||
|
|
||
|
class _Chunker(object):
|
||
|
def __init__(self, table, chunksize, callback):
|
||
|
self.queue = []
|
||
|
self.table = table
|
||
|
self.chunksize = chunksize
|
||
|
if callback and not callable(callback):
|
||
|
raise InvalidCallback
|
||
|
self.callback = callback
|
||
|
|
||
|
def flush(self):
|
||
|
self.queue.clear()
|
||
|
|
||
|
def _queue_add(self, item):
|
||
|
self.queue.append(item)
|
||
|
if len(self.queue) >= self.chunksize:
|
||
|
self.flush()
|
||
|
|
||
|
def __enter__(self):
|
||
|
return self
|
||
|
|
||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||
|
self.flush()
|
||
|
|
||
|
|
||
|
class ChunkedInsert(_Chunker):
    """Batch up insert operations

        with ChunkedInsert(my_table) as inserter:
            inserter.insert(row)

    Rows will be inserted in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the insert.
    This callback takes one parameter which is the queue which is about to be
    inserted into the database

    Every row handed to `insert` is modified in place when it is flushed:
    any field seen on an earlier row but missing from this one is added with
    the value `None`, so all rows in a chunk carry the same set of fields.
    """

    def __init__(self, table, chunksize=1000, callback=None):
        """Create an inserter for `table`.

        table -- object whose `insert_many(rows)` method receives each chunk.
        chunksize -- number of queued rows that triggers a flush.
        callback -- optional callable invoked with the queue before each
            `insert_many` call.
        """
        # Union of every field name seen so far. It is never reset, so rows in
        # later chunks are also padded with fields first seen in earlier ones.
        self.fields = set()
        super().__init__(table, chunksize, callback)

    def insert(self, item):
        """Queue one row (a dict-like mapping) for insertion.

        The row's field names are recorded before it is queued, so a flush
        triggered by this very call already pads with them.
        """
        self.fields.update(item.keys())
        self._queue_add(item)

    def flush(self):
        """Pad the queued rows, run the callback, insert them, clear the queue."""
        for item in self.queue:
            for field in self.fields:
                # Existing values are kept; missing fields become None.
                item[field] = item.get(field)
        if self.callback is not None:
            self.callback(self.queue)
        self.table.insert_many(self.queue)
        super().flush()
|
||
|
|
||
|
|
||
|
class ChunkedUpdate(_Chunker):
    """Batch up update operations

        with ChunkedUpdate(my_table, keys) as updater:
            updater.update(row)

    Rows will be updated in groups of `chunksize` (defaulting to 1000). An
    optional callback can be provided that will be called before the update.
    This callback takes one parameter which is the queue which is about to be
    updated into the database

    Within a flush, rows are grouped by the set of fields they carry and each
    group is sent in one `update_many` call. Rows with different field sets
    may therefore be applied in a different order than they were queued.
    """

    def __init__(self, table, keys, chunksize=1000, callback=None):
        """Create an updater for `table`.

        table -- object whose `update_many(rows, keys)` method receives each
            group of rows.
        keys -- passed through unchanged as the second argument of
            `update_many`.
        chunksize -- number of queued rows that triggers a flush.
        callback -- optional callable invoked with the queue before the
            rows are grouped and sent.
        """
        self.keys = keys
        super().__init__(table, chunksize, callback)

    def update(self, item):
        """Queue one row (a dict-like mapping) for updating."""
        self._queue_add(item)

    def flush(self):
        """Run the callback, send the queued rows group by group, clear the queue."""
        if self.callback is not None:
            self.callback(self.queue)

        # Bucket rows by their exact field set. Sorting on dict.keys() does
        # not work for this: key views compare as sets (subset tests), which
        # is not a total order, so rows with identical fields are not
        # guaranteed to become adjacent. Buckets are created in first-seen
        # order and rows keep their queue order inside each bucket.
        groups = {}
        for item in self.queue:
            groups.setdefault(frozenset(item.keys()), []).append(item)

        for items in groups.values():
            self.table.update_many(items, self.keys)
        super().flush()
|