harvesting_the_net/pics/lib/python3.10/site-packages/pickleshare.py

#!/usr/bin/env python

""" PickleShare - a small 'shelve' like datastore with concurrency support

Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
shelve, many processes can access the database simultaneously. Changing a
value in database is immediately visible to other processes accessing the
same database.

Concurrency is possible because the values are stored in separate files. Hence
the "database" is a directory where *all* files are governed by PickleShare.

Example usage::

    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print("Should be empty:", db.items())
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/are/ok/key'] = [1,(5,46)]
    print(db.keys())
    del db['aku ankka']

This module is certainly not ZODB, but can be used for low-load
(non-mission-critical) situations where tiny code size trumps the
advanced features of a "real" object database.

Installation guide: pip install pickleshare

Author: Ville Vainio <vivainio@gmail.com>
License: MIT open source license.

"""

from __future__ import print_function


__version__ = "0.7.5"

try:
    from pathlib import Path
except ImportError:
    # Python 2 backport
    from pathlib2 import Path

import os,stat,time
try:
    import collections.abc as collections_abc
except ImportError:
    import collections as collections_abc
try:
    import cPickle as pickle
except ImportError:
    import pickle
import errno
import sys

# Types treated as "already a string" when a database root is passed in.
# The name `unicode` is only evaluated on Python 2, where it exists.
string_types = (str,) if sys.version_info[0] >= 3 else (str, unicode)

def gethashfile(key):
    """Return the two-character hex bucket name ('00'..'ff') for *key*.

    The bucket is the built-in ``hash(key)`` reduced modulo 256.

    NOTE: on Python 3 the hash of str/bytes objects is salted per process
    unless PYTHONHASHSEED is fixed, so bucket names for such keys are not
    guaranteed to be stable across interpreter runs.
    """
    bucket = abs(hash(key) % 256)
    return "{:02x}".format(bucket)[-2:]

# Unique marker meaning "no value / no default supplied" (it is the default of
# the `default` parameter of PickleShareDB.hget); always compared by identity.
_sentinel = object()

class PickleShareDB(collections_abc.MutableMapping):
    """The main 'connection' object for a PickleShare database.

    The database is a directory; every key is a file path relative to that
    directory and every value is stored pickled in its own file. Values that
    have been read or written are kept in an in-memory cache which is
    validated against the file's modification time on each read.

    SECURITY: values are loaded with ``pickle.loads``. Unpickling can execute
    arbitrary code, so only open directories whose contents you trust.
    """

    def __init__(self, root):
        """Return a db object that will manage the specified directory.

        `root` may be a string or any object whose ``str()`` is a path; ``~``
        is expanded and the path is made absolute. The directory (and any
        missing parents) is created if it does not exist.
        """
        if not isinstance(root, string_types):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        self._ensure_dir(self.root)
        # cache has { Path(root / key) : (obj, orig_mod_time) }
        self.cache = {}

    @staticmethod
    def _ensure_dir(path):
        """Create directory `path` (with parents) unless it already exists.

        EEXIST is ignored because several processes may race to create the
        same directory; the `exist_ok` keyword of mkdir is not available on
        every Python version this module supports.
        """
        if path.is_dir():
            return
        try:
            path.mkdir(parents=True)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    def _hfiles(self, hashroot):
        """Return the keys under `hashroot`, sorted, with a trailing 'xx' first.

        The 'xx' file is the one written by hcompress(); listing it first
        lets data from the individual hash files, merged afterwards, take
        precedence over it.
        """
        hfiles = sorted(self.keys(hashroot + "/*"))
        if hfiles and hfiles[-1].endswith('xx'):
            hfiles = [hfiles[-1]] + hfiles[:-1]
        return hfiles

    def __getitem__(self, key):
        """ db['key'] reading

        Raises KeyError if the file does not exist or cannot be read and
        unpickled.
        """
        fil = self.root / key
        try:
            # Same attribute (float st_mtime) as __setitem__ stores, so that
            # entries cached on write can actually be matched here.
            mtime = fil.stat().st_mtime
        except OSError:
            raise KeyError(key)

        cached = self.cache.get(fil)
        if cached is not None and cached[1] == mtime:
            return cached[0]
        try:
            # The cached item has expired (or was never cached), need to read
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except Exception:
            # Unreadable or half-written file: report it as a missing key.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """ db['key'] = 5

        Pickles `value` into the file for `key`, creating parent directories
        as needed, and records it in the cache.
        """
        fil = self.root / key
        self._ensure_dir(fil.parent)
        # We specify protocol 2, so that we can mostly go between Python 2
        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
        with fil.open('wb') as f:
            pickle.dump(value, f, protocol=2)
        try:
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            # The file may already have been removed by another process.
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """ hashed set

        Stores `key` -> `value` inside the bucket file chosen by
        gethashfile(key) under the `hashroot` directory. This is an unlocked
        read-modify-write of the bucket file.
        """
        hroot = self.root / hashroot
        self._ensure_dir(hroot)
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update({key: value})
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """ hashed get

        Looks `key` up in its bucket file under `hashroot`. If the bucket
        file is missing and `fast_only` is false, falls back to merging the
        whole category with hdict() (works even after hcompress()).

        Returns `default` when the key is not found; raises KeyError if no
        default was given.
        """
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)

                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        value = d.get(key, default)
        if value is _sentinel:
            # Never leak the private marker to the caller.
            raise KeyError(key)
        return value

    def hdict(self, hashroot):
        """ Get all data contained in hashed category 'hashroot' as dict

        Bucket files that cannot be read are reported on stdout and deleted.
        """
        merged = {}

        for f in self._hfiles(hashroot):
            try:
                merged.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return merged

    def hcompress(self, hashroot):
        """ Compress category 'hashroot', so hset is fast again

        hget will fail if fast_only is True for compressed items (that were
        hset before hcompress).

        """
        # 'xx' (a previous compression result) is merged first so that newer
        # per-bucket data overrides it, matching hdict().
        hfiles = self._hfiles(hashroot)
        merged = {}
        for f in hfiles:
            merged.update(self[f])
            self.uncache(f)

        self[hashroot + '/xx'] = merged
        for f in hfiles:
            p = self.root / f
            if p.name == 'xx':
                continue
            p.unlink()

    def __delitem__(self, key):
        """ del db["key"]

        Best effort: a file that is already gone or cannot be removed is
        silently ignored, so no KeyError is raised for a missing key.
        """
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # notfound and permission denied are ok - we
            # lost, the other process wins the conflict
            pass

    def _normalized(self, p):
        """ Make a key suitable for user's eyes """
        return str(p.relative_to(self.root)).replace('\\', '/')

    def keys(self, globpat=None):
        """ All keys in DB, or all keys matching a glob"""
        if globpat is None:
            files = self.root.rglob('*')
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """ Removes all, or specified items from cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        `items` are keys as used for ``db[key]``.
        """
        if not items:
            self.cache = {}
        for it in items:
            # The cache is keyed by the full path, not by the user's key.
            self.cache.pop(self.root / it, None)

    def waitget(self, key, maxwaittime=60):
        """ Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operation to cause a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """
        # Poll quickly at first, then settle on the last (longest) interval.
        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while True:
            try:
                return self[key]
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            delay = wtimes[tries]
            time.sleep(delay)
            waited += delay
            if tries < len(wtimes) - 1:
                tries += 1

    def getlink(self, folder):
        """ Get a convenient link for accessing items  """
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root


class PickleShareLink:
    """ A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    Attribute reads and writes are forwarded to ``db[keydir + '/' + name]``.
    """
    def __init__(self, db, keydir):
        # Write through __dict__ because __setattr__ is redirected to the
        # database. Only the two real attributes are stored (not `self`,
        # which would create a reference cycle).
        self.__dict__.update(db=db, keydir=keydir)

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails.
        return self.__dict__['db'][self.__dict__['keydir'] + '/' + key]

    def __setattr__(self, key, val):
        self.db[self.keydir + '/' + key] = val

    def __repr__(self):
        db = self.__dict__['db']
        keydir = self.__dict__['keydir']
        keys = db.keys(keydir + "/*")
        # pathlib paths expose the final component as `.name`.
        return "<PickleShareLink '%s': %s>" % (
            keydir,
            ";".join([Path(k).name for k in keys]))

def main():
    """Command-line entry point for managing PickleShare databases.

    Reads the command from ``sys.argv``:

    * ``dump [path]``  - pretty-print all (key, value) pairs (default path '.')
    * ``load path``    - replace the database contents with a dump from stdin
    * ``testwait path``- clear the database and wait for key '250'
    * ``test``         - run test() and stress() if this module defines them

    Prints the usage text when the command is missing or unknown, or when a
    required path argument is absent.
    """
    import pprint
    import sys
    import textwrap

    usage = textwrap.dedent("""\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """)
    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == 'dump':
        db = PickleShareDB(args[0] if args else '.')
        # Materialize the pairs: on Python 3 items() is a view whose repr
        # does not show the contents.
        pprint.pprint(list(db.items()))
    elif cmd in ('load', 'testwait'):
        if not args:
            # Both commands clear the database, so never guess a path.
            print(usage)
            return
        db = PickleShareDB(args[0])
        if cmd == 'load':
            cont = sys.stdin.read()
            # SECURITY: eval() executes arbitrary code found in the dump;
            # only load dumps from a trusted source.
            data = eval(cont)
            db.clear()
            pairs = data.items() if isinstance(data, dict) else data
            for k, v in pairs:
                db[k] = v
        else:
            db.clear()
            print(db.waitget('250'))
    elif cmd == 'test':
        # test() and stress() are looked up at run time because they are not
        # defined alongside this function.
        runners = [globals().get(name) for name in ('test', 'stress')]
        if not all(callable(r) for r in runners):
            print("pickleshare: the 'test' command needs test() and stress(), "
                  "which this module does not define", file=sys.stderr)
            return
        for run in runners:
            run()
    else:
        print(usage)

# Run the command-line interface only when this file is executed as a script.
if __name__== "__main__":
    main()
end 2 years ago			`#!/usr/bin/env python`

			`""" PickleShare - a small 'shelve' like datastore with concurrency support`

			`Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike`
			`shelve, many processes can access the database simultaneously. Changing a`
			`value in database is immediately visible to other processes accessing the`
			`same database.`

			`Concurrency is possible because the values are stored in separate files. Hence`
			`the "database" is a directory where all files are governed by PickleShare.`

			`Example usage::`

			`from pickleshare import *`
			`db = PickleShareDB('~/testpickleshare')`
			`db.clear()`
			`print "Should be empty:",db.items()`
			`db['hello'] = 15`
			`db['aku ankka'] = [1,2,313]`
			`db['paths/are/ok/key'] = [1,(5,46)]`
			`print db.keys()`
			`del db['aku ankka']`

			`This module is certainly not ZODB, but can be used for low-load`
			`(non-mission-critical) situations where tiny code size trumps the`
			`advanced features of a "real" object database.`

			`Installation guide: pip install pickleshare`

			`Author: Ville Vainio <vivainio@gmail.com>`
			`License: MIT open source license.`

			`"""`

			`from __future__ import print_function`


			`__version__ = "0.7.5"`

			`try:`
			`from pathlib import Path`
			`except ImportError:`
			`# Python 2 backport`
			`from pathlib2 import Path`

			`import os,stat,time`
			`try:`
			`import collections.abc as collections_abc`
			`except ImportError:`
			`import collections as collections_abc`
			`try:`
			`import cPickle as pickle`
			`except ImportError:`
			`import pickle`
			`import errno`
			`import sys`

			`if sys.version_info[0] >= 3:`
			`string_types = (str,)`
			`else:`
			`string_types = (str, unicode)`

			`def gethashfile(key):`
			`return ("%02x" % abs(hash(key) % 256))[-2:]`

			`_sentinel = object()`

			`class PickleShareDB(collections_abc.MutableMapping):`
			`""" The main 'connection' object for PickleShare database """`
			`def __init__(self,root):`
			`""" Return a db object that will manage the specied directory"""`
			`if not isinstance(root, string_types):`
			`root = str(root)`
			`root = os.path.abspath(os.path.expanduser(root))`
			`self.root = Path(root)`
			`if not self.root.is_dir():`
			`# catching the exception is necessary if multiple processes are concurrently trying to create a folder`
			`# exists_ok keyword argument of mkdir does the same but only from Python 3.5`
			`try:`
			`self.root.mkdir(parents=True)`
			`except OSError as e:`
			`if e.errno != errno.EEXIST:`
			`raise`
			`# cache has { 'key' : (obj, orig_mod_time) }`
			`self.cache = {}`


			`def __getitem__(self,key):`
			`""" db['key'] reading """`
			`fil = self.root / key`
			`try:`
			`mtime = (fil.stat()[stat.ST_MTIME])`
			`except OSError:`
			`raise KeyError(key)`

			`if fil in self.cache and mtime == self.cache[fil][1]:`
			`return self.cache[fil][0]`
			`try:`
			`# The cached item has expired, need to read`
			`with fil.open("rb") as f:`
			`obj = pickle.loads(f.read())`
			`except:`
			`raise KeyError(key)`

			`self.cache[fil] = (obj,mtime)`
			`return obj`

			`def __setitem__(self,key,value):`
			`""" db['key'] = 5 """`
			`fil = self.root / key`
			`parent = fil.parent`
			`if parent and not parent.is_dir():`
			`parent.mkdir(parents=True)`
			`# We specify protocol 2, so that we can mostly go between Python 2`
			`# and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.`
			`with fil.open('wb') as f:`
			`pickle.dump(value, f, protocol=2)`
			`try:`
			`self.cache[fil] = (value, fil.stat().st_mtime)`
			`except OSError as e:`
			`if e.errno != errno.ENOENT:`
			`raise`

			`def hset(self, hashroot, key, value):`
			`""" hashed set """`
			`hroot = self.root / hashroot`
			`if not hroot.is_dir():`
			`hroot.mkdir()`
			`hfile = hroot / gethashfile(key)`
			`d = self.get(hfile, {})`
			`d.update( {key : value})`
			`self[hfile] = d`



			`def hget(self, hashroot, key, default = _sentinel, fast_only = True):`
			`""" hashed get """`
			`hroot = self.root / hashroot`
			`hfile = hroot / gethashfile(key)`

			`d = self.get(hfile, _sentinel )`
			`#print "got dict",d,"from",hfile`
			`if d is _sentinel:`
			`if fast_only:`
			`if default is _sentinel:`
			`raise KeyError(key)`

			`return default`

			`# slow mode ok, works even after hcompress()`
			`d = self.hdict(hashroot)`

			`return d.get(key, default)`

			`def hdict(self, hashroot):`
			`""" Get all data contained in hashed category 'hashroot' as dict """`
			`hfiles = self.keys(hashroot + "/*")`
			`hfiles.sort()`
			`last = len(hfiles) and hfiles[-1] or ''`
			`if last.endswith('xx'):`
			`# print "using xx"`
			`hfiles = [last] + hfiles[:-1]`

			`all = {}`

			`for f in hfiles:`
			`# print "using",f`
			`try:`
			`all.update(self[f])`
			`except KeyError:`
			`print("Corrupt",f,"deleted - hset is not threadsafe!")`
			`del self[f]`

			`self.uncache(f)`

			`return all`

			`def hcompress(self, hashroot):`
			`""" Compress category 'hashroot', so hset is fast again`

			`hget will fail if fast_only is True for compressed items (that were`
			`hset before hcompress).`

			`"""`
			`hfiles = self.keys(hashroot + "/*")`
			`all = {}`
			`for f in hfiles:`
			`# print "using",f`
			`all.update(self[f])`
			`self.uncache(f)`

			`self[hashroot + '/xx'] = all`
			`for f in hfiles:`
			`p = self.root / f`
			`if p.name == 'xx':`
			`continue`
			`p.unlink()`



			`def __delitem__(self,key):`
			`""" del db["key"] """`
			`fil = self.root / key`
			`self.cache.pop(fil,None)`
			`try:`
			`fil.unlink()`
			`except OSError:`
			`# notfound and permission denied are ok - we`
			`# lost, the other process wins the conflict`
			`pass`

			`def _normalized(self, p):`
			`""" Make a key suitable for user's eyes """`
			`return str(p.relative_to(self.root)).replace('\\','/')`

			`def keys(self, globpat = None):`
			`""" All keys in DB, or all keys matching a glob"""`

			`if globpat is None:`
			`files = self.root.rglob('*')`
			`else:`
			`files = self.root.glob(globpat)`
			`return [self._normalized(p) for p in files if p.is_file()]`

			`def __iter__(self):`
			`return iter(self.keys())`

			`def __len__(self):`
			`return len(self.keys())`

			`def uncache(self,*items):`
			`""" Removes all, or specified items from cache`

			`Use this after reading a large amount of large objects`
			`to free up memory, when you won't be needing the objects`
			`for a while.`

			`"""`
			`if not items:`
			`self.cache = {}`
			`for it in items:`
			`self.cache.pop(it,None)`

			`def waitget(self,key, maxwaittime = 60 ):`
			`""" Wait (poll) for a key to get a value`

			Will wait for `maxwaittime` seconds before raising a KeyError.
			The call exits normally if the `key` field in db gets a value
			`within the timeout period.`

			`Use this for synchronizing different processes or for ensuring`
			`that an unfortunately timed "db['key'] = newvalue" operation`
			`in another process (which causes all 'get' operation to cause a`
			`KeyError for the duration of pickling) won't screw up your program`
			`logic.`
			`"""`

			`wtimes = [0.2] * 3 + [0.5] * 2 + [1]`
			`tries = 0`
			`waited = 0`
			`while 1:`
			`try:`
			`val = self[key]`
			`return val`
			`except KeyError:`
			`pass`

			`if waited > maxwaittime:`
			`raise KeyError(key)`

			`time.sleep(wtimes[tries])`
			`waited+=wtimes[tries]`
			`if tries < len(wtimes) -1:`
			`tries+=1`

			`def getlink(self,folder):`
			`""" Get a convenient link for accessing items """`
			`return PickleShareLink(self, folder)`

			`def __repr__(self):`
			`return "PickleShareDB('%s')" % self.root`



			`class PickleShareLink:`
			`""" A shortdand for accessing nested PickleShare data conveniently.`

			`Created through PickleShareDB.getlink(), example::`

			`lnk = db.getlink('myobjects/test')`
			`lnk.foo = 2`
			`lnk.bar = lnk.foo + 5`

			`"""`
			`def __init__(self, db, keydir ):`
			`self.__dict__.update(locals())`

			`def __getattr__(self,key):`
			`return self.__dict__['db'][self.__dict__['keydir']+'/' + key]`
			`def __setattr__(self,key,val):`
			`self.db[self.keydir+'/' + key] = val`
			`def __repr__(self):`
			`db = self.__dict__['db']`
			`keys = db.keys( self.__dict__['keydir'] +"/*")`
			`return "<PickleShareLink '%s': %s>" % (`
			`self.__dict__['keydir'],`
			`";".join([Path(k).basename() for k in keys]))`

			`def main():`
			`import textwrap`
			`usage = textwrap.dedent("""\`
			`pickleshare - manage PickleShare databases`

			`Usage:`

			`pickleshare dump /path/to/db > dump.txt`
			`pickleshare load /path/to/db < dump.txt`
			`pickleshare test /path/to/db`
			`""")`
			`DB = PickleShareDB`
			`import sys`
			`if len(sys.argv) < 2:`
			`print(usage)`
			`return`

			`cmd = sys.argv[1]`
			`args = sys.argv[2:]`
			`if cmd == 'dump':`
			`if not args: args= ['.']`
			`db = DB(args[0])`
			`import pprint`
			`pprint.pprint(db.items())`
			`elif cmd == 'load':`
			`cont = sys.stdin.read()`
			`db = DB(args[0])`
			`data = eval(cont)`
			`db.clear()`
			`for k,v in db.items():`
			`db[k] = v`
			`elif cmd == 'testwait':`
			`db = DB(args[0])`
			`db.clear()`
			`print(db.waitget('250'))`
			`elif cmd == 'test':`
			`test()`
			`stress()`

			`if __name__== "__main__":`
			`main()`