#!/usr/bin/env python

"""PickleShare - a small 'shelve' like datastore with concurrency support

Like shelve, a PickleShareDB object acts like a normal dictionary. Unlike
shelve, many processes can access the database simultaneously. Changing a
value in the database is immediately visible to other processes accessing the
same database.

Concurrency is possible because the values are stored in separate files. Hence
the "database" is a directory where *all* files are governed by PickleShare.

Example usage::

    from pickleshare import *
    db = PickleShareDB('~/testpickleshare')
    db.clear()
    print("Should be empty:", db.items())
    db['hello'] = 15
    db['aku ankka'] = [1,2,313]
    db['paths/are/ok/key'] = [1,(5,46)]
    print(db.keys())
    del db['aku ankka']

This module is certainly not ZODB, but can be used for low-load
(non-mission-critical) situations where tiny code size trumps the
advanced features of a "real" object database.

Installation guide: pip install pickleshare

Author: Ville Vainio <vivainio@gmail.com>
License: MIT open source license.

"""

from __future__ import print_function


__version__ = "0.7.5"

try:
    from pathlib import Path
except ImportError:
    # Python 2 backport
    from pathlib2 import Path

import os
import stat
import time

try:
    # collections.abc is the Python 3.3+ home of the container ABCs
    import collections.abc as collections_abc
except ImportError:
    import collections as collections_abc
try:
    # cPickle is the faster C implementation available on Python 2
    import cPickle as pickle
except ImportError:
    import pickle
import errno
import sys

if sys.version_info[0] >= 3:
    string_types = (str,)
else:
    # 'unicode' only exists on Python 2
    string_types = (str, unicode)


def gethashfile(key):
    # Map a key onto one of 256 two-hex-digit bucket file names ('00'..'ff').
    return ("%02x" % abs(hash(key) % 256))[-2:]


_sentinel = object()


class PickleShareDB(collections_abc.MutableMapping):
    """The main 'connection' object for PickleShare database"""

    def __init__(self, root):
        """Return a db object that will manage the specified directory"""
        if not isinstance(root, string_types):
            root = str(root)
        root = os.path.abspath(os.path.expanduser(root))
        self.root = Path(root)
        if not self.root.is_dir():
            # Catching the exception is necessary if multiple processes are
            # concurrently trying to create the folder; the exist_ok keyword
            # argument of mkdir does the same, but only from Python 3.5.
            try:
                self.root.mkdir(parents=True)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
        # cache has { 'key' : (obj, orig_mod_time) }
        self.cache = {}

    def __getitem__(self, key):
        """db['key'] reading"""
        fil = self.root / key
        try:
            mtime = fil.stat()[stat.ST_MTIME]
        except OSError:
            raise KeyError(key)

        if fil in self.cache and mtime == self.cache[fil][1]:
            return self.cache[fil][0]
        try:
            # The cached item has expired, need to read
            with fil.open("rb") as f:
                obj = pickle.loads(f.read())
        except:
            raise KeyError(key)

        self.cache[fil] = (obj, mtime)
        return obj

    def __setitem__(self, key, value):
        """db['key'] = 5"""
        fil = self.root / key
        parent = fil.parent
        if parent and not parent.is_dir():
            parent.mkdir(parents=True)
        # We specify protocol 2, so that we can mostly go between Python 2
        # and Python 3. We can upgrade to protocol 3 when Python 2 is obsolete.
        with fil.open("wb") as f:
            pickle.dump(value, f, protocol=2)
        try:
            self.cache[fil] = (value, fil.stat().st_mtime)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

    def hset(self, hashroot, key, value):
        """hashed set"""
        hroot = self.root / hashroot
        if not hroot.is_dir():
            hroot.mkdir()
        hfile = hroot / gethashfile(key)
        d = self.get(hfile, {})
        d.update({key: value})
        self[hfile] = d

    def hget(self, hashroot, key, default=_sentinel, fast_only=True):
        """hashed get"""
        hroot = self.root / hashroot
        hfile = hroot / gethashfile(key)

        d = self.get(hfile, _sentinel)
        # print "got dict",d,"from",hfile
        if d is _sentinel:
            if fast_only:
                if default is _sentinel:
                    raise KeyError(key)

                return default

            # slow mode ok, works even after hcompress()
            d = self.hdict(hashroot)

        return d.get(key, default)

    def hdict(self, hashroot):
        """Get all data contained in hashed category 'hashroot' as dict"""
        hfiles = self.keys(hashroot + "/*")
        hfiles.sort()
        last = len(hfiles) and hfiles[-1] or ""
        if last.endswith("xx"):
            # print "using xx"
            hfiles = [last] + hfiles[:-1]

        all = {}

        for f in hfiles:
            # print "using",f
            try:
                all.update(self[f])
            except KeyError:
                print("Corrupt", f, "deleted - hset is not threadsafe!")
                del self[f]

            self.uncache(f)

        return all

    def hcompress(self, hashroot):
        """Compress category 'hashroot', so hset is fast again

        hget will fail if fast_only is True for compressed items (that were
        hset before hcompress).

        """
        hfiles = self.keys(hashroot + "/*")
        all = {}
        for f in hfiles:
            # print "using",f
            all.update(self[f])
            self.uncache(f)

        self[hashroot + "/xx"] = all
        for f in hfiles:
            p = self.root / f
            if p.name == "xx":
                continue
            p.unlink()

    def __delitem__(self, key):
        """del db["key"]"""
        fil = self.root / key
        self.cache.pop(fil, None)
        try:
            fil.unlink()
        except OSError:
            # not-found and permission-denied are ok - we
            # lost, the other process won the conflict
            pass

    def _normalized(self, p):
        """Make a key suitable for user's eyes"""
        return str(p.relative_to(self.root)).replace("\\", "/")

    def keys(self, globpat=None):
        """All keys in DB, or all keys matching a glob"""

        if globpat is None:
            files = self.root.rglob("*")
        else:
            files = self.root.glob(globpat)
        return [self._normalized(p) for p in files if p.is_file()]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def uncache(self, *items):
        """Remove all items, or the specified items, from the cache

        Use this after reading a large amount of large objects
        to free up memory, when you won't be needing the objects
        for a while.

        """
        if not items:
            self.cache = {}
        for it in items:
            self.cache.pop(it, None)

    def waitget(self, key, maxwaittime=60):
        """Wait (poll) for a key to get a value

        Will wait for `maxwaittime` seconds before raising a KeyError.
        The call exits normally if the `key` field in db gets a value
        within the timeout period.

        Use this for synchronizing different processes or for ensuring
        that an unfortunately timed "db['key'] = newvalue" operation
        in another process (which causes all 'get' operations to raise a
        KeyError for the duration of pickling) won't screw up your program
        logic.
        """

        wtimes = [0.2] * 3 + [0.5] * 2 + [1]
        tries = 0
        waited = 0
        while True:
            try:
                val = self[key]
                return val
            except KeyError:
                pass

            if waited > maxwaittime:
                raise KeyError(key)

            time.sleep(wtimes[tries])
            waited += wtimes[tries]
            if tries < len(wtimes) - 1:
                tries += 1

    def getlink(self, folder):
        """Get a convenient link for accessing items"""
        return PickleShareLink(self, folder)

    def __repr__(self):
        return "PickleShareDB('%s')" % self.root


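# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module) of how the hashed
# bucket helpers hset()/hget()/hdict()/hcompress() defined above fit
# together. The function name _hashed_bucket_demo and the temporary
# directory are assumptions made purely for this example.
def _hashed_bucket_demo():
    """Minimal, hedged walk-through of the hashed-bucket API."""
    import shutil
    import tempfile

    tmpdir = tempfile.mkdtemp()
    try:
        db = PickleShareDB(tmpdir)
        # hset() spreads keys over at most 256 bucket files under 'ages/'.
        for name, age in [("alice", 30), ("bob", 25), ("carol", 41)]:
            db.hset("ages", name, age)
        print(db.hget("ages", "bob"))          # -> 25
        print(db.hget("ages", "missing", -1))  # -> -1 (default, no KeyError)
        print(db.keys("ages/*"))               # bucket files currently in use

        # hdict() merges every bucket file into one ordinary dict.
        print(sorted(db.hdict("ages").items()))

        # hcompress() folds all buckets into a single 'ages/xx' file; keys
        # stored before the compression are then only reachable with
        # fast_only=False, which falls back to scanning the whole category.
        db.hcompress("ages")
        print(db.hget("ages", "alice", fast_only=False))  # -> 30

        # Drop cached objects after a bulk read (see uncache()).
        db.uncache()
    finally:
        shutil.rmtree(tmpdir)

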
class PickleShareLink:
    """A shorthand for accessing nested PickleShare data conveniently.

    Created through PickleShareDB.getlink(), example::

        lnk = db.getlink('myobjects/test')
        lnk.foo = 2
        lnk.bar = lnk.foo + 5

    """

    def __init__(self, db, keydir):
        self.__dict__.update(locals())

    def __getattr__(self, key):
        return self.__dict__["db"][self.__dict__["keydir"] + "/" + key]

    def __setattr__(self, key, val):
        self.db[self.keydir + "/" + key] = val

    def __repr__(self):
        db = self.__dict__["db"]
        keys = db.keys(self.__dict__["keydir"] + "/*")
        return "<PickleShareLink '%s': %s>" % (
            self.__dict__["keydir"],
            ";".join([Path(k).name for k in keys]),
        )


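# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module) of attribute-style
# access through PickleShareLink, as described in its docstring above. The
# function name _link_demo and the temporary directory are assumptions made
# purely for this example.
def _link_demo():
    """Minimal, hedged example of PickleShareDB.getlink()."""
    import shutil
    import tempfile

    tmpdir = tempfile.mkdtemp()
    try:
        db = PickleShareDB(tmpdir)
        lnk = db.getlink("myobjects/test")
        # Attribute writes become ordinary keys under 'myobjects/test/'.
        lnk.foo = 2
        lnk.bar = lnk.foo + 5
        print(db["myobjects/test/bar"])  # -> 7
        print(repr(lnk))
    finally:
        shutil.rmtree(tmpdir)

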
def main():
    import textwrap

    usage = textwrap.dedent(
        """\
    pickleshare - manage PickleShare databases

    Usage:

        pickleshare dump /path/to/db > dump.txt
        pickleshare load /path/to/db < dump.txt
        pickleshare test /path/to/db
    """
    )
    DB = PickleShareDB
    import sys

    if len(sys.argv) < 2:
        print(usage)
        return

    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == "dump":
        if not args:
            args = ["."]
        db = DB(args[0])
        import pprint

        # Dump as a plain dict literal so the output can be eval()'d by 'load'.
        pprint.pprint(dict(db))
    elif cmd == "load":
        cont = sys.stdin.read()
        db = DB(args[0])
        # NOTE: the dump is eval()'d, so only load dumps from a trusted source.
        data = eval(cont)
        db.clear()
        for k, v in data.items():
            db[k] = v
    elif cmd == "testwait":
        db = DB(args[0])
        db.clear()
        print(db.waitget("250"))
    elif cmd == "test":
        # NOTE: test() and stress() are not defined in this module, so the
        # 'test' command raises NameError unless they are provided elsewhere.
        test()
        stress()


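# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module) of waitget() used for
# the cross-process hand-off described in the module docstring: one process
# polls a key until another process stores a value for it. The function names
# and the temporary directory are assumptions made purely for this example;
# call _waitget_demo() from code guarded by `if __name__ == "__main__"` so it
# also works with the 'spawn' start method.
def _writer_process(path):
    """Helper for _waitget_demo(): store a value after a short delay."""
    time.sleep(1)
    PickleShareDB(path)["answer"] = 42


def _waitget_demo():
    """Minimal, hedged example of synchronizing two processes via waitget()."""
    import shutil
    import tempfile
    from multiprocessing import Process

    tmpdir = tempfile.mkdtemp()
    try:
        db = PickleShareDB(tmpdir)
        writer = Process(target=_writer_process, args=(tmpdir,))
        writer.start()
        # Polls 'answer' with a small back-off until the writer stores it,
        # or raises KeyError after maxwaittime seconds.
        print(db.waitget("answer", maxwaittime=10))  # -> 42
        writer.join()
    finally:
        shutil.rmtree(tmpdir)

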
if __name__ == "__main__":
    main()