diff options
Diffstat (limited to 'python/pylru')
-rw-r--r-- | python/pylru/pylru.py | 556 | ||||
-rw-r--r-- | python/pylru/test.py | 238 |
2 files changed, 794 insertions, 0 deletions
diff --git a/python/pylru/pylru.py b/python/pylru/pylru.py new file mode 100644 index 0000000000..e69cadb76c --- /dev/null +++ b/python/pylru/pylru.py @@ -0,0 +1,556 @@ + +# Cache implementaion with a Least Recently Used (LRU) replacement policy and +# a basic dictionary interface. + +# Copyright (C) 2006, 2009, 2010, 2011 Jay Hutchinson + +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. + +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., 51 +# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + + +# The cache is implemented using a combination of a python dictionary (hash +# table) and a circular doubly linked list. Items in the cache are stored in +# nodes. These nodes make up the linked list. The list is used to efficiently +# maintain the order that the items have been used in. The front or head of +# the list contains the most recently used item, the tail of the list +# contains the least recently used item. When an item is used it can easily +# (in a constant amount of time) be moved to the front of the list, thus +# updating its position in the ordering. These nodes are also placed in the +# hash table under their associated key. The hash table allows efficient +# lookup of values by key. + +# Class for the node objects. +class _dlnode(object): + def __init__(self): + self.empty = True + + +class lrucache(object): + + def __init__(self, size, callback=None): + + self.callback = callback + + # Create an empty hash table. + self.table = {} + + # Initialize the doubly linked list with one empty node. This is an + # invariant. The cache size must always be greater than zero. Each + # node has a 'prev' and 'next' variable to hold the node that comes + # before it and after it respectively. Initially the two variables + # each point to the head node itself, creating a circular doubly + # linked list of size one. Then the size() method is used to adjust + # the list to the desired size. + + self.head = _dlnode() + self.head.next = self.head + self.head.prev = self.head + + self.listSize = 1 + + # Adjust the size + self.size(size) + + + def __len__(self): + return len(self.table) + + def clear(self): + for node in self.dli(): + node.empty = True + node.key = None + node.value = None + + self.table.clear() + + + def __contains__(self, key): + return key in self.table + + # Looks up a value in the cache without affecting cache order. + def peek(self, key): + # Look up the node + node = self.table[key] + return node.value + + + def __getitem__(self, key): + # Look up the node + node = self.table[key] + + # Update the list ordering. Move this node so that is directly + # proceeds the head node. Then set the 'head' variable to it. This + # makes it the new head of the list. + self.mtf(node) + self.head = node + + # Return the value. + return node.value + + def get(self, key, default=None): + """Get an item - return default (None) if not present""" + try: + return self[key] + except KeyError: + return default + + def __setitem__(self, key, value): + # First, see if any value is stored under 'key' in the cache already. + # If so we are going to replace that value with the new one. + if key in self.table: + + # Lookup the node + node = self.table[key] + + # Replace the value. + node.value = value + + # Update the list ordering. + self.mtf(node) + self.head = node + + return + + # Ok, no value is currently stored under 'key' in the cache. We need + # to choose a node to place the new item in. There are two cases. If + # the cache is full some item will have to be pushed out of the + # cache. We want to choose the node with the least recently used + # item. This is the node at the tail of the list. If the cache is not + # full we want to choose a node that is empty. Because of the way the + # list is managed, the empty nodes are always together at the tail + # end of the list. Thus, in either case, by chooseing the node at the + # tail of the list our conditions are satisfied. + + # Since the list is circular, the tail node directly preceeds the + # 'head' node. + node = self.head.prev + + # If the node already contains something we need to remove the old + # key from the dictionary. + if not node.empty: + if self.callback is not None: + self.callback(node.key, node.value) + del self.table[node.key] + + # Place the new key and value in the node + node.empty = False + node.key = key + node.value = value + + # Add the node to the dictionary under the new key. + self.table[key] = node + + # We need to move the node to the head of the list. The node is the + # tail node, so it directly preceeds the head node due to the list + # being circular. Therefore, the ordering is already correct, we just + # need to adjust the 'head' variable. + self.head = node + + + def __delitem__(self, key): + + # Lookup the node, then remove it from the hash table. + node = self.table[key] + del self.table[key] + + node.empty = True + + # Not strictly necessary. + node.key = None + node.value = None + + # Because this node is now empty we want to reuse it before any + # non-empty node. To do that we want to move it to the tail of the + # list. We move it so that it directly preceeds the 'head' node. This + # makes it the tail node. The 'head' is then adjusted. This + # adjustment ensures correctness even for the case where the 'node' + # is the 'head' node. + self.mtf(node) + self.head = node.next + + def __iter__(self): + + # Return an iterator that returns the keys in the cache in order from + # the most recently to least recently used. Does not modify the cache + # order. + for node in self.dli(): + yield node.key + + def items(self): + + # Return an iterator that returns the (key, value) pairs in the cache + # in order from the most recently to least recently used. Does not + # modify the cache order. + for node in self.dli(): + yield (node.key, node.value) + + def keys(self): + + # Return an iterator that returns the keys in the cache in order from + # the most recently to least recently used. Does not modify the cache + # order. + for node in self.dli(): + yield node.key + + def values(self): + + # Return an iterator that returns the values in the cache in order + # from the most recently to least recently used. Does not modify the + # cache order. + for node in self.dli(): + yield node.value + + def size(self, size=None): + + if size is not None: + assert size > 0 + if size > self.listSize: + self.addTailNode(size - self.listSize) + elif size < self.listSize: + self.removeTailNode(self.listSize - size) + + return self.listSize + + # Increases the size of the cache by inserting n empty nodes at the tail + # of the list. + def addTailNode(self, n): + for i in range(n): + node = _dlnode() + node.next = self.head + node.prev = self.head.prev + + self.head.prev.next = node + self.head.prev = node + + self.listSize += n + + # Decreases the size of the list by removing n nodes from the tail of the + # list. + def removeTailNode(self, n): + assert self.listSize > n + for i in range(n): + node = self.head.prev + if not node.empty: + if self.callback is not None: + self.callback(node.key, node.value) + del self.table[node.key] + + # Splice the tail node out of the list + self.head.prev = node.prev + node.prev.next = self.head + + # The next four lines are not strictly necessary. + node.prev = None + node.next = None + + node.key = None + node.value = None + + self.listSize -= n + + + # This method adjusts the ordering of the doubly linked list so that + # 'node' directly precedes the 'head' node. Because of the order of + # operations, if 'node' already directly precedes the 'head' node or if + # 'node' is the 'head' node the order of the list will be unchanged. + def mtf(self, node): + node.prev.next = node.next + node.next.prev = node.prev + + node.prev = self.head.prev + node.next = self.head.prev.next + + node.next.prev = node + node.prev.next = node + + # This method returns an iterator that iterates over the non-empty nodes + # in the doubly linked list in order from the most recently to the least + # recently used. + def dli(self): + node = self.head + for i in range(len(self.table)): + yield node + node = node.next + + + + +class WriteThroughCacheManager(object): + def __init__(self, store, size): + self.store = store + self.cache = lrucache(size) + + def __len__(self): + return len(self.store) + + # Returns/sets the size of the managed cache. + def size(self, size=None): + return self.cache.size(size) + + def clear(self): + self.cache.clear() + self.store.clear() + + def __contains__(self, key): + # Check the cache first. If it is there we can return quickly. + if key in self.cache: + return True + + # Not in the cache. Might be in the underlying store. + if key in self.store: + return True + + return False + + def __getitem__(self, key): + # First we try the cache. If successful we just return the value. If + # not we catch KeyError and ignore it since that just means the key + # was not in the cache. + try: + return self.cache[key] + except KeyError: + pass + + # It wasn't in the cache. Look it up in the store, add the entry to + # the cache, and return the value. + value = self.store[key] + self.cache[key] = value + return value + + def get(self, key, default=None): + """Get an item - return default (None) if not present""" + try: + return self[key] + except KeyError: + return default + + def __setitem__(self, key, value): + # Add the key/value pair to the cache and store. + self.cache[key] = value + self.store[key] = value + + def __delitem__(self, key): + # Write-through behavior cache and store should be consistent. Delete + # it from the store. + del self.store[key] + try: + # Ok, delete from the store was successful. It might also be in + # the cache, try and delete it. If not we catch the KeyError and + # ignore it. + del self.cache[key] + except KeyError: + pass + + def __iter__(self): + return self.keys() + + def keys(self): + return self.store.keys() + + def values(self): + return self.store.values() + + def items(self): + return self.store.items() + + + +class WriteBackCacheManager(object): + def __init__(self, store, size): + self.store = store + + # Create a set to hold the dirty keys. + self.dirty = set() + + # Define a callback function to be called by the cache when a + # key/value pair is about to be ejected. This callback will check to + # see if the key is in the dirty set. If so, then it will update the + # store object and remove the key from the dirty set. + def callback(key, value): + if key in self.dirty: + self.store[key] = value + self.dirty.remove(key) + + # Create a cache and give it the callback function. + self.cache = lrucache(size, callback) + + # Returns/sets the size of the managed cache. + def size(self, size=None): + return self.cache.size(size) + + def clear(self): + self.cache.clear() + self.dirty.clear() + self.store.clear() + + def __contains__(self, key): + # Check the cache first, since if it is there we can return quickly. + if key in self.cache: + return True + + # Not in the cache. Might be in the underlying store. + if key in self.store: + return True + + return False + + def __getitem__(self, key): + # First we try the cache. If successful we just return the value. If + # not we catch KeyError and ignore it since that just means the key + # was not in the cache. + try: + return self.cache[key] + except KeyError: + pass + + # It wasn't in the cache. Look it up in the store, add the entry to + # the cache, and return the value. + value = self.store[key] + self.cache[key] = value + return value + + def get(self, key, default=None): + """Get an item - return default (None) if not present""" + try: + return self[key] + except KeyError: + return default + + def __setitem__(self, key, value): + # Add the key/value pair to the cache. + self.cache[key] = value + self.dirty.add(key) + + def __delitem__(self, key): + + found = False + try: + del self.cache[key] + found = True + self.dirty.remove(key) + except KeyError: + pass + + try: + del self.store[key] + found = True + except KeyError: + pass + + if not found: # If not found in cache or store, raise error. + raise KeyError + + + def __iter__(self): + return self.keys() + + def keys(self): + for key in self.store.keys(): + if key not in self.dirty: + yield key + + for key in self.dirty: + yield key + + + def values(self): + for key, value in self.items(): + yield value + + + def items(self): + for key, value in self.store.items(): + if key not in self.dirty: + yield (key, value) + + for key in self.dirty: + value = self.cache.peek(key) + yield (key, value) + + + + def sync(self): + # For each dirty key, peek at its value in the cache and update the + # store. Doesn't change the cache's order. + for key in self.dirty: + self.store[key] = self.cache.peek(key) + # There are no dirty keys now. + self.dirty.clear() + + def flush(self): + self.sync() + self.cache.clear() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.sync() + return False + + +class FunctionCacheManager(object): + def __init__(self, func, size): + self.func = func + self.cache = lrucache(size) + + def size(self, size=None): + return self.cache.size(size) + + def clear(self): + self.cache.clear() + + def __call__(self, *args, **kwargs): + kwtuple = tuple((key, kwargs[key]) for key in sorted(kwargs.keys())) + key = (args, kwtuple) + try: + return self.cache[key] + except KeyError: + pass + + value = self.func(*args, **kwargs) + self.cache[key] = value + return value + + +def lruwrap(store, size, writeback=False): + if writeback: + return WriteBackCacheManager(store, size) + else: + return WriteThroughCacheManager(store, size) + +import functools + +class lrudecorator(object): + def __init__(self, size): + self.cache = lrucache(size) + + def __call__(self, func): + def wrapper(*args, **kwargs): + kwtuple = tuple((key, kwargs[key]) for key in sorted(kwargs.keys())) + key = (args, kwtuple) + try: + return self.cache[key] + except KeyError: + pass + + value = func(*args, **kwargs) + self.cache[key] = value + return value + + wrapper.cache = self.cache + wrapper.size = self.cache.size + wrapper.clear = self.cache.clear + return functools.update_wrapper(wrapper, func) diff --git a/python/pylru/test.py b/python/pylru/test.py new file mode 100644 index 0000000000..7a4842fb52 --- /dev/null +++ b/python/pylru/test.py @@ -0,0 +1,238 @@ + +from pylru import * +import random + +# This tests PyLRU by fuzzing it with random operations, then checking the +# results against another, simpler, LRU cache implementation. + +class simplelrucache: + + def __init__(self, size): + + # Initialize the cache as empty. + self.cache = [] + self.size = size + + def __contains__(self, key): + + for x in self.cache: + if x[0] == key: + return True + + return False + + + def __getitem__(self, key): + + for i in range(len(self.cache)): + x = self.cache[i] + if x[0] == key: + del self.cache[i] + self.cache.append(x) + return x[1] + + raise KeyError + + + def __setitem__(self, key, value): + + for i in range(len(self.cache)): + x = self.cache[i] + if x[0] == key: + x[1] = value + del self.cache[i] + self.cache.append(x) + return + + if len(self.cache) == self.size: + self.cache = self.cache[1:] + + self.cache.append([key, value]) + + + def __delitem__(self, key): + + for i in range(len(self.cache)): + if self.cache[i][0] == key: + del self.cache[i] + return + + raise KeyError + + def resize(self, x=None): + assert x > 0 + self.size = x + if x < len(self.cache): + del self.cache[:len(self.cache) - x] + + +def test(a, b, c, d, verify): + + for i in range(1000): + x = random.randint(0, 512) + y = random.randint(0, 512) + + a[x] = y + b[x] = y + verify(c, d) + + for i in range(1000): + x = random.randint(0, 512) + if x in a: + assert x in b + z = a[x] + z += b[x] + else: + assert x not in b + verify(c, d) + + for i in range(256): + x = random.randint(0, 512) + if x in a: + assert x in b + del a[x] + del b[x] + else: + assert x not in b + verify(c, d) + + +def testcache(): + def verify(a, b): + q = [] + z = a.head + for j in range(len(a.table)): + q.append([z.key, z.value]) + z = z.next + + assert q == b.cache[::-1] + + q2 = [] + for x, y in q: + q2.append((x, y)) + + assert list(a.items()) == q2 + assert list(zip(a.keys(), a.values())) == q2 + assert list(a.keys()) == list(a) + + + a = lrucache(128) + b = simplelrucache(128) + verify(a, b) + test(a, b, a, b, verify) + + a.size(71) + b.resize(71) + verify(a, b) + test(a, b, a, b, verify) + + a.size(341) + b.resize(341) + verify(a, b) + test(a, b, a, b, verify) + + a.size(127) + b.resize(127) + verify(a, b) + test(a, b, a, b, verify) + + +def wraptest(): + + def verify(p, x): + assert p == x.store + for key, value in x.cache.items(): + assert x.store[key] == value + + tmp = list(x.items()) + tmp.sort() + + tmp2 = list(p.items()) + tmp2.sort() + + assert tmp == tmp2 + + p = dict() + q = dict() + x = lruwrap(q, 128) + + test(p, x, p, x, verify) + + + +def wraptest2(): + + def verify(p, x): + for key, value in x.store.items(): + if key not in x.dirty: + assert p[key] == value + + for key in x.dirty: + assert x.cache.peek(key) == p[key] + + for key, value in x.cache.items(): + if key not in x.dirty: + assert x.store[key] == p[key] == value + + tmp = list(x.items()) + tmp.sort() + + tmp2 = list(p.items()) + tmp2.sort() + + assert tmp == tmp2 + + p = dict() + q = dict() + x = lruwrap(q, 128, True) + + test(p, x, p, x, verify) + + x.sync() + assert p == q + +def wraptest3(): + + def verify(p, x): + for key, value in x.store.items(): + if key not in x.dirty: + assert p[key] == value + + for key in x.dirty: + assert x.cache.peek(key) == p[key] + + for key, value in x.cache.items(): + if key not in x.dirty: + assert x.store[key] == p[key] == value + + p = dict() + q = dict() + with lruwrap(q, 128, True) as x: + test(p, x, p, x, verify) + + assert p == q + + +@lrudecorator(100) +def square(x): + return x*x + +def testDecorator(): + for i in range(1000): + x = random.randint(0, 200) + assert square(x) == x*x + + +if __name__ == '__main__': + + random.seed() + + + for i in range(20): + testcache() + wraptest() + wraptest2() + wraptest3() + testDecorator() + + |