D4502: util: allow lrucachedict to track cost of entries

Fri Sep 7 15:15:25 EDT 2018

indygreg updated this revision to Diff 10832.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4502?vs=10821&id=10832

REVISION DETAIL
  https://phab.mercurial-scm.org/D4502

AFFECTED FILES
  contrib/perf.py
  mercurial/util.py
  tests/test-lrucachedict.py

CHANGE DETAILS

diff --git a/tests/test-lrucachedict.py b/tests/test-lrucachedict.py
--- a/tests/test-lrucachedict.py
+++ b/tests/test-lrucachedict.py
@@ -12,27 +12,33 @@
     def testsimple(self):
         d = util.lrucachedict(4)
         self.assertEqual(d.capacity, 4)
-        d['a'] = 'va'
+        d.insert('a', 'va', cost=2)
         d['b'] = 'vb'
         d['c'] = 'vc'
-        d['d'] = 'vd'
+        d.insert('d', 'vd', cost=42)
 
         self.assertEqual(d['a'], 'va')
         self.assertEqual(d['b'], 'vb')
         self.assertEqual(d['c'], 'vc')
         self.assertEqual(d['d'], 'vd')
 
+        self.assertEqual(d.totalcost, 44)
+
         # 'a' should be dropped because it was least recently used.
         d['e'] = 've'
         self.assertNotIn('a', d)
-
         self.assertIsNone(d.get('a'))
+        self.assertEqual(d.totalcost, 42)
 
         self.assertEqual(d['b'], 'vb')
         self.assertEqual(d['c'], 'vc')
         self.assertEqual(d['d'], 'vd')
         self.assertEqual(d['e'], 've')
 
+        # Replacing item with different cost adjusts totalcost.
+        d.insert('e', 've', cost=4)
+        self.assertEqual(d.totalcost, 46)
+
         # Touch entries in some order (both get and set).
         d['e']
         d['c'] = 'vc2'
@@ -63,12 +69,13 @@
 
     def testcopypartial(self):
         d = util.lrucachedict(4)
-        d['a'] = 'va'
-        d['b'] = 'vb'
+        d.insert('a', 'va', cost=4)
+        d.insert('b', 'vb', cost=2)
 
         dc = d.copy()
 
         self.assertEqual(len(dc), 2)
+        self.assertEqual(dc.totalcost, 6)
         for key in ('a', 'b'):
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
@@ -80,8 +87,10 @@
 
         d['c'] = 'vc'
         del d['b']
+        self.assertEqual(d.totalcost, 4)
         dc = d.copy()
         self.assertEqual(len(dc), 2)
+        self.assertEqual(dc.totalcost, 4)
         for key in ('a', 'c'):
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
@@ -93,7 +102,7 @@
 
     def testcopyfull(self):
         d = util.lrucachedict(4)
-        d['a'] = 'va'
+        d.insert('a', 'va', cost=42)
         d['b'] = 'vb'
         d['c'] = 'vc'
         d['d'] = 'vd'
@@ -104,13 +113,19 @@
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
 
+        self.assertEqual(d.totalcost, 42)
+        self.assertEqual(dc.totalcost, 42)
+
         # 'a' should be dropped because it was least recently used.
         dc['e'] = 've'
         self.assertNotIn('a', dc)
         for key in ('b', 'c', 'd', 'e'):
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
 
+        self.assertEqual(d.totalcost, 42)
+        self.assertEqual(dc.totalcost, 0)
+
         # Contents and order of original dict should remain unchanged.
         dc['b'] = 'vb_new'
 
@@ -120,25 +135,28 @@
 
     def testcopydecreasecapacity(self):
         d = util.lrucachedict(5)
-        d['a'] = 'va'
-        d['b'] = 'vb'
+        d.insert('a', 'va', cost=4)
+        d.insert('b', 'vb', cost=2)
         d['c'] = 'vc'
         d['d'] = 'vd'
 
         dc = d.copy(2)
+        self.assertEqual(dc.totalcost, 0)
         for key in ('a', 'b'):
             self.assertNotIn(key, dc)
         for key in ('c', 'd'):
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
 
-        dc['e'] = 've'
+        dc.insert('e', 've', cost=7)
+        self.assertEqual(dc.totalcost, 7)
         self.assertNotIn('c', dc)
         for key in ('d', 'e'):
             self.assertIn(key, dc)
             self.assertEqual(dc[key], 'v%s' % key)
 
         # Original should remain unchanged.
+        self.assertEqual(d.totalcost, 6)
         for key in ('a', 'b', 'c', 'd'):
             self.assertIn(key, d)
             self.assertEqual(d[key], 'v%s' % key)
@@ -174,14 +192,16 @@
 
     def testpopoldest(self):
         d = util.lrucachedict(4)
-        d['a'] = 'va'
-        d['b'] = 'vb'
+        d.insert('a', 'va', cost=10)
+        d.insert('b', 'vb', cost=5)
 
         self.assertEqual(len(d), 2)
         self.assertEqual(d.popoldest(), ('a', 'va'))
         self.assertEqual(len(d), 1)
+        self.assertEqual(d.totalcost, 5)
         self.assertEqual(d.popoldest(), ('b', 'vb'))
         self.assertEqual(len(d), 0)
+        self.assertEqual(d.totalcost, 0)
         self.assertIsNone(d.popoldest())
 
         d['a'] = 'va'
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1209,18 +1209,21 @@
     Holds a reference to nodes on either side as well as a key-value
     pair for the dictionary entry.
     """
-    __slots__ = (u'next', u'prev', u'key', u'value')
+    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
 
     def __init__(self):
         self.next = None
         self.prev = None
 
         self.key = _notset
         self.value = None
+        self.cost = 0
 
     def markempty(self):
         """Mark the node as emptied."""
         self.key = _notset
+        self.value = None
+        self.cost = 0
 
 class lrucachedict(object):
     """Dict that caches most recent accesses and sets.
@@ -1233,6 +1236,10 @@
     we recycle head.prev and make it the new head. Cache accesses result in
     the node being moved to before the existing head and being marked as the
     new head node.
+
+    Items in the cache can be inserted with an optional "cost" value. This is
+    simply an integer that is specified by the caller. The cache can be queried
+    for the total cost of all items presently in the cache.
     """
     def __init__(self, max):
         self._cache = {}
@@ -1242,6 +1249,7 @@
         head.next = head
         self._size = 1
         self.capacity = max
+        self.totalcost = 0
 
     def __len__(self):
         return len(self._cache)
@@ -1261,11 +1269,15 @@
         self._movetohead(node)
         return node.value
 
-    def __setitem__(self, k, v):
+    def insert(self, k, v, cost=0):
+        """Insert a new item in the cache with optional cost value."""
         node = self._cache.get(k)
         # Replace existing value and mark as newest.
         if node is not None:
+            self.totalcost -= node.cost
             node.value = v
+            node.cost = cost
+            self.totalcost += cost
             self._movetohead(node)
             return
 
@@ -1277,17 +1289,24 @@
 
         # At capacity. Kill the old entry.
         if node.key is not _notset:
+            self.totalcost -= node.cost
             del self._cache[node.key]
 
         node.key = k
         node.value = v
+        node.cost = cost
+        self.totalcost += cost
         self._cache[k] = node
         # And mark it as newest entry. No need to adjust order since it
         # is already self._head.prev.
         self._head = node
 
+    def __setitem__(self, k, v):
+        self.insert(k, v)
+
     def __delitem__(self, k):
         node = self._cache.pop(k)
+        self.totalcost -= node.cost
         node.markempty()
 
         # Temporarily mark as newest item before re-adjusting head to make
@@ -1306,6 +1325,7 @@
     def clear(self):
         n = self._head
         while n.key is not _notset:
+            self.totalcost -= n.cost
             n.markempty()
             n = n.next
 
@@ -1336,7 +1356,7 @@
         # We could potentially skip the first N items when decreasing capacity.
         # But let's keep it simple unless it is a performance problem.
         for i in range(len(self._cache)):
-            result[n.key] = n.value
+            result.insert(n.key, n.value, cost=n.cost)
             n = n.prev
 
         return result
@@ -1359,6 +1379,7 @@
 
         # And remove it from the cache and mark it as empty.
         del self._cache[n.key]
+        self.totalcost -= n.cost
         n.markempty()
 
         return key, value
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1904,6 +1904,11 @@
     for i in xrange(sets):
         setseq.append(random.randint(0, sys.maxint))
 
+    def doinserts():
+        d = util.lrucachedict(size)
+        for v in setseq:
+            d.insert(v, v)
+
     def dosets():
         d = util.lrucachedict(size)
         for v in setseq:
@@ -1935,6 +1940,7 @@
     benches = [
         (doinit, b'init'),
         (dogets, b'gets'),
+        (doinserts, b'inserts'),
         (dosets, b'sets'),
         (domixed, b'mixed')
     ]



To: indygreg, #hg-reviewers
Cc: lothiraldan, mercurial-devel