D4502: util: allow lrucachedict to track cost of entries
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Fri Sep 7 15:15:25 EDT 2018
indygreg updated this revision to Diff 10832.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D4502?vs=10821&id=10832
REVISION DETAIL
https://phab.mercurial-scm.org/D4502
AFFECTED FILES
contrib/perf.py
mercurial/util.py
tests/test-lrucachedict.py
CHANGE DETAILS
diff --git a/tests/test-lrucachedict.py b/tests/test-lrucachedict.py
--- a/tests/test-lrucachedict.py
+++ b/tests/test-lrucachedict.py
@@ -12,27 +12,33 @@
def testsimple(self):
d = util.lrucachedict(4)
self.assertEqual(d.capacity, 4)
- d['a'] = 'va'
+ d.insert('a', 'va', cost=2)
d['b'] = 'vb'
d['c'] = 'vc'
- d['d'] = 'vd'
+ d.insert('d', 'vd', cost=42)
self.assertEqual(d['a'], 'va')
self.assertEqual(d['b'], 'vb')
self.assertEqual(d['c'], 'vc')
self.assertEqual(d['d'], 'vd')
+ self.assertEqual(d.totalcost, 44)
+
# 'a' should be dropped because it was least recently used.
d['e'] = 've'
self.assertNotIn('a', d)
-
self.assertIsNone(d.get('a'))
+ self.assertEqual(d.totalcost, 42)
self.assertEqual(d['b'], 'vb')
self.assertEqual(d['c'], 'vc')
self.assertEqual(d['d'], 'vd')
self.assertEqual(d['e'], 've')
+ # Replacing item with different cost adjusts totalcost.
+ d.insert('e', 've', cost=4)
+ self.assertEqual(d.totalcost, 46)
+
# Touch entries in some order (both get and set).
d['e']
d['c'] = 'vc2'
@@ -63,12 +69,13 @@
def testcopypartial(self):
d = util.lrucachedict(4)
- d['a'] = 'va'
- d['b'] = 'vb'
+ d.insert('a', 'va', cost=4)
+ d.insert('b', 'vb', cost=2)
dc = d.copy()
self.assertEqual(len(dc), 2)
+ self.assertEqual(dc.totalcost, 6)
for key in ('a', 'b'):
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
@@ -80,8 +87,10 @@
d['c'] = 'vc'
del d['b']
+ self.assertEqual(d.totalcost, 4)
dc = d.copy()
self.assertEqual(len(dc), 2)
+ self.assertEqual(dc.totalcost, 4)
for key in ('a', 'c'):
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
@@ -93,7 +102,7 @@
def testcopyfull(self):
d = util.lrucachedict(4)
- d['a'] = 'va'
+ d.insert('a', 'va', cost=42)
d['b'] = 'vb'
d['c'] = 'vc'
d['d'] = 'vd'
@@ -104,13 +113,19 @@
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
+ self.assertEqual(d.totalcost, 42)
+ self.assertEqual(dc.totalcost, 42)
+
# 'a' should be dropped because it was least recently used.
dc['e'] = 've'
self.assertNotIn('a', dc)
for key in ('b', 'c', 'd', 'e'):
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
+ self.assertEqual(d.totalcost, 42)
+ self.assertEqual(dc.totalcost, 0)
+
# Contents and order of original dict should remain unchanged.
dc['b'] = 'vb_new'
@@ -120,25 +135,28 @@
def testcopydecreasecapacity(self):
d = util.lrucachedict(5)
- d['a'] = 'va'
- d['b'] = 'vb'
+ d.insert('a', 'va', cost=4)
+ d.insert('b', 'vb', cost=2)
d['c'] = 'vc'
d['d'] = 'vd'
dc = d.copy(2)
+ self.assertEqual(dc.totalcost, 0)
for key in ('a', 'b'):
self.assertNotIn(key, dc)
for key in ('c', 'd'):
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
- dc['e'] = 've'
+ dc.insert('e', 've', cost=7)
+ self.assertEqual(dc.totalcost, 7)
self.assertNotIn('c', dc)
for key in ('d', 'e'):
self.assertIn(key, dc)
self.assertEqual(dc[key], 'v%s' % key)
# Original should remain unchanged.
+ self.assertEqual(d.totalcost, 6)
for key in ('a', 'b', 'c', 'd'):
self.assertIn(key, d)
self.assertEqual(d[key], 'v%s' % key)
@@ -174,14 +192,16 @@
def testpopoldest(self):
d = util.lrucachedict(4)
- d['a'] = 'va'
- d['b'] = 'vb'
+ d.insert('a', 'va', cost=10)
+ d.insert('b', 'vb', cost=5)
self.assertEqual(len(d), 2)
self.assertEqual(d.popoldest(), ('a', 'va'))
self.assertEqual(len(d), 1)
+ self.assertEqual(d.totalcost, 5)
self.assertEqual(d.popoldest(), ('b', 'vb'))
self.assertEqual(len(d), 0)
+ self.assertEqual(d.totalcost, 0)
self.assertIsNone(d.popoldest())
d['a'] = 'va'
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1209,18 +1209,21 @@
Holds a reference to nodes on either side as well as a key-value
pair for the dictionary entry.
"""
- __slots__ = (u'next', u'prev', u'key', u'value')
+ __slots__ = (u'next', u'prev', u'key', u'value', u'cost')
def __init__(self):
self.next = None
self.prev = None
self.key = _notset
self.value = None
+ self.cost = 0
def markempty(self):
"""Mark the node as emptied."""
self.key = _notset
+ self.value = None
+ self.cost = 0
class lrucachedict(object):
"""Dict that caches most recent accesses and sets.
@@ -1233,6 +1236,10 @@
we recycle head.prev and make it the new head. Cache accesses result in
the node being moved to before the existing head and being marked as the
new head node.
+
+ Items in the cache can be inserted with an optional "cost" value. This is
+ simply an integer that is specified by the caller. The cache can be queried
+ for the total cost of all items presently in the cache.
"""
def __init__(self, max):
self._cache = {}
@@ -1242,6 +1249,7 @@
head.next = head
self._size = 1
self.capacity = max
+ self.totalcost = 0
def __len__(self):
return len(self._cache)
@@ -1261,11 +1269,15 @@
self._movetohead(node)
return node.value
- def __setitem__(self, k, v):
+ def insert(self, k, v, cost=0):
+ """Insert a new item in the cache with optional cost value."""
node = self._cache.get(k)
# Replace existing value and mark as newest.
if node is not None:
+ self.totalcost -= node.cost
node.value = v
+ node.cost = cost
+ self.totalcost += cost
self._movetohead(node)
return
@@ -1277,17 +1289,24 @@
# At capacity. Kill the old entry.
if node.key is not _notset:
+ self.totalcost -= node.cost
del self._cache[node.key]
node.key = k
node.value = v
+ node.cost = cost
+ self.totalcost += cost
self._cache[k] = node
# And mark it as newest entry. No need to adjust order since it
# is already self._head.prev.
self._head = node
+ def __setitem__(self, k, v):
+ self.insert(k, v)
+
def __delitem__(self, k):
node = self._cache.pop(k)
+ self.totalcost -= node.cost
node.markempty()
# Temporarily mark as newest item before re-adjusting head to make
@@ -1306,6 +1325,7 @@
def clear(self):
n = self._head
while n.key is not _notset:
+ self.totalcost -= n.cost
n.markempty()
n = n.next
@@ -1336,7 +1356,7 @@
# We could potentially skip the first N items when decreasing capacity.
# But let's keep it simple unless it is a performance problem.
for i in range(len(self._cache)):
- result[n.key] = n.value
+ result.insert(n.key, n.value, cost=n.cost)
n = n.prev
return result
@@ -1359,6 +1379,7 @@
# And remove it from the cache and mark it as empty.
del self._cache[n.key]
+ self.totalcost -= n.cost
n.markempty()
return key, value
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -1904,6 +1904,11 @@
for i in xrange(sets):
setseq.append(random.randint(0, sys.maxint))
+ def doinserts():
+ d = util.lrucachedict(size)
+ for v in setseq:
+ d.insert(v, v)
+
def dosets():
d = util.lrucachedict(size)
for v in setseq:
@@ -1935,6 +1940,7 @@
benches = [
(doinit, b'init'),
(dogets, b'gets'),
+ (doinserts, b'inserts'),
(dosets, b'sets'),
(domixed, b'mixed')
]
To: indygreg, #hg-reviewers
Cc: lothiraldan, mercurial-devel
More information about the Mercurial-devel
mailing list