[PATCH 02 of 22] radixlink: introduce a new data structure
Jun Wu
quark at fb.com
Sun Jun 4 19:59:14 EDT 2017
# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1496536858 25200
# Sat Jun 03 17:40:58 2017 -0700
# Node ID e8e8d713e4b774f6894ee65723e7fdc12bf8a101
# Parent 5c3500de0a229d8aa08058bccec531d8a85b90d9
# Available At https://bitbucket.org/quark-zju/hg-draft
# hg pull https://bitbucket.org/quark-zju/hg-draft -r e8e8d713e4b7
radixlink: introduce a new data structure
The radixlink structure is designed to address several hard-to-solve
performance issues in Mercurial. It is an on-disk base16 radix-tree, linking
to a linked list of integers. It's a "multimap<string, list<uint32_t>>" in
C++ terms.
For a long time, core Mercurial has had no real key-value data structure supporting
efficient lookups, and various operations have suboptimal time complexity:
1. changelog.rev(node): O(len(repo))
2. obsstore.precursors[node]: O(len(markers))
3. adjustlinkrev slow path (a complex problem)
The new data structure could make the first two O(len(node)), and provide a
cache for linkrevs where the keys could be "filenode + path" (or another
mapping could be used to map path to short bytes).
Note: we already have a radix tree implementation in revlog.c. However it is
coupled with the CPython API and uses in-memory pointers, making it harder to
reuse. It seems easier to just re-invent an on-disk version.
diff --git a/mercurial/pure/radixlink.py b/mercurial/pure/radixlink.py
new file mode 100644
--- /dev/null
+++ b/mercurial/pure/radixlink.py
@@ -0,0 +1,236 @@
+# radixlink.py - on-disk radixtree index pointing to linked list data
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import struct
+
+def _enlarge(buf, size):
+    """enlarge a bytearray in-place to at least the given size
+
+    Pads with zero bytes; a no-op when buf is already size bytes or longer.
+    """
+    l = len(buf)
+    if l < size:
+        buf += bytearray(size - l)
+
+def _tob16(buf):
+    """convert buffer to base16 bytearray
+
+    Each input byte expands to two output entries, the high nibble followed
+    by the low nibble, each in range(16). This is the unit the base16 radix
+    tree consumes: one tree level per nibble.
+    """
+    result = bytearray(len(buf) * 2)
+    for i, b in enumerate(bytearray(buf)):
+        result[i * 2] = b >> 4
+        result[i * 2 + 1] = b & 0xf
+    return result
+
+class radixlink(object):
+    """multimap. keys are variable-length bytes, values are lists of uint32s.
+
+    O(len(key)) lookup and insertion.
+
+    A "radixlink" consists of 2 logical parts: index, and link. "index" is a
+    base16 radix tree with pointers to link. "link" stores linked lists.
+
+    The actual implementation mixes the above 2 parts into 1 buffer for
+    simplicity and easy transaction handling. The format of the buffer is:
+
+    radixlink := header + entry-list
+    header := source-of-truth-size (4B)
+    entry-list := radix-entry | entry-list + entry
+    entry := index-entry | link-entry
+    index-entry := radix-entry | leaf-entry
+    radix-entry := '\0' * 4 + link-offset (4B) + index-offset (4B) * 16
+    leaf-entry := key-length (4B) + link-offset (4B) + key + padding
+    link-entry := next-link-offset (4B) + value (4B)
+
+    leaf-entry's key-length cannot be 0, and is at least len(radix-entry)
+    long so it can be converted to radix-entry in-place later.
+
+    radixlink should be used as a cache, whose source of truth should be an
+    append-only data structure. The header in indexfile consists of a number
+    describing the size (physically, or logically like revision number) of
+    the append-only source of truth used to build radixlink. This makes it
+    easy to update radixlink incrementally.
+
+    radixlink files are expected to live in .hg/cache. It's designed to be
+    simple so it does not even have a version header. Should an incompatible
+    format change happen, we can use different file names in .hg/cache.
+    """
+
+    # header field: size of the source of truth the index was built from
+    sourcelen = struct.Struct('>I')
+    headersize = sourcelen.size
+
+    # all offsets are big-endian uint32, relative to the buffer start;
+    # offset 0 means "null" (the header occupies it, so 0 is never a
+    # valid entry offset)
+    indexoffset = struct.Struct('>I')
+    linkoffset = indexoffset
+
+    keylen = struct.Struct('>I')
+    # a radix-entry's size; leaf-entries are padded to at least this size
+    # so they can be rewritten to radix-entries in-place (_splitleafentry)
+    radixentrysize = keylen.size + linkoffset.size + indexoffset.size * 16
+
+    linkentry = struct.Struct('>II')
+
+    # a freshly created structure: zeroed header + zeroed root radix-entry
+    emptyindex = b'\0' * (headersize + radixentrysize)
+
+    def __init__(self, data=None):
+        self.data = data
+
+    def __len__(self):
+        """raw buffer length, could be used to do "isdirty" check"""
+        return len(self._data)
+
+    @property
+    def data(self):
+        """copy of internal state useful for writing to filesystem"""
+        return bytes(self._data)
+
+    @data.setter
+    def data(self, newdata):
+        """replace internal state, useful for loading from filesystem"""
+        self._data = bytearray(newdata or self.emptyindex)
+
+    @property
+    def sourceoftruthsize(self):
+        """bytes of the append-only source of truth used to update the index"""
+        return self.sourcelen.unpack_from(self._data)[0]
+
+    @sourceoftruthsize.setter
+    def sourceoftruthsize(self, newsize):
+        # skip the write when unchanged so the buffer stays untouched
+        if self.sourceoftruthsize == newsize:
+            return
+        self.sourcelen.pack_into(self._data, 0, newsize)
+
+    def _get(self, key):
+        """lookup a key, return the value list, or an empty array"""
+        # start at the root radix-entry, right after the header
+        ioff = self.headersize
+        bkey = _tob16(key)
+        boff = 0
+        # "<=" (not "<"): a successful match returns boff = len(bkey) + 1,
+        # so one extra iteration consumes the end-of-key state
+        while boff <= len(bkey):
+            ioff, boff, _poff = self._followindex(ioff, bkey, boff)
+            if ioff == 0:
+                return []
+        _ioff, loff = self._readindexlinkoffset(ioff)
+        return self._readlinkvalues(loff)
+
+    def __contains__(self, key):
+        return bool(self._get(key))
+
+    def __getitem__(self, key):
+        v = self._get(key)
+        if not v:
+            raise KeyError(key)
+        return v
+
+    def insert(self, key, value):
+        """D[key (bytes)] = [value (uint32)] + D.get(key, [])"""
+        ioff = self.headersize
+        bkey = _tob16(key)
+        boff = 0
+        data = self._data
+        while boff <= len(bkey):
+            # splitleaf=True: mismatching leaf-entries on the path are
+            # converted to radix-entries so the new key can branch off
+            ioff, boff, poff = self._followindex(ioff, bkey, boff, True)
+            if ioff == 0:
+                # create new leaf
+                assert poff != 0
+                ioff = self._appendleafentry(bkey[boff:])
+                self.indexoffset.pack_into(data, poff, ioff)
+                break
+
+        # update linked list: the new link-entry points at the old list
+        # head, so the newest value comes first (see the docstring)
+        ioff, loff = self._readindexlinkoffset(ioff)
+        newloff = self._appendlinkentry(value, loff)
+        self.linkoffset.pack_into(data, ioff, newloff)
+
+    def _followindex(self, offset, b16list, b16offset, splitleaf=False):
+        """follow an index-entry at offset
+
+        For a radix-entry, lookup b16list[b16offset], and return new pointers.
+        For a leaf-entry, check the remaining of b16list.
+
+        If splitleaf is True, split a leaf-entry where b16list does not match.
+        Useful for adding new index-entries.
+
+        return (nextoffset, b16offset, poffset) or raise. poffset is the offset
+        of nextoffset. Useful for being rewritten to newly added index-entry.
+        """
+        data = self._data
+        klen = self.keylen.unpack_from(self._data, offset)[0]
+        koff = offset + self.keylen.size + self.linkoffset.size
+        if klen > 0: # leaf entry
+            if data[koff:koff + klen] == b16list[b16offset:]:
+                # full match; past-the-end b16offset stops the caller's loop
+                return (offset, len(b16list) + 1, 0)
+            if not splitleaf:
+                # mismatch: key is absent
+                return (0, b16offset, 0)
+            self._splitleafentry(offset)
+            # fall through: the entry at offset is now a radix-entry
+        # radix entry
+        klen = self.keylen.unpack_from(self._data, offset)[0]
+        assert klen == 0
+        if b16offset >= len(b16list):
+            # no need to follow
+            return (offset, b16offset + 1, 0)
+        b16 = b16list[b16offset]
+        # child slot for this nibble inside the 16-pointer table
+        poffset = koff + b16 * self.indexoffset.size
+        nextoffset = self.indexoffset.unpack_from(data, poffset)[0]
+        return (nextoffset, b16offset + 1, poffset)
+
+    def _readindexlinkoffset(self, ioffset):
+        """read an index-entry, return (index-offset, link-offset)
+
+        the returned index-offset points at the link-offset field itself,
+        ready to be rewritten by insert()
+        """
+        ioffset += self.keylen.size
+        loffset = self.linkoffset.unpack_from(self._data, ioffset)[0]
+        return (ioffset, loffset)
+
+    def _readlinkvalues(self, loffset):
+        """return list of values (uint32)"""
+        result = []
+        data = self._data
+        # walk the linked list; offset 0 terminates it
+        while loffset != 0:
+            loffset, value = self.linkentry.unpack_from(data, loffset)
+            result.append(value)
+        return result
+
+    def _appendleafentry(self, bkey, linkoffset=0):
+        """append a leaf-entry, return offset of that entry
+
+        when len(bkey) is 0, append an empty radix-entry instead.
+        """
+        data = self._data
+        offset = len(data)
+        buf = bytearray()
+        buf += self.keylen.pack(len(bkey))
+        buf += self.linkoffset.pack(linkoffset)
+        buf += bkey
+        # leaf-entry is at least len(radix-entry) long
+        _enlarge(buf, self.radixentrysize)
+        data += buf
+        return offset
+
+    def _appendlinkentry(self, value, nextoffset=0):
+        """append a link entry, return its offset"""
+        linkdata = self._data
+        offset = len(linkdata)
+        entry = self.linkentry.pack(nextoffset, value)
+        linkdata += entry
+        return offset
+
+    def _splitleafentry(self, offset):
+        """split leaf-entry into a radix-entry and another leaf-entry
+
+        return offset of the new leaf entry.
+        """
+        data = self._data
+        klen = self.keylen.unpack_from(data, offset)[0]
+        assert klen > 0 # must be a leaf
+
+        koff = offset + self.keylen.size
+        loff = self.linkoffset.unpack_from(data, koff)[0]
+        koff += self.linkoffset.size
+        key = data[koff:koff + klen]
+
+        # create a new leaf entry holding the key minus its first nibble;
+        # it inherits the old leaf's link list (loff)
+        newoffset = self._appendleafentry(key[1:], loff)
+
+        # rewrite entry to radix in-place: all zero except the child slot
+        # for the consumed first nibble (key[0]), which points at the new
+        # leaf. possible because leaf-entries are padded to radixentrysize.
+        radixbuf = bytearray(self.radixentrysize)
+        self.indexoffset.pack_into(radixbuf, self.linkoffset.size +
+                                   self.keylen.size + key[0] *
+                                   self.indexoffset.size, newoffset)
+        data[offset:offset + len(radixbuf)] = radixbuf
+        return newoffset
+
diff --git a/tests/test-radixlink.py b/tests/test-radixlink.py
new file mode 100644
--- /dev/null
+++ b/tests/test-radixlink.py
@@ -0,0 +1,198 @@
+from __future__ import absolute_import
+
+import random
+import unittest
+
+from mercurial import (
+ policy,
+ util,
+)
+
+import silenttestrunner
+
+# dumpradixlink need some struct.Struct definitions from pure
+from mercurial.pure import radixlink as pureradixlink
+
+# main test respects modulepolicy
+radixlink = policy.importmod(r'radixlink')
+
+randint = random.randint
+
+# conservative LONG_MAX (minimal possible value defined by Python)
+longmax = 0x7fffffff
+
+# keys for manual test - being prefix of other keys is more interesting
+interestingkeys = ['abc', 'a', '', 'abcde', 'abcdg', 'abcd', 'abcxy', 'abcyz',
+                   'b', 'xyz']
+
+# expected content dump for testfixedcase; compared via splitstriplines, so
+# only each line's leading/trailing whitespace is insignificant - the inner
+# spacing must match dumpradixlink's '%5d | %s' / 'link%5d | ' formatting
+expectedfixedcase = '''
+    4 | link 1324 | radix 6: 76 7: 1228
+   76 | link    0 | radix 1: 156 2: 1148
+  148 | link    0 | value 0
+  156 | link 1316 | radix 6: 228
+  228 | link    0 | radix 2: 316
+  300 | link    0 | value 1
+  308 | link    0 | value 2
+  316 | link    0 | radix 6: 388
+  388 | link    0 | radix 3: 460
+  460 | link 1308 | radix 6: 532 7: 916
+  532 | link    0 | radix 4: 612
+  604 | link    0 | value 3
+  612 | link 1348 | radix 6: 684
+  684 | link    0 | radix 5: 756 7: 828
+  756 | link 1332 | radix
+  828 | link 1340 | radix
+  900 | link    0 | value 4
+  908 | link    0 | value 5
+  916 | link    0 | radix 8: 996 9: 1068
+  988 | link    0 | value 6
+  996 | link 1356 | leaf [7, 9]
+ 1068 | link 1364 | leaf [7, 10]
+ 1140 | link    0 | value 7
+ 1148 | link 1372 | radix
+ 1220 | link    0 | value 8
+ 1228 | link 1380 | leaf [8, 7, 9, 7, 10]
+ 1300 | link    0 | value 9
+ 1308 | link  148 | value 0
+ 1316 | link  300 | value 10
+ 1324 | link  308 | value 20
+ 1332 | link  604 | value 30
+ 1340 | link  900 | value 40
+ 1348 | link  908 | value 50
+ 1356 | link  988 | value 60
+ 1364 | link 1140 | value 70
+ 1372 | link 1220 | value 80
+ 1380 | link 1300 | value 90
+'''
+
+def dumpradixlink(rl, fout):
+    """dump radixlink content in a human-readable way
+
+    Walks the structure from the root index-entry, following both radix
+    child pointers and link-list pointers, and writes one line per visited
+    entry to fout, sorted by entry offset.
+    """
+    p = pureradixlink.radixlink
+    data = rl.data
+    # NOTE(review): linkoffsets is populated below but never read
+    linkoffsets = set()
+    descs = {} # {offset: entry-desc}
+    tovisit = [(p.headersize, 'index')] # [(offset, 'index' | 'link')]
+    # offset 0 is the null pointer; pre-seed it so it is never dumped
+    visited = set([0])
+    while tovisit:
+        # NOTE(review): "type" shadows the builtin
+        offset, type = tovisit.pop()
+        if offset in visited:
+            continue
+        visited.add(offset)
+        if type == 'link':
+            nextloff, value = p.linkentry.unpack_from(data, offset)
+            desc = 'link%5d | value %d' % (nextloff, value)
+            if nextloff:
+                tovisit.append((nextloff, 'link'))
+        elif type == 'index':
+            koff = offset
+            klen = p.keylen.unpack_from(data, koff)[0]
+            koff += p.keylen.size
+            loff = p.linkoffset.unpack_from(data, koff)[0]
+            linkoffsets.add(loff)
+            koff += p.linkoffset.size
+            desc = 'link%5d | ' % loff
+            tovisit.append((loff, 'link'))
+            if klen > 0:
+                # leaf-entry: show the remaining key nibbles
+                key = data[koff:koff + klen]
+                desc += 'leaf %r' % list(bytearray(key))
+            else:
+                # radix-entry: show only non-null child pointers
+                desc += 'radix'
+                for i in xrange(16):
+                    v = p.indexoffset.unpack_from(
+                        data, koff + i * p.indexoffset.size)[0]
+                    if v > 0:
+                        desc += ' %d: %d' % (i, v)
+                        tovisit.append((v, 'index'))
+        descs[offset] = desc
+    for offset, desc in sorted(descs.items()):
+        fout.write('%5d | %s\n' % (offset, desc))
+    fout.flush()
+
+def splitstriplines(text):
+    """split text into lines, strip each line, and drop empty lines"""
+    lines = [l.strip() for l in text.splitlines()]
+    return [l for l in lines if l]
+
+class testradixlink(unittest.TestCase):
+    def randombytes(self, length, maxint):
+        """return `length` random bytes, each byte in range(maxint + 1)"""
+        s = bytearray(length)
+        for i in xrange(length):
+            s[i] = randint(0, maxint)
+        return bytes(s)
+
+    def testfixedcase(self):
+        """deterministic test against known sizes and a known content dump"""
+        rl = radixlink.radixlink()
+        # empty structure: 4-byte header + 72-byte root radix-entry
+        self.assertEqual(len(rl), 4 + 72)
+        for i, k in enumerate(interestingkeys):
+            with self.assertRaises(KeyError):
+                rl[k]
+            rl.insert(k, i)
+            self.assertEqual(rl[k], [i])
+        self.assertEqual(len(rl), 1308)
+        for i, k in enumerate(interestingkeys):
+            self.assertEqual(rl[k], [i])
+            # a strict prefix of a stored key must not match by accident
+            subkey = k[:len(k) - 1]
+            if subkey not in interestingkeys:
+                with self.assertRaises(KeyError):
+                    rl[subkey]
+            rl.insert(k, i * 10)
+            # values are prepended, newest first
+            self.assertEqual(rl[k], [i * 10, i])
+        self.assertEqual(len(rl), 1388)
+        fout = util.stringio()
+        dumpradixlink(rl, fout)
+        self.assertEqual(splitstriplines(fout.getvalue()),
+                         splitstriplines(expectedfixedcase))
+
+    def _testrandom(self, n, minkeylen, maxkeylen, maxkeyint):
+        """insert n random keys/values and compare against a plain dict"""
+        rl = radixlink.radixlink()
+        d = {}
+        valuemax = longmax
+        for i in range(n):
+            k = self.randombytes(randint(minkeylen, maxkeylen), maxkeyint)
+            v = randint(0, valuemax)
+            rl.insert(k, v)
+            # mirror radixlink's prepend semantics in the reference dict
+            d.setdefault(k, []).insert(0, v)
+            self.assertEqual(rl[k], d[k])
+        rl.sourceoftruthsize = truthsize = randint(0, valuemax)
+        buf = rl.data
+        rl = radixlink.radixlink(buf) # serialize and de-serialize
+        for k, v in d.items():
+            self.assertTrue(k in rl)
+            self.assertEqual(rl[k], v)
+            # neighbors of a stored key (one byte shorter/longer) must
+            # agree with the reference dict
+            for altkey in (k[:len(k) - 1], k + b'\0'):
+                self.assertEqual(altkey in rl, altkey in d)
+                if altkey not in d:
+                    with self.assertRaises(KeyError):
+                        rl[altkey]
+        self.assertEqual(rl.sourceoftruthsize, truthsize)
+
+    def testrandomdense(self):
+        # small alphabets force many shared prefixes (leaf splits)
+        self._testrandom(5000, 0, 5, 5)
+        self._testrandom(5000, 10, 10, 1)
+
+    def testrandomsparse(self):
+        self._testrandom(1000, 0, 1000, 255)
+
+    @unittest.skipIf(radixlink is pureradixlink, 'unnecessary for pure')
+    # NOTE(review): method name has a typo ("compatibilty")
+    def testbinarycompatibilty(self):
+        # binary representation should be the same with pure
+        rlnative = radixlink.radixlink()
+        rlpure = pureradixlink.radixlink()
+        self.assertEqual(len(rlnative), len(rlpure))
+        for i, k in enumerate(interestingkeys):
+            rlnative.insert(k, i)
+            rlpure.insert(k, i)
+        valuemax = longmax
+        for i in range(1000):
+            k = self.randombytes(randint(0, 1000), 255)
+            v = randint(0, valuemax)
+            rlnative.insert(k, v)
+            rlpure.insert(k, v)
+            truthsize = randint(0, valuemax)
+            rlnative.sourceoftruthsize = truthsize
+            rlpure.sourceoftruthsize = truthsize
+        self.assertEqual(len(rlnative), len(rlpure))
+        self.assertEqual(rlnative.data, rlpure.data)
+
+# run via mercurial's silenttestrunner (presumably quiet unless a test
+# fails - confirm against tests/silenttestrunner.py)
+if __name__ == '__main__':
+    silenttestrunner.main(__name__)
More information about the Mercurial-devel
mailing list