[PATCH 1 of 6 RFC] radixlink: introduce a new data structure
Jun Wu
quark at fb.com
Mon May 22 01:31:08 UTC 2017
# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1495396237 25200
# Sun May 21 12:50:37 2017 -0700
# Node ID fec85b1af16b0360e7bd78cd26d4d21edf678962
# Parent 4e51b2a99847904f1cc5a9c74784f19d69e829d0
# Available At https://bitbucket.org/quark-zju/hg-draft
# hg pull https://bitbucket.org/quark-zju/hg-draft -r fec85b1af16b
radixlink: introduce a new data structure
The radixlink structure is designed to address several hard-to-solve
performance issues in Mercurial. It is an on-disk radix-tree as index,
linking to linked list as data (so it could store multiple values per key).
For long, core Mercurial has no real key-value data structure, and various
operations have suboptimal time complexity:
1. changelog.rev(node): O(len(repo))
2. obsstore.precursors[node]: O(len(markers))
3. adjustlinkrev slow path (a complex problem)
The new data structure could make the first two O(len(node)), and provide a
cache for linkrevs where the keys could be "filenode + path + '\0'".
The next patches will try to solve the obsstore performance issue.
diff --git a/mercurial/radixlink.py b/mercurial/radixlink.py
new file mode 100644
--- /dev/null
+++ b/mercurial/radixlink.py
@@ -0,0 +1,244 @@
+# radixlink.py - on-disk radixtree index pointing to linked list data
+#
+# Copyright 2017 Facebook, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import os
+import struct
+
+from . import (
+ error,
+)
+
+def _enlarge(buf, size):
+ """enlarge a bytearray to at least given size"""
+ l = len(buf)
+ if l < size:
+ buf += bytearray(size - l)
+
+def _tob16(buf):
+ """convert buffer to base16 bytearray"""
+ result = bytearray(len(buf) * 2)
+ for i, b in enumerate(bytearray(buf)):
+ result[i * 2] = b >> 4
+ result[i * 2 + 1] = b & 0xf
+ return result
+
+class radixlink(object):
+ """multimap with O(len(key) + len([value])) lookup and O(len(key)) insertion
+
+ A "radixlink" structure consists of 2 files: indexfile, and linkfile.
+
+ The indexfile is the radix tree with pointers to linkfile:
+
+ indexdata := index-header + index-entry-list
+ index-header := source of truth offset (8B)
+ index-entry-list := '' | index-entry-list + index-entry
+ index-entry := radix-entry | leaf-entry
+ radix-entry := '\0' + index-offset (4B) * 16
+ leaf-entry := '\1' + link-offset (4B) + key-length (2B) + key
+
+ note: leaf-entry is at least len(radix-entry) long so it could be
+ converted to radix-entry in-place.
+
+ The linkfile is an append-only linked list:
+
+ linkdata := link-header + link-entry-list
+ link-header := '\0'
+ link-entry-list := '' | link-entry-list + link-entry
+ link-entry := next-link-offset (4B) + value (4B)
+
+ radixlink should be used as caches, source of truth should be an
+ append-only data structure. The header in indexfile consists an offset
+ about the append-only source of truth so revlink could be updated
+ incrementally.
+
+ radixlink does not support two keys where one is prefix of the other.
+ Application should use same-length keys, or append unique suffix to avoid
+ that.
+ """
+
+ indexheader = struct.Struct('>Q')
+ indexoffset = struct.Struct('>I')
+ linkoffset = struct.Struct('>I')
+
+ keylen = struct.Struct('>H')
+ radixentrysize = 1 + indexoffset.size * 16
+
+ radixentrytype = 0
+ leafentrytype = 1
+
+ linkentry = struct.Struct('>II')
+
+ emptyindex = b'\0' * (indexheader.size + radixentrysize)
+ emptylink = b'\0'
+
+ def __init__(self, vfs, indexfile, linkfile=None):
+ linkfile = linkfile or indexfile + '.l'
+ self.vfs = vfs
+ self.indexfile = indexfile
+ self.linkfile = linkfile
+ self.indexdata = bytearray(vfs.tryread(indexfile) or self.emptyindex)
+ self.linkdata = bytearray(vfs.tryread(linkfile) or self.emptylink)
+ self._dirty = False
+
+ def flush(self):
+ if self._dirty:
+ for path, data in [(self.linkfile, self.linkdata),
+ (self.indexfile, self.indexdata)]:
+ f = self.vfs(path, 'wb', atomictemp=True)
+ f.write(data)
+ f.close()
+ self._dirty = False
+
+ @property
+ def truthoffset(self):
+ """position of the append-only source of truth the index contains"""
+ if len(self.indexdata) >= self.indexheader.size:
+ return self.indexheader.unpack_from(self.indexdata)[0]
+ return 0
+
+ @truthoffset.setter
+ def truthoffset(self, newoffset):
+ if self.truthoffset == newoffset:
+ return
+ self.indexheader.pack_into(self.indexdata, 0, newoffset)
+ self._dirty = True
+
+ def destroy(self):
+ """destory the revlink cache so it has to be built from scratch"""
+ self.vfs.tryunlink(self.indexfile)
+ self.vfs.tryunlink(self.linkfile)
+ self.indexdata = bytearray(self.emptyindex)
+ self.linkdata = bytearray(self.emptylink)
+
+ def get(self, key):
+ """key: bytearray"""
+ # starting index offset and suboffset
+ ioff = self.indexheader.size
+ bkey = _tob16(key)
+ boff = 0
+ while boff < len(bkey):
+ ioff, boff, _poff = self._followindex(ioff, bkey, boff)
+ if ioff == 0:
+ return []
+ # read linkdata
+ ioff, loff = self._readlinkoffset(ioff)
+ result = []
+ while loff != 0:
+ loff, value = self.linkentry.unpack_from(self.linkdata, loff)
+ result.append(value)
+ return result
+
+ def insert(self, key, value):
+ """key: int, value: buffer"""
+ ioff = self.indexheader.size
+ bkey = _tob16(key)
+ boff = 0
+ data = self.indexdata
+ while boff < len(bkey):
+ ioff, boff, poff = self._followindex(ioff, bkey, boff, True)
+ if ioff == 0:
+ # create new leaf
+ assert poff != 0
+ ioff = self._appendleafentry(bkey[boff:])
+ self.indexoffset.pack_into(data, poff, ioff)
+ break
+
+ # update linked list
+ ioff, loff = self._readlinkoffset(ioff)
+ newloff = self._appendlinkvalue(value, loff)
+ self.linkoffset.pack_into(data, ioff, newloff)
+ self._dirty = True
+
+ def _followindex(self, offset, b16list, b16offset, splitleaf=False):
+ """follow index at char b16list[b16offset]
+
+ return (offset, b16offset, poffset) or raise
+ """
+ data = self.indexdata
+ type = data[offset]
+ if data[offset] == self.leafentrytype:
+ koff = offset + 1 + self.linkoffset.size
+ klen = self.keylen.unpack_from(data, koff)[0]
+ koff += self.keylen.size
+ if data[koff:koff + klen] == b16list[b16offset:b16offset + klen]:
+ if len(b16list) - b16offset > klen:
+ raise error.ProgrammingError('existing key cannot be '
+ 'prefix of new key')
+ return (offset, len(b16list), 0)
+ if not splitleaf:
+ return (0, b16offset, 0)
+ self._splitleaf(offset)
+ # radix entry
+ assert data[offset] == self.radixentrytype
+ b16 = b16list[b16offset]
+ poffset = offset + 1 + b16 * self.indexoffset.size
+ nextoffset = self.indexoffset.unpack_from(data, poffset)[0]
+ return (nextoffset, b16offset + 1, poffset)
+
+ def _readlinkoffset(self, ioffset):
+ """given offset of an index-entry, return (index-offset, link-offset)
+
+ index-offset is an offset in indexdata, where link-offset is stored.
+ link-offset is an offset in linkdata.
+ """
+ # must be a leaf-entry
+ type = self.indexdata[ioffset]
+ ioffset += 1
+ if type != self.leafentrytype:
+ raise error.ProgrammingError('key cannot be prefix of existing key')
+ loffset = self.linkoffset.unpack_from(self.indexdata, ioffset)[0]
+ return (ioffset, loffset)
+
+ def _appendleafentry(self, bkey, linkoffset=0):
+ """append a leaf entry to indexdata, return offset of that entry"""
+ # leaf-entry := '\1' + link-offset (4B) + key-length (2B) + key
+ data = self.indexdata
+ offset = len(data)
+ buf = bytearray([self.leafentrytype])
+ buf += self.linkoffset.pack(linkoffset)
+ buf += self.keylen.pack(len(bkey))
+ buf += bkey
+ # leaf-entry is at least len(radix-entry) long
+ _enlarge(buf, self.radixentrysize)
+ data += buf
+ return offset
+
+ def _splitleaf(self, offset):
+ """split a leaf entry into a radix entry and another leaf entry
+
+ leaf entry at offset will be turned into a radix entry.
+ return new offset of the new leaf entry.
+ """
+ data = self.indexdata
+ assert data[offset] == self.leafentrytype
+
+ loff = self.linkoffset.unpack_from(data, offset + 1)[0]
+ koff = offset + 1 + self.linkoffset.size
+ klen = self.keylen.unpack_from(data, koff)[0]
+ if klen == 0:
+ raise error.ProgrammingError('existing key cannot be prefix '
+ 'of new key')
+ koff += self.keylen.size
+ key = data[koff:koff + klen]
+
+ # create a new leaf entry
+ newoffset = self._appendleafentry(key[1:], loff)
+
+ # rewrite entry to radix in-place
+ radixbuf = bytearray(self.radixentrysize)
+ self.indexoffset.pack_into(radixbuf, 1 + key[0] *
+ self.indexoffset.size, newoffset)
+ data[offset:offset + len(radixbuf)] = radixbuf
+ return newoffset
+
+ def _appendlinkvalue(self, value, nextoffset=0):
+ """append value to linkdata, return offset"""
+ offset = len(self.linkdata)
+ entry = self.linkentry.pack(nextoffset, value)
+ self.linkdata += entry
+ return offset
diff --git a/tests/test-radixlink.py b/tests/test-radixlink.py
new file mode 100644
--- /dev/null
+++ b/tests/test-radixlink.py
@@ -0,0 +1,61 @@
+from __future__ import absolute_import
+
+import os
+import random
+
+from mercurial import (
+ error,
+ radixlink,
+ vfs,
+)
+
+randint = random.randint
+
+def ensure(expr):
+ if not expr:
+ raise RuntimeError, 'unexpected'
+
+fs = vfs.vfs(os.environ['TESTTMP'])
+
+rl1 = radixlink.radixlink(fs, 'rl-test')
+rl1.destroy()
+
+# basic functions
+ensure(rl1.get('notfound') == [])
+ensure(rl1.truthoffset == 0)
+
+# prefix key detection
+rl1.insert('abc', 3)
+for buggykey in ['abcdef', 'ab']:
+ try:
+ rl1.insert('abcdef', 3)
+ raise RuntimeError, 'k1 being prefix of k2 should not be allowed'
+ except error.ProgrammingError:
+ pass
+rl1.destroy()
+
+# insert random data
+def urandom(n):
+ s = bytearray(n + 1)
+ for i in xrange(n):
+ s[i] = randint(0, 254)
+ s[n] = 255
+ return s
+
+d = {}
+for i in range(10000):
+ k = bytes(urandom(randint(0, 20)))
+ v = randint(0, 4294967295)
+ rl1.insert(k, v)
+ d.setdefault(k, []).insert(0, v)
+ v2 = rl1.get(k)
+ ensure(v2[0] == v)
+
+rl1.truthoffset = 333
+rl1.flush()
+
+# reload and verify against d
+rl2 = radixlink.radixlink(fs, 'rl-test')
+ensure(rl2.truthoffset == 333)
+for k, vs in d.items():
+ ensure(rl2.get(k) == vs)
More information about the Mercurial-devel
mailing list