D6259: revset: on-disk cache for children queries

joerg.sonnenberger (Joerg Sonnenberger) phabricator at mercurial-scm.org
Tue Apr 16 22:43:44 UTC 2019


joerg.sonnenberger created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This is a proof of concept for further discussion.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6259

AFFECTED FILES
  mercurial/localrepo.py
  mercurial/revset.py

CHANGE DETAILS

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -385,13 +385,13 @@
     cs = set()
     for r in getset(repo, fullreposet(repo), x):
         for i in range(n):
-            c = repo[r].children()
+            c = repo._childrencache.children(r)
             if len(c) == 0:
                 break
             if len(c) > 1:
                 raise error.RepoLookupError(
                     _("revision in set has more than one child"))
-            r = c[0].rev()
+            r = list(c)[0]
         else:
             cs.add(r)
     return subset & cs
@@ -586,18 +586,7 @@
 def _children(repo, subset, parentset):
     if not parentset:
         return baseset()
-    cs = set()
-    pr = repo.changelog.parentrevs
-    minrev = parentset.min()
-    nullrev = node.nullrev
-    for r in subset:
-        if r <= minrev:
-            continue
-        p1, p2 = pr(r)
-        if p1 in parentset:
-            cs.add(r)
-        if p2 != nullrev and p2 in parentset:
-            cs.add(r)
+    cs = set.union(*[set(repo._childrencache.children(c)) for c in parentset])
     return baseset(cs)
 
 @predicate('children(set)', safe=True)
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -11,6 +11,7 @@
 import hashlib
 import os
 import random
+import struct
 import sys
 import time
 import weakref
@@ -994,6 +995,7 @@
 
         self._branchcaches = {}
         self._revbranchcache = None
+        self.__childrencache = None
         self._filterpats = {}
         self._datafilters = {}
         self._transref = self._lockref = self._wlockref = None
@@ -1084,6 +1086,8 @@
     def _writecaches(self):
         if self._revbranchcache:
             self._revbranchcache.write()
+        if self.__childrencache:
+            self.__childrencache.write()
 
     def _restrictcapabilities(self, caps):
         if self.ui.configbool('experimental', 'bundle2-advertise'):
@@ -1194,6 +1198,11 @@
         return manifest.manifestlog(self.svfs, self, rootstore,
                                     self._storenarrowmatch)
 
+    @unfilteredpropertycache
+    def _childrencache(self):
+        self.__childrencache = childrencache(self)
+        return self.__childrencache
+
     @repofilecache('dirstate')
     def dirstate(self):
         return self._makedirstate()
@@ -2087,6 +2096,8 @@
             # ensure the working copy parents are in the manifestfulltextcache
             for ctx in self['.'].parents():
                 ctx.manifest()  # accessing the manifest is enough
+            self._childrencache.clear()
+            self._childrencache.write()
 
     def invalidatecaches(self):
 
@@ -2097,6 +2108,8 @@
         self.unfiltered()._branchcaches.clear()
         self.invalidatevolatilesets()
         self._sparsesignaturecache.clear()
+        self._childrencache.clear()
+        self._childrencache.write()
 
     def invalidatevolatilesets(self):
         self.filteredrevcache.clear()
@@ -3087,3 +3100,110 @@
     # We may have a repoview, which intercepts __setattr__. So be sure
     # we operate at the lowest level possible.
     object.__setattr__(repo, r'__class__', poisonedrepository)
+
+class childrencache(object):
+    """Persistent cache, mapping from revision number to revision numbers of
+    the direct children. This is a low level cache, independent of filtering.
+    """
+    _filename = 'childrencache'
+    def __init__(self, repo, readonly=False):
+        assert repo.filtername is None
+        self._data = None
+        self._new_data = {}
+        self._read = False
+        self._repo = repo
+        self._readonly = readonly
+
+    def children(self, rev):
+        if not self._read:
+            self.read()
+        children = self._new_data.get(rev, set())
+        rev += 1
+        if rev >= self._lastknown:
+            return children
+        children = children.copy()
+        child_len, child_off = struct.unpack_from('>ll', self._data, 8 * rev)
+        if child_len == 1:
+            children.add(child_off)
+        elif child_len > 1:
+            start = 8 * self._lastknown + child_off * 4
+            if len(self._data) < start + child_len * 4:
+                raise error.Abort('Truncated children cache')
+            for i in xrange(child_len):
+                child, = struct.unpack_from('>l', self._data, start)
+                children.add(child)
+                start += 4
+        return children
+
+    def clear(self):
+        self._read = True
+        self._data = None
+        self._new_data = {}
+        self._lastknown = 0
+        self._update_changes(0)
+
+    def _update_changes(self, start):
+        idx = self._repo.changelog.index
+        for r in xrange(start, len(self._repo)):
+            rev = idx[r]
+            if rev[5] != nullrev:
+                self._new_data.setdefault(rev[5], set()).add(r)
+            if rev[6] != nullrev and rev[5] != rev[6]:
+                self._new_data.setdefault(rev[6], set()).add(r)
+            if rev[5] == nullrev and rev[6] == nullrev:
+                self._new_data.setdefault(nullrev, set()).add(r)
+
+    def read(self):
+        if self._read:
+            return
+        self._read = True
+        try:
+            data = self._repo.cachevfs.read(self._filename)
+        except (OSError, IOError):
+            data = ""
+        if len(data) < 24:
+            self._repo.ui.debug('children cache missing or too short, ignored')
+            self._lastknown = 0
+            self._update_changes(0)
+            return
+        tip, lastrev = struct.unpack_from('>20sl', data, 0)
+        if lastrev == 0:
+            self._repo.ui.debug('children cache empty, ignored')
+            self._lastknown = 0
+            self._update_changes(0)
+            return
+        if lastrev > len(self._repo) + 1 or self._repo.changelog.node(lastrev - 2) != tip:
+            self._lastknown = 0
+            self._update_changes(0)
+            self._repo.ui.debug('children cache does not match current repository state, ignored')
+            return
+        data = data[24:]
+        if lastrev * 8 > len(data):
+            self._lastknown = 0
+            self._update_changes(0)
+            self._repo.ui.debug('children cache was truncated, ignored')
+            return
+        self._data = data
+        self._lastknown = lastrev
+        self._update_changes(lastrev)
+
+    def write(self):
+        if not self._new_data or self._readonly:
+            return
+        lastrev = len(self._repo) + 1
+        data = [struct.pack('>20sl', self._repo.changelog.tip(), lastrev)]
+        data2 = []
+        for r in xrange(lastrev):
+            children = self.children(r - 1)
+            data.append(struct.pack('>l', len(children)))
+            if len(children) == 0:
+                data.append(struct.pack('>l', 0))
+            elif len(children) == 1:
+                children = list(children)
+                data.append(struct.pack('>l', children[0]))
+            else:
+                data.append(struct.pack('>l', len(data2)))
+                for c in children:
+                    data2.append(struct.pack('>l', c))
+        output = b''.join(data) + b''.join(data2)
+        self._repo.cachevfs.write(self._filename, output, atomictemp=True)



To: joerg.sonnenberger, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list