[PATCH 10 of 10 V2] treemanifest: optimize diff using the matcher

Durham Goode durham at fb.com
Tue Mar 7 22:22:42 EST 2017


# HG changeset patch
# User Durham Goode <durham at fb.com>
# Date 1488943242 28800
#      Tue Mar 07 19:20:42 2017 -0800
# Node ID 541bf866729342f534bac425bd8f01b9fe7564e8
# Parent  611fac63adb09c326912e56df59c828ad12ffd9f
treemanifest: optimize diff using the matcher

This optimizes treemanifest.diff() to limit the tree traversal based on the
provided matcher. According to Martin's testing, `hg status --rev .~1 --rev .
foo/` goes from 1.3s to 0.18s on a tree version of Mozilla central. I'd expect
an even greater saving on larger internal repos at big companies.

A previous patch added test coverage for treemanifest diff with patterns.

diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -1053,10 +1053,6 @@ class treemanifest(object):
         the nodeid will be None and the flags will be the empty
         string.
         '''
-        if match:
-            m1 = self._matches(match)
-            m2 = m2._matches(match)
-            return m1.diff(m2, clean=clean)
         result = {}
         emptytree = treemanifest()
         def _diff(t1, t2):
@@ -1065,26 +1061,31 @@ class treemanifest(object):
             t1._load()
             t2._load()
             for d, m1 in t1._dirs.iteritems():
-                m2 = t2._dirs.get(d, emptytree)
-                _diff(m1, m2)
+                if not match or match.visitdir(os.path.join(t1.dir(), d[:-1])):
+                    m2 = t2._dirs.get(d, emptytree)
+                    _diff(m1, m2)
 
             for d, m2 in t2._dirs.iteritems():
                 if d not in t1._dirs:
-                    _diff(emptytree, m2)
+                    if (not match or match.visitdir(os.path.join(t2.dir(),
+                                                                 d[:-1]))):
+                        _diff(emptytree, m2)
 
             for fn, n1 in t1._files.iteritems():
-                fl1 = t1._flags.get(fn, '')
-                n2 = t2._files.get(fn, None)
-                fl2 = t2._flags.get(fn, '')
-                if n1 != n2 or fl1 != fl2:
-                    result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
-                elif clean:
-                    result[t1._subpath(fn)] = None
+                if not match or match(os.path.join(t1.dir(), fn)):
+                    fl1 = t1._flags.get(fn, '')
+                    n2 = t2._files.get(fn, None)
+                    fl2 = t2._flags.get(fn, '')
+                    if n1 != n2 or fl1 != fl2:
+                        result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
+                    elif clean:
+                        result[t1._subpath(fn)] = None
 
             for fn, n2 in t2._files.iteritems():
                 if fn not in t1._files:
-                    fl2 = t2._flags.get(fn, '')
-                    result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
+                    if not match or match(os.path.join(t2.dir(), fn)):
+                        fl2 = t2._flags.get(fn, '')
+                        result[t2._subpath(fn)] = ((None, ''), (n2, fl2))
 
         _diff(self, m2)
         return result


More information about the Mercurial-devel mailing list