[PATCH 1 of 2] match: remove unnecessary optimization where visitdir() returns 'all'

Drew Gottlieb drgott at google.com
Mon May 18 21:55:49 UTC 2015


# HG changeset patch
# User Drew Gottlieb <drgott at google.com>
# Date 1430953175 25200
#      Wed May 06 15:59:35 2015 -0700
# Node ID 44e6ee53585b043a799b73a352fbcafa10b4489c
# Parent  153b9c5235c2f3057f0faac1ce94df9ac02cd83f
match: remove unnecessary optimization where visitdir() returns 'all'

Match's visitdir() was prematurely optimized to return 'all' in some cases, so
that the caller would not have to call it for directories within the current
directory. This change makes the visitdir system less flexible for future
changes, such as making visitdir consider the match's include and exclude
patterns.

As a demonstration of this optimization not actually improving performance,
I ran 'hg files -r . media' on the Mozilla repository, stored as treemanifest
revlogs.

With best of ten tries, the command took 1.07s both with and without the
optimization, even though the optimization reduced the calls from visitdir()
from 987 to 51.

diff --git a/mercurial/manifest.py b/mercurial/manifest.py
--- a/mercurial/manifest.py
+++ b/mercurial/manifest.py
@@ -656,18 +656,10 @@
             if not self.hasdir(fn):
                 match.bad(fn, None)
 
-    def _walk(self, match, alldirs=False):
-        '''Recursively generates matching file names for walk().
-
-        Will visit all subdirectories if alldirs is True, otherwise it will
-        only visit subdirectories for which match.visitdir is True.'''
-
-        if not alldirs:
-            # substring to strip trailing slash
-            visit = match.visitdir(self._dir[:-1] or '.')
-            if not visit:
-                return
-            alldirs = (visit == 'all')
+    def _walk(self, match):
+        '''Recursively generates matching file names for walk().'''
+        if not match.visitdir(self._dir[:-1] or '.'):
+            return
 
         # yield this dir's files and walk its submanifests
         for p in sorted(self._dirs.keys() + self._files.keys()):
@@ -676,7 +668,7 @@
                 if match(fullp):
                     yield fullp
             else:
-                for f in self._dirs[p]._walk(match, alldirs):
+                for f in self._dirs[p]._walk(match):
                     yield f
 
     def matches(self, match):
@@ -686,19 +678,13 @@
 
         return self._matches(match)
 
-    def _matches(self, match, alldirs=False):
+    def _matches(self, match):
         '''recursively generate a new manifest filtered by the match argument.
+        '''
+        ret = treemanifest(self._dir)
 
-        Will visit all subdirectories if alldirs is True, otherwise it will
-        only visit subdirectories for which match.visitdir is True.'''
-
-        ret = treemanifest(self._dir)
-        if not alldirs:
-            # substring to strip trailing slash
-            visit = match.visitdir(self._dir[:-1] or '.')
-            if not visit:
-                return ret
-            alldirs = (visit == 'all')
+        if not match.visitdir(self._dir[:-1] or '.'):
+            return ret
 
         for fn in self._files:
             fullp = self._subpath(fn)
@@ -709,7 +695,7 @@
                 ret._flags[fn] = self._flags[fn]
 
         for dir, subm in self._dirs.iteritems():
-            m = subm._matches(match, alldirs)
+            m = subm._matches(match)
             if not m._isempty():
                 ret._dirs[dir] = m
 
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -174,14 +174,10 @@
         return set(util.dirs(self._fmap)) | set(['.'])
 
     def visitdir(self, dir):
-        '''Helps while traversing a directory tree. Returns the string 'all' if
-        the given directory and all subdirectories should be visited. Otherwise
-        returns True or False indicating whether the given directory should be
-        visited. If 'all' is returned, calling this method on a subdirectory
-        gives an undefined result.'''
-        if not self._fmap or self.exact(dir):
-            return 'all'
-        return dir in self._dirs
+        return (not self._fmap or '.' in self._fmap or
+                dir in self._fmap or dir in self._dirs or
+                any(parentdir in self._fmap
+                    for parentdir in util.finddirs(dir)))
 
     def exact(self, f):
         '''Returns True if f is in .files().'''


More information about the Mercurial-devel mailing list