D4365: match: make exactmatcher.visitchildrenset return file children as well

spectral (Kyle Lippincott) phabricator at mercurial-scm.org
Fri Aug 24 05:19:33 UTC 2018


spectral created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Previously, if we had an exactmatcher like ['foo.txt', 'a/bar.txt', 'a/b/c/baz.txt'], we'd
  get back the following data:
  
    '.': {'a'}
    'a': {'b'}
    'a/b': {'c'}
    'a/b/c': 'this'
    'a/b/c/d': set()
  
  This was incorrect, since visitchildrenset explicitly says not to pay attention
  to 'foo.txt' and 'a/bar.txt' by not returning them or 'this'. Given the near
  impossibility of making visitchildrenset reliabbly produce only subdirectories,
  a previous commit has made it documented and expected that visitchildrenset can
  return a set containing both files and subdirectories to visit, instead of
  implying/requiring that visitchildrenset() return 'this' if there are files to
  visit. This makes the code for exactmatcher match this clarified documentation.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4365

AFFECTED FILES
  mercurial/match.py
  tests/test-match.py

CHANGE DETAILS

diff --git a/tests/test-match.py b/tests/test-match.py
--- a/tests/test-match.py
+++ b/tests/test-match.py
@@ -202,11 +202,27 @@
         assert isinstance(m, matchmod.exactmatcher)
         self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
         self.assertEqual(m.visitchildrenset(b'dir'), {b'subdir'})
-        self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'this')
+        self.assertEqual(m.visitchildrenset(b'dir/subdir'), {b'foo.txt'})
         self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), set())
         self.assertEqual(m.visitchildrenset(b'dir/subdir/foo.txt'), set())
         self.assertEqual(m.visitchildrenset(b'folder'), set())
 
+    def testVisitchildrensetFilesAndDirs(self):
+        m = matchmod.match(b'x', b'', patterns=[b'rootfile.txt',
+                                                b'a/file1.txt',
+                                                b'a/b/file2.txt',
+                                                # no file in a/b/c
+                                                b'a/b/c/d/file4.txt'],
+                           exact=True)
+        assert isinstance(m, matchmod.exactmatcher)
+        self.assertEqual(m.visitchildrenset(b'.'), {b'a', b'rootfile.txt'})
+        self.assertEqual(m.visitchildrenset(b'a'), {b'b', b'file1.txt'})
+        self.assertEqual(m.visitchildrenset(b'a/b'), {b'c', b'file2.txt'})
+        self.assertEqual(m.visitchildrenset(b'a/b/c'), {b'd'})
+        self.assertEqual(m.visitchildrenset(b'a/b/c/d'), {b'file4.txt'})
+        self.assertEqual(m.visitchildrenset(b'a/b/c/d/e'), set())
+        self.assertEqual(m.visitchildrenset(b'folder'), set())
+
 class DifferenceMatcherTests(unittest.TestCase):
 
     def testVisitdirM2always(self):
diff --git a/mercurial/match.py b/mercurial/match.py
--- a/mercurial/match.py
+++ b/mercurial/match.py
@@ -587,23 +587,24 @@
         return dir in self._dirs
 
     def visitchildrenset(self, dir):
-        if dir in self._dirs:
-            candidates = self._dirs - {'.'}
-            if dir != '.':
-                d = dir + '/'
-                candidates = set(c[len(d):] for c in candidates if
-                                 c.startswith(d))
-            # self._dirs includes all of the directories, recursively, so if
-            # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
-            # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
-            # '/' in it, indicating a it's for a subdir-of-a-subdir; the
-            # immediate subdir will be in there without a slash.
-            ret = set(c for c in candidates if '/' not in c)
-            # We need to emit 'this' for foo/bar, not set(), not {'baz.txt'}.
-            if not ret:
-                return 'this'
-            return ret
-        return set()
+        if not self._fileset or dir not in self._dirs:
+            return set()
+
+        candidates = self._fileset | self._dirs - {'.'}
+        if dir != '.':
+            d = dir + '/'
+            candidates = set(c[len(d):] for c in candidates if
+                             c.startswith(d))
+        # self._dirs includes all of the directories, recursively, so if
+        # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
+        # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
+        # '/' in it, indicating a it's for a subdir-of-a-subdir; the
+        # immediate subdir will be in there without a slash.
+        ret = set(c for c in candidates if '/' not in c)
+        # We really do not expect ret to be empty, since that would imply that
+        # there's something in _dirs that didn't have a file in _fileset.
+        assert ret
+        return ret
 
     def isexact(self):
         return True



To: spectral, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list