[PATCH 3 of 4] dirstate: add _sortedlist and speed up _addpath

Joshua Redstone joshua.redstone at fb.com
Fri Jun 22 15:49:48 CDT 2012


# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1340034409 25200
# Node ID a97598a0ca30329ad513f8e9fe443ec3b0fc1baa
# Parent  8ed2a82b1237bd1bffa5e9c7a19d6025e153bc8a
dirstate: add _sortedlist and speed up _addpath

Add a new cache property, _sortedfiles, which is a sorted list of files
in _map.  Use this to get rid of creation of _dirs property in hg-add.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -11,6 +11,7 @@
 import scmutil, util, ignore, osutil, parsers, encoding
 import struct, os, stat, errno
 import cStringIO
+import bisect
 
 _format = ">cllll"
 propertycache = util.propertycache
@@ -147,6 +148,15 @@
     def _checkcase(self):
         return not util.checkcase(self._join('.hg'))
 
+    @propertycache
+    def _sortedfiles(self):
+        slist = [f for f in self._map if self._map[f][0] != 'r']
+        slist.sort()
+        return slist
+
+    def sortedfiles(self):
+        return self._sortedfiles
+
     def _join(self, f):
         # much faster than os.path.join()
         # it's safe because f is always a relative path
@@ -309,15 +319,20 @@
         return self._copymap
 
     def _droppath(self, f):
-        if self[f] not in "?r" and "_dirs" in self.__dict__:
-            _decdirs(self._dirs, f)
+        if self[f] not in "?r":
+            if "_dirs" in self.__dict__:
+                _decdirs(self._dirs, f)
+            if "_sortedfiles" in self.__dict__:
+                idx = bisect.bisect_left(self._sortedfiles, f)
+                assert self._sortedfiles[idx] == f
+                self._sortedfiles.pop(idx)
 
     def _addpath(self, f, state, mode, size, mtime, check=False):
         assert state not in "?r"
         oldstate = self[f]
         if check or oldstate == "r":
             scmutil.checkfilename(f)
-            if f in self._dirs:
+            if util.prefixmatch(f + "/", self._sortedfiles) == f + "/":
                 raise util.Abort(_('directory %r already in dirstate') % f)
             # shadows
             for d in _finddirs(f):
@@ -327,6 +342,8 @@
         if oldstate in "?r" and "_dirs" in self.__dict__:
             _incdirs(self._dirs, f)
         self._dirty = True
+        if self[f] not in "?r":
+            bisect.insort(self._sortedfiles, f)
         self._map[f] = (state, mode, size, mtime)
 
     def normal(self, f):
@@ -469,6 +486,8 @@
         self._map = {}
         if "_dirs" in self.__dict__:
             delattr(self, "_dirs")
+        if "_sortedfiles" in self.__dict__:
+            delattr(self, "_sortedfiles")
         self._copymap = {}
         self._pl = [nullid, nullid]
         self._lastnormaltime = 0
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -18,6 +18,7 @@
 import errno, re, shutil, sys, tempfile, traceback
 import os, time, datetime, calendar, textwrap, signal
 import imp, socket, urllib
+import bisect
 
 if os.name == 'nt':
     import windows as platform
@@ -1800,3 +1801,40 @@
         return fd.isatty()
     except AttributeError:
         return False
+
+def prefixmatch(path, sortedlist):
+    """Returns longest prefix of path that is common to any string in
+    sortedlist.
+
+    >>> prefixmatch('a', ['b'])
+    ''
+    >>> prefixmatch('c', ['b'])
+    ''
+    >>> prefixmatch('a', ['a/b'])
+    'a'
+    >>> prefixmatch('a/', ['a/b'])
+    'a/'
+    >>> prefixmatch('a/d', ['a/b', 'a/e'])
+    'a/'
+    >>> prefixmatch('a/d', ['a/b', 'a/c', 'a/e'])
+    'a/'
+    >>> prefixmatch('d', ['a', 'b', 'e'])
+    ''
+    >>> prefixmatch('c/g', ['a', 'b', 'c/a', 'c/e', 'f'])
+    'c/'
+    >>> prefixmatch('c/e', ['a', 'b', 'c/a', 'c/e', 'f'])
+    'c/e'
+    """
+    if len(sortedlist) == 0:
+        return ""
+    # Idx is index of largest element <= path
+    idx = bisect.bisect_right(sortedlist, path) - 1
+    leftprefix = ""
+    rightprefix = ""
+    if idx >= 0:
+        leftprefix = os.path.commonprefix([path, sortedlist[idx]])
+    if idx + 1 < len(sortedlist):
+        rightprefix = os.path.commonprefix([path, sortedlist[idx + 1]])
+    if len(leftprefix) > len(rightprefix):
+        return leftprefix
+    return rightprefix
diff --git a/tests/test-dirstate.t b/tests/test-dirstate.t
--- a/tests/test-dirstate.t
+++ b/tests/test-dirstate.t
@@ -14,6 +14,21 @@
   moving a/b/c/d/x to z/b/c/d/x (glob)
   moving a/b/c/d/y to z/b/c/d/y (glob)
   moving a/b/c/d/z to z/b/c/d/z (glob)
+
+Test name collisions
+
+  $ rm z/b/c/d/x
+  $ mkdir z/b/c/d/x
+  $ touch z/b/c/d/x/y
+  $ hg add z/b/c/d/x/y
+  abort: file 'z/b/c/d/x' in dirstate clashes with 'z/b/c/d/x/y'
+  [255]
+  $ rm -rf z/b/c/d
+  $ touch z/b/c/d
+  $ hg add z/b/c/d
+  abort: directory 'z/b/c/d' already in dirstate
+  [255]
+
   $ cd ..
 
 Issue1790: dirstate entry locked into unset if file mtime is set into
@@ -38,4 +53,3 @@
   $ hg debugstate
   n 644          2 2021-01-01 12:00:00 a
   $ cd ..
-
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -5,6 +5,10 @@
 import doctest
 
 import mercurial.util
+doctest.run_docstring_examples(mercurial.util.prefixmatch,
+                               mercurial.util.__dict__)
+
+import mercurial.util
 doctest.testmod(mercurial.util)
 
 import mercurial.changelog


More information about the Mercurial-devel mailing list