[PATCH 3 of 4] dirstate: add _sortedlist and speed up _addpath
Joshua Redstone
joshua.redstone at fb.com
Fri Jun 22 15:49:48 CDT 2012
# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1340034409 25200
# Node ID a97598a0ca30329ad513f8e9fe443ec3b0fc1baa
# Parent 8ed2a82b1237bd1bffa5e9c7a19d6025e153bc8a
dirstate: add _sortedlist and speed up _addpath
Add a new cache property, _sortedfiles, which is a sorted list of files
in _map. Use this to get rid of creation of _dirs property in hg-add.
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -11,6 +11,7 @@
import scmutil, util, ignore, osutil, parsers, encoding
import struct, os, stat, errno
import cStringIO
+import bisect
_format = ">cllll"
propertycache = util.propertycache
@@ -147,6 +148,15 @@
def _checkcase(self):
return not util.checkcase(self._join('.hg'))
+ @propertycache
+ def _sortedfiles(self):
+ slist = [f for f in self._map if self._map[f][0] != 'r']
+ slist.sort()
+ return slist
+
+ def sortedfiles(self):
+ return self._sortedfiles
+
def _join(self, f):
# much faster than os.path.join()
# it's safe because f is always a relative path
@@ -309,15 +319,20 @@
return self._copymap
def _droppath(self, f):
- if self[f] not in "?r" and "_dirs" in self.__dict__:
- _decdirs(self._dirs, f)
+ if self[f] not in "?r":
+ if "_dirs" in self.__dict__:
+ _decdirs(self._dirs, f)
+ if "_sortedfiles" in self.__dict__:
+ idx = bisect.bisect_left(self._sortedfiles, f)
+ assert self._sortedfiles[idx] == f
+ self._sortedfiles.pop(idx)
def _addpath(self, f, state, mode, size, mtime, check=False):
assert state not in "?r"
oldstate = self[f]
if check or oldstate == "r":
scmutil.checkfilename(f)
- if f in self._dirs:
+ if util.prefixmatch(f + "/", self._sortedfiles) == f + "/":
raise util.Abort(_('directory %r already in dirstate') % f)
# shadows
for d in _finddirs(f):
@@ -327,6 +342,8 @@
if oldstate in "?r" and "_dirs" in self.__dict__:
_incdirs(self._dirs, f)
self._dirty = True
+ if self[f] not in "?r":
+ bisect.insort(self._sortedfiles, f)
self._map[f] = (state, mode, size, mtime)
def normal(self, f):
@@ -469,6 +486,8 @@
self._map = {}
if "_dirs" in self.__dict__:
delattr(self, "_dirs")
+ if "_sortedfiles" in self.__dict__:
+ delattr(self, "_sortedfiles")
self._copymap = {}
self._pl = [nullid, nullid]
self._lastnormaltime = 0
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -18,6 +18,7 @@
import errno, re, shutil, sys, tempfile, traceback
import os, time, datetime, calendar, textwrap, signal
import imp, socket, urllib
+import bisect
if os.name == 'nt':
import windows as platform
@@ -1800,3 +1801,40 @@
return fd.isatty()
except AttributeError:
return False
+
+def prefixmatch(path, sortedlist):
+ """Returns longest prefix of path that is common to any string in
+ sortedlist.
+
+ >>> prefixmatch('a', ['b'])
+ ''
+ >>> prefixmatch('c', ['b'])
+ ''
+ >>> prefixmatch('a', ['a/b'])
+ 'a'
+ >>> prefixmatch('a/', ['a/b'])
+ 'a/'
+ >>> prefixmatch('a/d', ['a/b', 'a/e'])
+ 'a/'
+ >>> prefixmatch('a/d', ['a/b', 'a/c', 'a/e'])
+ 'a/'
+ >>> prefixmatch('d', ['a', 'b', 'e'])
+ ''
+ >>> prefixmatch('c/g', ['a', 'b', 'c/a', 'c/e', 'f'])
+ 'c/'
+ >>> prefixmatch('c/e', ['a', 'b', 'c/a', 'c/e', 'f'])
+ 'c/e'
+ """
+ if len(sortedlist) == 0:
+ return ""
+ # Idx is index of largest element <= path
+ idx = bisect.bisect_right(sortedlist, path) - 1
+ leftprefix = ""
+ rightprefix = ""
+ if idx >= 0:
+ leftprefix = os.path.commonprefix([path, sortedlist[idx]])
+ if idx + 1 < len(sortedlist):
+ rightprefix = os.path.commonprefix([path, sortedlist[idx + 1]])
+ if len(leftprefix) > len(rightprefix):
+ return leftprefix
+ return rightprefix
diff --git a/tests/test-dirstate.t b/tests/test-dirstate.t
--- a/tests/test-dirstate.t
+++ b/tests/test-dirstate.t
@@ -14,6 +14,21 @@
moving a/b/c/d/x to z/b/c/d/x (glob)
moving a/b/c/d/y to z/b/c/d/y (glob)
moving a/b/c/d/z to z/b/c/d/z (glob)
+
+Test name collisions
+
+ $ rm z/b/c/d/x
+ $ mkdir z/b/c/d/x
+ $ touch z/b/c/d/x/y
+ $ hg add z/b/c/d/x/y
+ abort: file 'z/b/c/d/x' in dirstate clashes with 'z/b/c/d/x/y'
+ [255]
+ $ rm -rf z/b/c/d
+ $ touch z/b/c/d
+ $ hg add z/b/c/d
+ abort: directory 'z/b/c/d' already in dirstate
+ [255]
+
$ cd ..
Issue1790: dirstate entry locked into unset if file mtime is set into
@@ -38,4 +53,3 @@
$ hg debugstate
n 644 2 2021-01-01 12:00:00 a
$ cd ..
-
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -5,6 +5,10 @@
import doctest
import mercurial.util
+doctest.run_docstring_examples(mercurial.util.prefixmatch,
+ mercurial.util.__dict__)
+
+import mercurial.util
doctest.testmod(mercurial.util)
import mercurial.changelog
More information about the Mercurial-devel
mailing list