[PATCH v2] scmutil: 25% speedup in casecollisionauditor

Joshua Redstone joshua.redstone at fb.com
Tue Jul 17 17:31:05 CDT 2012


# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1341608200 25200
# Node ID e26591d089d3c1b68f3ab738489aa5d2e0d53aaf
# Parent  2e139644f2b22337ff565e842ca88c0df837ce1d
scmutil: 25% speedup in casecollisionauditor

On a large repository, having casecollisionauditor lowercase all file names
in a single call, rather than one at a time, reduces 'hg add' time by 25%.

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -177,7 +177,7 @@
     ui.popbuffer()
 
 def perfcca(ui, repo):
-    timer(lambda: scmutil.casecollisionauditor(ui, False, repo[None]))
+    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
 
 def perffncacheload(ui, repo):
     from mercurial import scmutil, store
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1202,7 +1202,7 @@
     cca = None
     abort, warn = scmutil.checkportabilityalert(ui)
     if abort or warn:
-        cca = scmutil.casecollisionauditor(ui, abort, wctx)
+        cca = scmutil.casecollisionauditor(ui, abort, repo.dirstate)
     for f in repo.walk(match):
         exact = match.exact(f)
         if exact or not explicitonly and f not in repo.dirstate:
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -49,22 +49,27 @@
     return abort, warn
 
 class casecollisionauditor(object):
-    def __init__(self, ui, abort, existingiter):
+    def __init__(self, ui, abort, dirstate):
         self._ui = ui
         self._abort = abort
-        self._map = {}
-        for f in existingiter:
-            self._map[encoding.lower(f)] = f
+        allfiles = '\0'.join(dirstate._map)
+        self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
+        self._dirstate = dirstate
+        # The purpose of _newfiles is so that we don't complain about
+        # case collisions if someone were to call this object with the
+        # same filename twice.
+        self._newfiles = set()
 
     def __call__(self, f):
         fl = encoding.lower(f)
-        map = self._map
-        if fl in map and map[fl] != f:
+        if (fl in self._loweredfiles and f not in self._dirstate and
+            f not in self._newfiles):
             msg = _('possible case-folding collision for %s') % f
             if self._abort:
                 raise util.Abort(msg)
             self._ui.warn(_("warning: %s\n") % msg)
-        map[fl] = f
+        self._loweredfiles.add(fl)
+        self._newfiles.add(f)
 
 class pathauditor(object):
     '''ensure that a filesystem path contains no banned components.


More information about the Mercurial-devel mailing list