[PATCH 6 of 6 v2] scmutil: speed up case collision checking
Joshua Redstone
joshua.redstone at fb.com
Tue Jun 26 14:04:43 CDT 2012
# HG changeset patch
# User Joshua Redstone <joshua.redstone at fb.com>
# Date 1340048916 25200
# Node ID d963a5794553462ee6605eb9efc89a0986c97a71
# Parent 592a6e3e9f4df3e2e3b8a137cb3aaa5143a96451
scmutil: speed up case collision checking
scmutil.casecollisionauditor is slow for large repos. Switch to a faster
implementation based on reducing the fraction of all files that are scanned
and caching the results of case conversion lazily rather than precomputing.
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1202,12 +1202,13 @@
cca = None
abort, warn = scmutil.checkportabilityalert(ui)
if abort or warn:
- cca = scmutil.casecollisionauditor(ui, abort, wctx)
+ cca = scmutil.casecollisionauditor(ui, abort,
+ repo.dirstate.sortedfiles())
for f in repo.walk(match):
exact = match.exact(f)
if exact or not explicitonly and f not in repo.dirstate:
if cca:
- cca(f)
+ cca.checkcollision(f)
names.append(f)
if ui.verbose or not exact:
ui.status(_('adding %s\n') % match.rel(join(f)))
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -9,6 +9,7 @@
import util, error, osutil, revset, similar, encoding
import match as matchmod
import os, errno, re, stat, sys, glob
+import bisect
def nochangesfound(ui, secretlist=None):
'''report no changes for push/pull'''
@@ -49,22 +50,56 @@
return abort, warn
class casecollisionauditor(object):
- def __init__(self, ui, abort, existingiter):
+ def __init__(self, ui, abort, sortedfiles):
self._ui = ui
self._abort = abort
- self._map = {}
- for f in existingiter:
- self._map[encoding.lower(f)] = f
+ self._loweredfiles = set()
+ self._lowereddirs = set()
+ self._sortedfiles = sortedfiles
- def __call__(self, f):
- fl = encoding.lower(f)
- map = self._map
- if fl in map and map[fl] != f:
- msg = _('possible case-folding collision for %s') % f
+ def checkcollision(self, path):
+ """
+ Check if path has a case collision with another file, either
+ one we've checked before or one in a sorted list of files.
+ """
+ dirn = os.path.dirname(path)
+ hit = False
+ loweredpath = encoding.lower(path)
+ if dirn in self._lowereddirs:
+ # All files in dirn (but not subdirs) have been lowercased so we
+ # need only check for the lowered filename in the cache of lowered
+ # files.
+ hit = loweredpath in self._loweredfiles
+ else:
+ dirlevel = path.count('/')
+ # If we're checking a/b/c, and we know there's already a file in the
+ # repo in the dir a/b, then we know that the 'a/b' component can not
+ # have a case collision, so we can limit our search to files in the
+ # tree under a/b. The prefixmatch call is to find the longest path
+ # component we can use to limit the search.
+ prefix = util.prefixmatch(path, self._sortedfiles)
+ prefixdirslash = os.path.dirname(prefix) + "/"
+ if prefixdirslash == "/":
+ prefixdirslash = ""
+ startidx = bisect.bisect_right(self._sortedfiles, prefixdirslash)
+ hit = False
+ for idx in xrange(startidx, len(self._sortedfiles)):
+ f = self._sortedfiles[idx]
+ if not f.startswith(prefixdirslash):
+ break
+ if f.count('/') != dirlevel:
+ continue
+ lowerf = encoding.lower(f)
+ self._loweredfiles.add(lowerf)
+ if lowerf == loweredpath and f != path:
+ hit = True
+ self._lowereddirs.add(dirn)
+ self._loweredfiles.add(loweredpath)
+ if hit:
+ msg = _('possible case-folding collision for %s') % path
if self._abort:
raise util.Abort(msg)
self._ui.warn(_("warning: %s\n") % msg)
- map[fl] = f
class pathauditor(object):
'''ensure that a filesystem path contains no banned components.
diff --git a/tests/test-casecollision.t b/tests/test-casecollision.t
--- a/tests/test-casecollision.t
+++ b/tests/test-casecollision.t
@@ -31,6 +31,29 @@
$ hg st
A A
A a
+ $ mkdir b
+ $ touch b/c b/D
+ $ hg add b
+ adding b/D
+ adding b/c
+ $ touch b/d b/C
+ $ hg add b/C
+ warning: possible case-folding collision for b/C
+ $ hg add b/d
+ warning: possible case-folding collision for b/d
+ $ touch b/a1 b/a2
+ $ hg add b
+ adding b/a1
+ adding b/a2
+ $ touch b/A2 b/a1.1
+ $ hg add b/a1.1 b/A2
+ warning: possible case-folding collision for b/A2
+ $ touch b/f b/F
+ $ hg add b/f b/F
+ warning: possible case-folding collision for b/f
+ $ touch g G
+ $ hg add g G
+ warning: possible case-folding collision for g
case changing rename must not warn or abort
More information about the Mercurial-devel
mailing list