[PATCH v2] scmutil: 25% speedup in casecollisionauditor

Pierre-Yves David pierre-yves.david at ens-lyon.org
Tue Jul 17 17:35:29 CDT 2012


Your V1 has already been pushed as http://selenic.com/repo/hg/rev/afd75476939e

You need to provide a diff between the already-pushed version (V1) and your V2, for example:

$ hg rebase -r e26591d089d3c1b68f3ab738489aa5d2e0d53aaf -d afd75476939e^
$ hg diff -r afd75476939e

On 18 juil. 2012, at 00:31, Joshua Redstone wrote:

> # HG changeset patch
> # User Joshua Redstone <joshua.redstone at fb.com>
> # Date 1341608200 25200
> # Node ID e26591d089d3c1b68f3ab738489aa5d2e0d53aaf
> # Parent  2e139644f2b22337ff565e842ca88c0df837ce1d
> scmutil: 25% speedup in casecollisionauditor
> 
> On a large repository, switching casecollisionauditor to lowercasing all file
> names at once rather than one at a time improves hg-add time by 25%.
> 
> diff --git a/contrib/perf.py b/contrib/perf.py
> --- a/contrib/perf.py
> +++ b/contrib/perf.py
> @@ -177,7 +177,7 @@
>     ui.popbuffer()
> 
> def perfcca(ui, repo):
> -    timer(lambda: scmutil.casecollisionauditor(ui, False, repo[None]))
> +    timer(lambda: scmutil.casecollisionauditor(ui, False, repo.dirstate))
> 
> def perffncacheload(ui, repo):
>     from mercurial import scmutil, store
> diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
> --- a/mercurial/cmdutil.py
> +++ b/mercurial/cmdutil.py
> @@ -1202,7 +1202,7 @@
>     cca = None
>     abort, warn = scmutil.checkportabilityalert(ui)
>     if abort or warn:
> -        cca = scmutil.casecollisionauditor(ui, abort, wctx)
> +        cca = scmutil.casecollisionauditor(ui, abort, repo.dirstate)
>     for f in repo.walk(match):
>         exact = match.exact(f)
>         if exact or not explicitonly and f not in repo.dirstate:
> diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
> --- a/mercurial/scmutil.py
> +++ b/mercurial/scmutil.py
> @@ -49,22 +49,27 @@
>     return abort, warn
> 
> class casecollisionauditor(object):
> -    def __init__(self, ui, abort, existingiter):
> +    def __init__(self, ui, abort, dirstate):
>         self._ui = ui
>         self._abort = abort
> -        self._map = {}
> -        for f in existingiter:
> -            self._map[encoding.lower(f)] = f
> +        allfiles = '\0'.join(dirstate._map)
> +        self._loweredfiles = set(encoding.lower(allfiles).split('\0'))
> +        self._dirstate = dirstate
> +        # The purpose of _newfiles is so that we don't complain about
> +        # case collisions if someone were to call this object with the
> +        # same filename twice.
> +        self._newfiles = set()
> 
>     def __call__(self, f):
>         fl = encoding.lower(f)
> -        map = self._map
> -        if fl in map and map[fl] != f:
> +        if (fl in self._loweredfiles and f not in self._dirstate and
> +            f not in self._newfiles):
>             msg = _('possible case-folding collision for %s') % f
>             if self._abort:
>                 raise util.Abort(msg)
>             self._ui.warn(_("warning: %s\n") % msg)
> -        map[fl] = f
> +        self._loweredfiles.add(fl)
> +        self._newfiles.add(f)
> 
> class pathauditor(object):
>     '''ensure that a filesystem path contains no banned components.
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel



More information about the Mercurial-devel mailing list