[PATCH 3 of 3] use per-directory clustered stat calls even in cases where known tree is walked

Benoit Boissinot bboissin at gmail.com
Tue Oct 14 10:34:12 CDT 2008


On Mon, Oct 6, 2008 at 12:59 PM, Petr Kodl <petrkodl at gmail.com> wrote:
>>
>> Wouldn't it be cleaner to call normpath on those filenames ?
>
> normcase is probably what you want in this case. I submitted a patch based
> on Benoit's suggestions a while ago, but here it is again, just in case.
> It is reasonably clean except for one thing: the handling of errors
> coming from os.listdir, which is the result of some incompatibilities
> between Python versions.

Matt, do you have any objections to this patch, conceptual or otherwise?

It really helps on Windows for big trees.

regards,

Benoit

>
>
> use per-directory clustered stat when appropriate
>
> The util module implements two versions of the statfiles function:
>
> _statfiles_direct calls lstat per file
>
> _statfiles_clustered takes advantage of optimizations in osutil.c and stats
> all files in a directory at once when a new directory is hit
>
> util.statfiles dispatches to appropriate version during module loading
>
> At the moment win32 uses the clustered stat; all other platforms use the
> direct version.
>
> diff -r 8b58cb4b4395 mercurial/dirstate.py
> --- a/mercurial/dirstate.py     Wed Oct 01 18:23:25 2008 -0400
> +++ b/mercurial/dirstate.py     Thu Oct 02 12:56:13 2008 -0400
> @@ -535,17 +535,11 @@
>                         results[nf] = None
>
>         # step 3: report unseen items in the dmap hash
> -        visit = [f for f in dmap if f not in results and match(f)]
> -        for nf in util.sort(visit):
> -            results[nf] = None
> -            try:
> -                st = lstat(join(nf))
> -                kind = getkind(st.st_mode)
> -                if kind == regkind or kind == lnkkind:
> -                    results[nf] = st
> -            except OSError, inst:
> -                if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
> -                    raise
> +        visit = util.sort([f for f in dmap if f not in results and match(f)])
> +        for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
> +            if not st is None and not getkind(st.st_mode) in (regkind, lnkkind):
> +                st = None
> +            results[nf] = st
>
>         del results['.hg']
>         return results
> diff -r 8b58cb4b4395 mercurial/util.py
> --- a/mercurial/util.py Wed Oct 01 18:23:25 2008 -0400
> +++ b/mercurial/util.py Thu Oct 02 12:56:13 2008 -0400
> @@ -798,6 +798,55 @@
>  def openhardlinks():
>     '''return true if it is safe to hold open file handles to hardlinks'''
>     return True
> +
> +def _statfiles_direct(files):
> +    '''Stat each file in the list
> +    Return None if files does not exist'''
> +    lstat = os.lstat
> +    for nf in files:
> +        try:
> +            st = lstat(nf)
> +        except OSError, err:
> +            if err.errno not in (errno.ENOENT, errno.ENOTDIR):
> +                raise
> +            st = None
> +        yield st
> +
> +def _statfiles_clustered(files):
> +    '''Stat each file in the list
> +    Return None if files does not exist
> +    Cluster stat per directory to improve performance'''
> +    lstat = os.lstat
> +    ncase = os.path.normcase
> +    sep   = os.sep
> +    dircache = {} # dirname->filename->status
> +    for nf in files:
> +        nf  = ncase(nf)
> +        pos = nf.rfind(sep)
> +        if pos == -1:
> +            dir, base = '.', nf
> +        else:
> +            dir, base = nf[:pos], nf[pos+1:]
> +        cache = dircache.get(dir, None)
> +        if cache is None:
> +            try:
> +                dmap = dict([(ncase(n), s)
> +                    for n, k, s in osutil.listdir(dir, True)])
> +            except OSError, err:
> +                # 3 is required to work around Win32 errno handling
> +                # of directory not found
> +                # Python <= 2.4 returns errno native winerr 3
> +                # starting Python 2.5 this behaves correctly and returns ENOENT
> +                if err.errno not in (3, errno.ENOENT, errno.ENOTDIR):
> +                    raise
> +                dmap = {}
> +            cache = dircache.setdefault(dir, dmap)
> +        yield cache.get(base, None)
> +
> +if sys.platform == 'win32':
> +    statfiles = _statfiles_clustered
> +else:
> +    statfiles = _statfiles_direct
>
>  getuser_fallback = None


More information about the Mercurial-devel mailing list