[PATCH] dirtat clustering with latest updates and proper handling of folding etc.

Petr Kodl petrkodl at gmail.com
Wed Oct 1 16:29:38 CDT 2008


# HG changeset patch
# User Petr Kodl <petrkodl at gmail.com>
# Date 1222896540 14400
# Node ID 646b724a3a5fc2fbe65da4cab746836c9bfa05ba
# Parent  b5bb24b015b05ce149926776720dc7ae38e1751d
use per-directory clustered stat calls when appropriate

util module implements two versions of statfiles

_statfiles_direct calls lstat per file and yields the results

_statfiles_clustered takes advantage of optimizations in osutil.c and stats all
files in a directory when a new directory is hit

The choice of appropriate implementation is made during module loading.
At the moment win32 uses the clustered stat; all other platforms use the direct
version.

diff -r b5bb24b015b0 -r 646b724a3a5f mercurial/dirstate.py
--- a/mercurial/dirstate.py	Wed Oct 01 15:30:12 2008 -0400
+++ b/mercurial/dirstate.py	Wed Oct 01 17:29:00 2008 -0400
@@ -535,17 +535,11 @@
                         results[nf] = None
 
         # step 3: report unseen items in the dmap hash
-        visit = [f for f in dmap if f not in results and match(f)]
-        for nf in util.sort(visit):
-            results[nf] = None
-            try:
-                st = lstat(join(nf))
-                kind = getkind(st.st_mode)
-                if kind == regkind or kind == lnkkind:
-                    results[nf] = st
-            except OSError, inst:
-                if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
-                    raise
+        visit = util.sort([f for f in dmap if f not in results and match(f)])
+        for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
+            if not st is None and not getkind(st.st_mode) in (regkind, lnkkind):
+                st = None
+            results[nf] = st
 
         del results['.hg']
         return results
diff -r b5bb24b015b0 -r 646b724a3a5f mercurial/util.py
--- a/mercurial/util.py	Wed Oct 01 15:30:12 2008 -0400
+++ b/mercurial/util.py	Wed Oct 01 17:29:00 2008 -0400
@@ -15,7 +15,7 @@
 from i18n import _
 import cStringIO, errno, getpass, re, shutil, sys, tempfile
 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
-import imp, urlparse
+import imp, urlparse, stat
 
 # Python compatibility
 
@@ -799,8 +799,52 @@
     '''return true if it is safe to hold open file handles to hardlinks'''
     return True
 
+def _statfiles_direct(files):
+    '''Generator: lstat each path in files, yielding results in input order.
+    Yield None for a path that does not exist (ENOENT or ENOTDIR).'''
+    lstat = os.lstat  # local alias for os.lstat
+    for nf in files:
+        try:
+            st = lstat(nf)
+        except OSError, err:
+            if err.errno not in (errno.ENOENT, errno.ENOTDIR):
+                raise  # any other failure propagates to the caller
+            st = None  # missing file or missing path component
+        yield st
+
+def _statfiles_clustered(files):
+    '''Yield one status entry (or None) per path in files, in input order.
+    The containing directory is listed once with osutil.listdir and cached,
+    so entries are matched by normcased name; None means no entry was found.'''
+    lstat = os.lstat  # NOTE(review): not referenced again in this function
+    ncase = os.path.normcase
+    sep   = os.sep
+    dircache = {} # dirname->filename->status
+    for nf in files:
+        nf  = ncase(nf)
+        pos = nf.rfind(sep)
+        if pos == -1:
+            dir, base = '.', nf  # no separator: look in the current directory
+        else:
+            dir, base = nf[:pos], nf[pos+1:]
+        cache = dircache.get(dir, None)
+        if cache is None:  # first path seen in this directory: list it once
+            try:
+                dmap = dict([(ncase(n), s)
+                    for n, k, s in osutil.listdir(dir, True)])
+            except WindowsError, err:
+                if err.errno!=errno.ENOENT:  # NOTE(review): ENOTDIR is not tolerated here
+                    raise
+                dmap = {}  # missing directory: every lookup in it yields None
+            cache = dircache.setdefault(dir, dmap)
+        yield cache.get(base, None)
+
+if sys.platform=='win32':  # choose the statfiles implementation at import time
+    statfiles = _statfiles_clustered  # one directory listing per directory
+else:
+    statfiles = _statfiles_direct  # one lstat call per file
+
 getuser_fallback = None
-
 def getuser():
     '''return name of current user'''
     try:


More information about the Mercurial-devel mailing list