[PATCH] Take advantage of fstat call clustering per directory if the OS supports it

Petr Kodl petrkodl at gmail.com
Thu Oct 9 09:30:07 CDT 2008


# HG changeset patch
# User Petr Kodl <petrkodl at gmail.com>
# Date 1223562587 14400
# Node ID 55e47f21c5886410ce3647722625abf2b050a88a
# Parent  4e0d54fbd34fc10a565616e3a2ea7ad9600b4c2b
Take advantage of fstat call clustering per directory if the OS supports it.

The util module implements two versions of the statfiles function:

_statfiles calls lstat per file

_statfiles_clustered takes advantage of optimizations in osutil.c: it stats
all files in a directory at once when a new directory is hit and caches the
results

util.statfiles is bound to the appropriate version during module loading

The speedup on a directory tree with 2k directories and 63k files is about a
factor of 1.8 (1.3s -> 0.8s for hg diff, where hg startup overhead is about 0.2s)

At this point only Win32 benefits from this patch.
All other OSes use the non-clustered implementation.

diff -r 4e0d54fbd34f -r 55e47f21c588 mercurial/dirstate.py
--- a/mercurial/dirstate.py	Thu Oct 09 14:16:17 2008 +0200
+++ b/mercurial/dirstate.py	Thu Oct 09 10:29:47 2008 -0400
@@ -535,17 +535,11 @@
                         results[nf] = None
 
         # step 3: report unseen items in the dmap hash
-        visit = [f for f in dmap if f not in results and match(f)]
-        for nf in util.sort(visit):
-            results[nf] = None
-            try:
-                st = lstat(join(nf))
-                kind = getkind(st.st_mode)
-                if kind == regkind or kind == lnkkind:
-                    results[nf] = st
-            except OSError, inst:
-                if inst.errno not in (errno.ENOENT, errno.ENOTDIR):
-                    raise
+        visit = util.sort([f for f in dmap if f not in results and match(f)])
+        for nf, st in zip(visit, util.statfiles([join(i) for i in visit])):
+            if not st is None and not getkind(st.st_mode) in (regkind, lnkkind):
+                st = None
+            results[nf] = st
 
         del results['.hg']
         return results
diff -r 4e0d54fbd34f -r 55e47f21c588 mercurial/util.py
--- a/mercurial/util.py	Thu Oct 09 14:16:17 2008 +0200
+++ b/mercurial/util.py	Thu Oct 09 10:29:47 2008 -0400
@@ -798,6 +798,52 @@
 def openhardlinks():
     '''return true if it is safe to hold open file handles to hardlinks'''
     return True
+
+def _statfiles(files):
+    'Stat each file in files and yield stat or None if file does not exist.'
+    lstat = os.lstat
+    for nf in files:
+        try:
+            st = lstat(nf)
+        except OSError, err:
+            if err.errno not in (errno.ENOENT, errno.ENOTDIR):
+                raise
+            st = None
+        yield st
+
+def _statfiles_clustered(files):
+    '''Stat each file in files and yield stat or None if file does not exist.
+    Cluster and cache stat per directory to minimize number of OS stat calls.'''
+    lstat = os.lstat
+    ncase = os.path.normcase
+    sep   = os.sep
+    dircache = {} # dirname -> filename -> status | None if file does not exist
+    for nf in files:
+        nf  = ncase(nf)
+        pos = nf.rfind(sep)
+        if pos == -1:
+            dir, base = '.', nf
+        else:
+            dir, base = nf[:pos], nf[pos+1:]
+        cache = dircache.get(dir, None)
+        if cache is None:
+            try:
+                dmap = dict([(ncase(n), s)
+                    for n, k, s in osutil.listdir(dir, True)])
+            except OSError, err:
+                # handle directory not found in Python version prior to 2.5
+                # Python <= 2.4 returns native Windows code 3 in errno
+                # Python >= 2.5 returns ENOENT and adds winerror field
+                if err.errno not in (3, errno.ENOENT, errno.ENOTDIR):
+                    raise
+                dmap = {}
+            cache = dircache.setdefault(dir, dmap)
+        yield cache.get(base, None)
+
+if sys.platform == 'win32':
+    statfiles = _statfiles_clustered
+else:
+    statfiles = _statfiles
 
 getuser_fallback = None
 


More information about the Mercurial-devel mailing list