[PATCH] dirstate: pass the dict to modify to statfiles

Bryan O'Sullivan bos at serpentine.com
Fri Nov 30 18:47:52 CST 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1354322866 28800
# Node ID 56b75493b37ba8a7b8df16e442c10adea53e4685
# Parent  1cb1d6b9565d14fbb4e8521a79fc549f227c6be4
dirstate: pass the dict to modify to statfiles

This improves perfstatus numbers by 25% on a clean working dir with
170,000 files, from 1.27 to 1.02 seconds.

I was initially not thrilled by the idea of having statfiles modify a
dict passed in by the caller, but the performance gain is hard to ignore.

diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -697,8 +697,7 @@
         if not skipstep3 and not exact:
             visit = sorted([f for f in dmap if f not in results and matchfn(f)])
             nf = iter(visit).next
-            for st in util.statfiles([join(i) for i in visit]):
-                results[nf()] = st
+            util.statfiles([join(i) for i in visit], results)
         for s in subrepos:
             del results[s]
         del results['.hg']
diff --git a/mercurial/osutil.c b/mercurial/osutil.c
--- a/mercurial/osutil.c
+++ b/mercurial/osutil.c
@@ -394,48 +394,50 @@
         PyObject *names, *stats;
         Py_ssize_t i, count;
 
-        if (!PyArg_ParseTuple(args, "O:statfiles", &names))
+        if (!PyArg_ParseTuple(args, "O!O!:statfiles",
+                              &PyList_Type, &names,
+                              &PyDict_Type, &stats))
                 return NULL;
 
-        count = PySequence_Length(names);
-        if (count == -1) {
-                PyErr_SetString(PyExc_TypeError, "not a sequence");
-                return NULL;
-        }
-
-        stats = PyList_New(count);
-        if (stats == NULL)
-                return NULL;
+        count = PyList_GET_SIZE(names);
 
         for (i = 0; i < count; i++) {
-                PyObject *stat;
+                PyObject *stat, *name;
                 struct stat st;
                 int ret, kind;
                 char *path;
 
-                path = PyString_AsString(PySequence_GetItem(names, i));
+                name = PyList_GET_ITEM(names, i);
+                path = PyString_AsString(name);
                 if (path == NULL) {
                         PyErr_SetString(PyExc_TypeError, "not a string");
-                        goto bail;
+                        return NULL;
                 }
                 ret = lstat(path, &st);
                 kind = st.st_mode & S_IFMT;
                 if (ret != -1 && (kind == S_IFREG || kind == S_IFLNK)) {
                         stat = makestat(&st);
                         if (stat == NULL)
-                                goto bail;
-                        PyList_SET_ITEM(stats, i, stat);
+                                return NULL;
+                        Py_INCREF(name);
+                        ret = PyDict_SetItem(stats, name, stat);
+                        if (ret == -1) {
+                                Py_DECREF(name);
+                                return NULL;
+                        }
                 } else {
+                        Py_INCREF(name);
                         Py_INCREF(Py_None);
-                        PyList_SET_ITEM(stats, i, Py_None);
+                        ret = PyDict_SetItem(stats, name, Py_None);
+                        if (ret == -1) {
+                                Py_DECREF(name);
+                                Py_DECREF(Py_None);
+                                return NULL;
+                        }
                 }
         }
 
-        return stats;
-
-bail:
-        Py_DECREF(stats);
-        return NULL;
+        Py_RETURN_NONE;
 }
 
 #endif /* ndef _WIN32 */
diff --git a/mercurial/posix.py b/mercurial/posix.py
--- a/mercurial/posix.py
+++ b/mercurial/posix.py
@@ -354,9 +354,9 @@
 
 _wantedkinds = set([stat.S_IFREG, stat.S_IFLNK])
 
-def statfiles(files):
-    '''Stat each file in files. Yield each stat, or None if a file does not
-    exist or has a type we don't care about.'''
+def statfiles(files, stats):
+    '''Stat each file in files. Update the dict with each stat, or
+    None if a file does not exist or has a type we don't care about.'''
     lstat = os.lstat
     getkind = stat.S_IFMT
     for nf in files:
@@ -368,7 +368,7 @@
             if err.errno not in (errno.ENOENT, errno.ENOTDIR):
                 raise
             st = None
-        yield st
+        stats[nf] = st
 
 def getuser():
     '''return name of current user'''
diff --git a/mercurial/windows.py b/mercurial/windows.py
--- a/mercurial/windows.py
+++ b/mercurial/windows.py
@@ -215,9 +215,9 @@
 
 _wantedkinds = set([stat.S_IFREG, stat.S_IFLNK])
 
-def statfiles(files):
-    '''Stat each file in files. Yield each stat, or None if a file
-    does not exist or has a type we don't care about.
+def statfiles(files, stats):
+    '''Stat each file in files. Update the dict with each stat, or
+    None if a file does not exist or has a type we don't care about.
 
     Cluster and cache stat per directory to minimize number of OS stat calls.'''
     dircache = {} # dirname -> filename -> status | None if file does not exist
@@ -243,7 +243,7 @@
                     raise
                 dmap = {}
             cache = dircache.setdefault(dir, dmap)
-        yield cache.get(base, None)
+        stats[nf] = cache.get(base, None)
 
 def username(uid=None):
     """Return the name of the user with the given uid.


More information about the Mercurial-devel mailing list