[PATCH 6 of 6] dirs: reuse strings in _incdir
Bryan O'Sullivan
bos at serpentine.com
Thu Mar 28 20:22:48 CDT 2013
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1364520157 25200
# Thu Mar 28 18:22:37 2013 -0700
# Node ID b46e9a87a202d0c001ee6e25c0ce47c8c56fbd5e
# Parent 96da5c8a645016dc0b68855ff26293ceb1188cf2
dirs: reuse strings in _incdir
In the common case where we are updating a refcount for an existing
directory, this reduces the number of memory allocations and deallocations
from O(n) to 1, where n is the number of '/' characters in a path.
Below are some cumulative effects of these three changes (combining
the original naive C code, and the two speedups) relative to the base
Python code.
perfdirs performance in a working dir with 170,000 files:
Python 650 msec
now 167
Commands run in the same working dir show that this speedup is visible
in practice:
hg add
Python 1.09 sec
now .60
hg addremove
Python 4.79
now 4.27
hg rebase -d @^
Python 17.2
now 15.5
diff --git a/mercurial/dirs.c b/mercurial/dirs.c
--- a/mercurial/dirs.c
+++ b/mercurial/dirs.c
@@ -15,6 +15,9 @@
* We violate the Python rule that integers are immutable. Said
* integers are used only for internal refcounting by this code, and
* are not (and must not be) used by Python code.
+ *
+ * We also violate the rule that strings are immutable, but this is an
+ * internal implementation detail that is not visible to Python code.
*/
static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
@@ -32,17 +35,25 @@ static inline Py_ssize_t _finddir(PyObje
static int _incdirs(PyObject *dirs, PyObject *path)
{
+ const char *cpath = PyString_AS_STRING(path);
Py_ssize_t pos = PyString_GET_SIZE(path);
- PyObject *newval = NULL, *key = NULL;
+ PyObject *newval = NULL, *key;
int ret = -1;
+ /* It's likely that every prefix already has an entry in the
+ map. Try to avoid allocating and deallocating a string for
+ each prefix we check. */
+ key = PyString_FromStringAndSize(cpath, pos);
+
+ if (key == NULL)
+ goto bail;
+
while ((pos = _finddir(path, pos - 1)) != -1) {
- PyObject *val;
+ PyObject *val, *newkey;
- key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
-
- if (key == NULL)
- goto bail;
+ ((PyStringObject *) key)->ob_shash = -1;
+ PyString_GET_SIZE(key) = pos;
+ PyString_AS_STRING(key)[pos] = '\0';
val = PyDict_GetItem(dirs, key);
/* Avoid allocating and deallocating an int every time
@@ -57,7 +68,6 @@ static int _incdirs(PyObject *dirs, PyOb
goto bail;
}
PyInt_AS_LONG(val) += 1;
- Py_CLEAR(key);
continue;
}
@@ -72,7 +82,13 @@ static int _incdirs(PyObject *dirs, PyOb
ret = PyDict_SetItem(dirs, key, newval);
if (ret == -1)
goto bail;
- Py_CLEAR(key);
+ newkey = PyString_FromStringAndSize(PyString_AS_STRING(key),
+ pos - 1);
+ if (newkey == NULL)
+ goto bail;
+
+ Py_DECREF(key);
+ key = newkey;
Py_CLEAR(newval);
}
ret = 0;
More information about the Mercurial-devel
mailing list