[PATCH 4 of 6] dirstate: perform dirs calculation in C
Bryan O'Sullivan
bos at serpentine.com
Thu Mar 28 20:22:46 CDT 2013
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1364520157 25200
# Thu Mar 28 18:22:37 2013 -0700
# Node ID 492566bf24e94dcbb87bdfcfb10c102742c90ab5
# Parent 9479dece303d06808cce9e2e3d0a196511fe6ba8
dirstate: perform dirs calculation in C
This speeds up the computation substantially.
perfdirs performance in a working dir with 170,000 files:
Python 650 msec
C 249
This is a naive translation of the Python code. Future commits will
nearly double the speed of this C code.
diff --git a/mercurial/dirs.c b/mercurial/dirs.c
new file mode 100644
--- /dev/null
+++ b/mercurial/dirs.c
@@ -0,0 +1,211 @@
+/*
+ dirs.c - dynamic directory diddling for dirstates
+
+ Copyright 2013 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "util.h"
+
+static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
+{
+ const char *s = PyString_AS_STRING(path);
+
+ while (pos != -1) {
+ if (s[pos] == '/')
+ break;
+ pos -= 1;
+ }
+
+ return pos;
+}
+
+static int _incdirs(PyObject *dirs, PyObject *path)
+{
+ Py_ssize_t pos = PyString_GET_SIZE(path);
+ PyObject *newval = NULL, *key = NULL;
+ int ret = -1;
+
+ while ((pos = _finddir(path, pos - 1)) != -1) {
+ PyObject *val;
+ long v = 0;
+
+ key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+ if (key == NULL)
+ goto bail;
+
+ val = PyDict_GetItem(dirs, key);
+ if (val != NULL) {
+ if (!PyInt_Check(val)) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected int value");
+ goto bail;
+ }
+ v = PyInt_AS_LONG(val);
+ }
+
+ newval = PyInt_FromLong(v + 1);
+
+ if (newval == NULL)
+ goto bail;
+
+ ret = PyDict_SetItem(dirs, key, newval);
+ if (ret == -1)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(newval);
+ }
+ ret = 0;
+
+bail:
+ Py_XDECREF(key);
+ Py_XDECREF(newval);
+
+ return ret;
+}
+
+static int _decdirs(PyObject *dirs, PyObject *path)
+{
+ Py_ssize_t pos = PyString_GET_SIZE(path);
+ PyObject *newval = NULL, *key = NULL;
+ int ret = -1;
+
+ while ((pos = _finddir(path, pos - 1)) != -1) {
+ PyObject *val;
+ long v;
+
+ key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+ if (key == NULL)
+ goto bail;
+
+ val = PyDict_GetItem(dirs, key);
+ if (val == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "expected a value, found none");
+ goto bail;
+ }
+ else if (!PyInt_Check(val)) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected int value");
+ goto bail;
+ }
+ v = PyInt_AS_LONG(val);
+
+ if (v < 1) {
+ PyErr_SetString(PyExc_ValueError,
+ "expected a positive value");
+ goto bail;
+ }
+ else if (v == 1) {
+ if (PyDict_DelItem(dirs, key) == -1)
+ goto bail;
+ continue;
+ }
+ newval = PyInt_FromLong(v - 1);
+
+ if (newval == NULL)
+ goto bail;
+
+ ret = PyDict_SetItem(dirs, key, newval);
+ if (ret == -1)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(newval);
+ }
+ ret = 0;
+
+bail:
+ Py_XDECREF(key);
+ Py_XDECREF(newval);
+
+ return ret;
+}
+
+/*
+ * Calculate a refcounted set of directory names for the files in a
+ * dirstate. These refcounts are internal; client code must only look at
+ * the directory names.
+ */
+PyObject *calcdirs(PyObject *self, PyObject *args)
+{
+ PyObject *dirs = NULL, *map;
+ PyObject *key, *value;
+ Py_ssize_t pos = 0;
+
+ if (!PyArg_ParseTuple(args, "O!:calcdirs", &PyDict_Type, &map))
+ goto done;
+
+ dirs = PyDict_New();
+
+ if (dirs == NULL)
+ goto done;
+
+ while (PyDict_Next(map, &pos, &key, &value)) {
+ PyObject *st;
+
+ if (!PyString_Check(key)) {
+ PyErr_SetString(PyExc_TypeError, "expected string key");
+ goto bail;
+ }
+ if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) == 0) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected non-empty tuple value");
+ goto bail;
+ }
+
+ st = PyTuple_GET_ITEM(value, 0);
+
+ if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
+ PyErr_SetString(PyExc_TypeError,
+ "expected non-empty string in slot 0");
+ goto bail;
+ }
+
+ if (PyString_AS_STRING(st)[0] == 'r')
+ continue;
+
+ if (_incdirs(dirs, key) == -1)
+ goto bail;
+ }
+ goto done;
+
+bail:
+ Py_CLEAR(dirs);
+
+done:
+ return dirs;
+}
+
+PyObject *incdirs(PyObject *self, PyObject *args)
+{
+ PyObject *dirs, *path;
+
+ if (!PyArg_ParseTuple(args, "O!O!:incdirs",
+ &PyDict_Type, &dirs, &PyString_Type, &path))
+ return NULL;
+
+ if (_incdirs(dirs, path) == -1)
+ return NULL;
+
+ Py_RETURN_NONE;
+}
+
+PyObject *decdirs(PyObject *self, PyObject *args)
+{
+ PyObject *dirs, *path;
+
+ if (!PyArg_ParseTuple(args, "O!O!:decdirs",
+ &PyDict_Type, &dirs, &PyString_Type, &path))
+ return NULL;
+
+ if (_decdirs(dirs, path) == -1)
+ return NULL;
+
+ Py_RETURN_NONE;
+}
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -52,6 +52,11 @@ def _calcdirs(m):
_incdirs(dirs, f)
return dirs
+if util.safehasattr(parsers, 'calcdirs'):
+ _calcdirs = parsers.calcdirs
+ _incdirs = parsers.incdirs
+ _decdirs = parsers.decdirs
+
class dirstate(object):
def __init__(self, opener, ui, root, validate):
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1878,6 +1878,9 @@ static char parsers_doc[] = "Efficient c
PyObject *encodedir(PyObject *self, PyObject *args);
PyObject *pathencode(PyObject *self, PyObject *args);
PyObject *lowerencode(PyObject *self, PyObject *args);
+PyObject *calcdirs(PyObject *self, PyObject *args);
+PyObject *incdirs(PyObject *self, PyObject *args);
+PyObject *decdirs(PyObject *self, PyObject *args);
static PyMethodDef methods[] = {
{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
@@ -1887,6 +1890,11 @@ static PyMethodDef methods[] = {
{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
+ {"calcdirs", calcdirs, METH_VARARGS, "build a dict of directories\n"},
+ {"incdirs", incdirs, METH_VARARGS,
+ "increment paths in dict of directories\n"},
+ {"decdirs", decdirs, METH_VARARGS,
+ "decrement paths in dict of directories\n"},
{NULL, NULL}
};
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -427,7 +427,8 @@ extmodules = [
Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
- Extension('mercurial.parsers', ['mercurial/parsers.c',
+ Extension('mercurial.parsers', ['mercurial/dirs.c',
+ 'mercurial/parsers.c',
'mercurial/pathencode.c']),
]
More information about the Mercurial-devel
mailing list