[PATCH 2 of 2 resend] revlog: switch to a C version of headrevs
Bryan O'Sullivan
bos at serpentine.com
Sat May 12 12:13:46 CDT 2012
# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1336842350 -7200
# Node ID 59113b339afae7273cf4174e7c70eff2ac145b77
# Parent 6e74ed120c0115cf7619ffc0e5f0cab01b5dcb1b
revlog: switch to a C version of headrevs
The C implementation is more than 100 times faster than the Python
version (which is still available as a fallback). The result of the
headrevs calculation is cached, because it's still somewhat expensive,
and headrevs is called several times when the tag cache is stale
or missing.
In practice, in a repo with 330,000 revs and a stale .hg/cache/tags
file, this patch improves the performance of "hg tip" from 2.2 to
1.6 seconds.
diff -r 6e74ed120c01 -r 59113b339afa mercurial/parsers.c
--- a/mercurial/parsers.c Sat May 12 19:03:22 2012 +0200
+++ b/mercurial/parsers.c Sat May 12 19:05:50 2012 +0200
@@ -246,6 +246,7 @@
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
PyObject *added; /* populated on demand */
+ PyObject *headrevs; /* cache, invalidated on changes */
nodetree *nt; /* base-16 trie */
int ntlength; /* # nodes in use */
int ntcapacity; /* # nodes allocated */
@@ -463,6 +464,11 @@
if (self->nt)
nt_insert(self, node, (int)offset);
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
+
Py_RETURN_NONE;
}
@@ -488,6 +494,10 @@
free(self->nt);
self->nt = NULL;
}
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
}
static PyObject *index_clearcaches(indexObject *self)
@@ -538,6 +548,104 @@
return NULL;
}
+/*
+ * When we cache a list, we want to be sure the caller can't mutate
+ * the cached copy.
+ */
+static PyObject *list_copy(PyObject *list)
+{
+ Py_ssize_t len = PyList_GET_SIZE(list);
+ PyObject *newlist = PyList_New(len);
+ Py_ssize_t i;
+
+ if (newlist == NULL)
+ return NULL;
+
+ for (i = 0; i < len; i++) {
+ PyObject *obj = PyList_GET_ITEM(list, i);
+ Py_INCREF(obj);
+ PyList_SET_ITEM(newlist, i, obj);
+ }
+
+ return newlist;
+}
+
+static PyObject *index_headrevs(indexObject *self)
+{
+ Py_ssize_t i, len, addlen;
+ char *ishead = NULL;
+ PyObject *heads;
+
+ if (self->headrevs)
+ return list_copy(self->headrevs);
+
+ len = index_length(self) - 1;
+ heads = PyList_New(0);
+ if (heads == NULL)
+ goto bail;
+ if (len == 0) {
+ PyObject *nullid = PyInt_FromLong(-1);
+ if (nullid == NULL || PyList_Append(heads, nullid) == -1)
+ goto bail;
+ goto done;
+ }
+
+ ishead = malloc(len);
+ if (ishead == NULL)
+ goto bail;
+ memset(ishead, 1, len);
+
+ for (i = 0; i < self->raw_length; i++) {
+ const char *data = index_deref(self, i);
+ int parent_1 = getbe32(data + 24);
+ int parent_2 = getbe32(data + 28);
+ if (parent_1 >= 0)
+ ishead[parent_1] = 0;
+ if (parent_2 >= 0)
+ ishead[parent_2] = 0;
+ }
+
+ addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+
+ for (i = 0; i < addlen; i++) {
+ PyObject *rev = PyList_GET_ITEM(self->added, i);
+ PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+ PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+ long parent_1, parent_2;
+
+ if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+ PyErr_SetString(PyExc_TypeError,
+ "revlog parents are invalid");
+ goto bail;
+ }
+ parent_1 = PyInt_AS_LONG(p1);
+ parent_2 = PyInt_AS_LONG(p2);
+ if (parent_1 >= 0)
+ ishead[parent_1] = 0;
+ if (parent_2 >= 0)
+ ishead[parent_2] = 0;
+ }
+
+ for (i = 0; i < len; i++) {
+ PyObject *head;
+
+ if (!ishead[i])
+ continue;
+ head = PyInt_FromLong(i);
+ if (head == NULL || PyList_Append(heads, head) == -1)
+ goto bail;
+ }
+
+done:
+ self->headrevs = heads;
+ free(ishead);
+ return list_copy(self->headrevs);
+bail:
+ Py_XDECREF(heads);
+ free(ishead);
+ return NULL;
+}
+
static inline int nt_level(const char *node, Py_ssize_t level)
{
int v = node[level>>1];
@@ -936,6 +1044,7 @@
{
Py_ssize_t start, stop, step, slicelength;
Py_ssize_t length = index_length(self);
+ int ret = 0;
if (PySlice_GetIndicesEx((PySliceObject*)item, length,
&start, &stop, &step, &slicelength) < 0)
@@ -981,7 +1090,9 @@
self->ntrev = (int)start;
}
self->length = start + 1;
- return 0;
+ if (start < self->raw_length)
+ self->raw_length = start;
+ goto done;
}
if (self->nt) {
@@ -989,10 +1100,15 @@
if (self->ntrev > start)
self->ntrev = (int)start;
}
- return self->added
- ? PyList_SetSlice(self->added, start - self->length + 1,
- PyList_GET_SIZE(self->added), NULL)
- : 0;
+ if (self->added)
+ ret = PyList_SetSlice(self->added, start - self->length + 1,
+ PyList_GET_SIZE(self->added), NULL);
+done:
+ if (self->headrevs) {
+ Py_DECREF(self->headrevs);
+ self->headrevs = NULL;
+ }
+ return ret;
}
/*
@@ -1082,6 +1198,7 @@
self->cache = NULL;
self->added = NULL;
+ self->headrevs = NULL;
self->offsets = NULL;
self->nt = NULL;
self->ntlength = self->ntcapacity = 0;
@@ -1146,6 +1263,8 @@
"clear the index caches"},
{"get", (PyCFunction)index_m_get, METH_VARARGS,
"get an index entry"},
+ {"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
+ "get head revisions"},
{"insert", (PyCFunction)index_insert, METH_VARARGS,
"insert an index entry"},
{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
diff -r 6e74ed120c01 -r 59113b339afa mercurial/revlog.py
--- a/mercurial/revlog.py Sat May 12 19:03:22 2012 +0200
+++ b/mercurial/revlog.py Sat May 12 19:05:50 2012 +0200
@@ -635,6 +635,10 @@
return (orderedout, roots, heads)
def headrevs(self):
+ try:
+ return self.index.headrevs()
+ except AttributeError:
+ pass
count = len(self)
if not count:
return [nullrev]
More information about the Mercurial-devel
mailing list