[PATCH 3 of 5] revsbetween: add a C implementation

Pierre-Yves David pierre-yves.david at ens-lyon.org
Fri Aug 7 02:41:52 CDT 2015



On 08/06/2015 11:10 PM, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon at fb.com>
> # Date 1438921725 25200
> #      Thu Aug 06 21:28:45 2015 -0700
> # Branch stable
> # Node ID 074dd6f6412076af9704bc9848c97d4c647f330d
> # Parent  c0fc894d0539cb526724b1cd15488ee6dd9314ff
> revsbetween: add a C implementation
>
> This patch is part of a series of patches to speed up the computation of
> revset.revsbetween by introducing a C implementation. The main motivation is to
> speed up smartlog on big repositories. At the end of the series, on our big
> repositories the computation of revsbetween is 10-50x faster and smartlog on is
> 2x-5x faster.
>
> This patch introduces a C implementation for revsbetween following closely the
> Python implementation but optimized by using C data structures.

The logic seems good to me but I've a couple of comment inline. I also 
would like someone with more Cpython experience to look at this.

>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1105,6 +1105,136 @@
>   		phases[i] = phases[parent_2];
>   }
>
> +static PyObject *revsbetween(indexObject *self, PyObject *args)
> +{
> +
> +	/* Input */
> +	long minroot;
> +	PyObject *onlyrootsarg = NULL;
> +	int onlyroots = 0;
> +	PyObject *heads = NULL;
> +	Py_ssize_t numheads;
> +	PyObject *roots = NULL;
> +	PyObject *reachable = NULL;
> +
> +	PyObject *val;
> +	Py_ssize_t len = index_length(self) - 1;
> +	long revnum;
> +	Py_ssize_t k;
> +	Py_ssize_t i;
> +	int r;
> +	int minidx;
> +	int parents[2];
> +
> +	/* Internal data structure:
> +	 * tovisit: array of length len, filled upto lentovisit
> +	 * seen: array of length len+1 (because nullrev) 0: not seen, 1 seen*/
> +	long *tovisit = NULL;
> +	long lentovisit = 0;
> +	char *seen = NULL;
> +
> +	/* Get arguments */
> +	if (!PyArg_ParseTuple(args, "lOOO", &minroot, &heads, &roots, &onlyrootsarg))
> +		goto bail;
> +	if (roots == NULL || !PySet_Check(roots))
> +		goto bail;
> +	if (heads == NULL || !PyList_Check(heads))
> +		goto bail;
> +	if (onlyrootsarg == NULL || !PyBool_Check(onlyrootsarg))
> +		goto bail;
> +	if (onlyrootsarg == Py_True)
> +		onlyroots = 1;
> +
> +	/* Initialize return set */
> +	reachable = PySet_New(0);
> +	if (reachable == NULL)
> +		goto bail;
> +
> +	/* Initialize internal datastructures */
> +	tovisit = (long *)malloc(2*len * sizeof(long));

Looks like you let a 2* in there.

> +	if (tovisit == NULL)
> +		goto release_reachable;
> +
> +	seen = (char *)calloc(len+1, 1);
> +	if (seen == NULL)
> +		goto release_tovisit;

Can we get some more comment about what each of this failure mean?

> +	/* Populate tovisit with all the heads */
> +	numheads = PyList_GET_SIZE(heads);
> +	for (i = 0; i < numheads; i++) {
> +		revnum = PyInt_AS_LONG(PyList_GET_ITEM(heads, i));
> +		if (seen[revnum+1] == 0) {
> +			tovisit[lentovisit++] = revnum;
> +			seen[revnum+1]=1;
> +		}
> +	}
> +
> +	/* Visit the tovisit list and find the reachable roots */
> +	k = 0;
> +	while (k < lentovisit) {
> +		/* Add the node to reachable if it is a root*/
> +		revnum = tovisit[k++];
> +		val = PyInt_FromLong(revnum);
> +		if (PySet_Contains(roots, val) == 1) {
> +			PySet_Add(reachable, val);
> +			if (onlyroots == 1) {
> +				Py_XDECREF(val);
> +				continue;
> +			}
> +		}
> +		Py_XDECREF(val);
> +
> +		/* And its parents to the list of nodes to visit */
> +		if (revnum != -1) {
> +			r = index_get_parents(self, revnum, parents, (int)len - 1);
> +			if (r < 0)
> +				goto release_seen;
> +
> +			for (i = 0; i < 2; i++) {
> +				if (seen[parents[i]+1] == 0 && parents[i] >= minroot) {
> +					tovisit[lentovisit++] = parents[i];
> +					seen[parents[i]+1]=1;
> +				}
> +			}
> +		}
> +	}
> +
> +	/* Find all the nodes in between the roots we found and the heads
> +	 * and add them to the reachable set */
> +	if (onlyroots == 0) {
> +		minidx = minroot;
> +		if (minidx < 0)
> +			minidx = 0;
> +		for (i = minidx; i < len; i++) {
> +			if (seen[i+1] == 1) {
> +				r = index_get_parents(self, i, parents, (int)len - 1);
> +				if (r < 0)
> +					goto release_seen;

Same feedback about explaining the error case.

> +				for (k = 0; k < 2; k++) {
> +					PyObject *p = PyInt_FromLong(parents[k]);
> +					if (PySet_Contains(reachable, p) == 1)
> +						PySet_Add(reachable, PyInt_FromLong(i));
> +					Py_XDECREF(p);
> +				}
> +			}
> +		}
> +	}
> +
> +release_seen:
> +	if (seen != NULL)
> +		free(seen);
> +release_tovisit:
> +	if (tovisit != NULL)
> +		free(tovisit);
> +	return reachable;
> +release_reachable:
> +	Py_XDECREF(reachable);
> +bail:
> +	val = Py_None;
> +	Py_INCREF(Py_None);
> +	return val;
> +}
> +
>   static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
>   {
>   	PyObject *roots = Py_None;
> @@ -2279,6 +2409,8 @@
>   	 "get an index entry"},
>   	{"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
>   			METH_VARARGS, "compute phases"},
> +	{"revsbetween", (PyCFunction)revsbetween, METH_VARARGS,
> +		"revsbetween"},
>   	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
>   	 "get head revisions"}, /* Can do filtering since 3.2 */
>   	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
>

-- 
Pierre-Yves David


More information about the Mercurial-devel mailing list