[PATCH 1 of 2 V2] phases: add set per phase in C phase computation

Fri May 15 15:51:59 CDT 2015

On 05/12/2015 02:13 PM, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon at fb.com>
> # Date 1427912237 25200
> #      Wed Apr 01 11:17:17 2015 -0700
> # Node ID 3ac4006b09e1bf149714eb444caa2452720ae4aa
> # Parent  e9edd53770fb77a9787a3e6592a3bf0a29c1bd80
> phases: add set per phase in C phase computation
>
> To speed up the computation of draft(), secret(), divergent(), obsolete() and
> unstable() we need to have a fast way of getting the list of revisions that
> are in draft(), secret() or the union of both: not public().
>
> This patch extends the work on phase computation in C and make the phase
> computation code also return a list of set for each non public phase.
> Using these sets we can quickly obtain all the revisions of a given phase.
> We do not return a set for the public phase as we expect it to be roughly the
> size of the repo. Also, it can be computed easily by substracting the entries in the
> non public phases from all the revs in the repo.
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1071,14 +1071,17 @@
>   		phases[i] = phases[parent_2];
>   }
>
> -static PyObject *compute_phases(indexObject *self, PyObject *args)
> +static PyObject *compute_revs_phases(indexObject *self, PyObject *args)

I'm not sure if the C function name have to change. (but the python name 
must, as pointed by Matt). However I'm not fan of the new name.
'compute_revs_phases' is a bit vague. What about something more 
informative 'compute_phases_map_sets' to convey the fact we return a 
{rev → phase} map (as a list) and sets for each phases?

>   {
>   	PyObject *roots = Py_None;
> +	PyObject *ret = NULL;
>   	PyObject *phaseslist = NULL;
>   	PyObject *phaseroots = NULL;
>   	PyObject *rev = NULL;
>   	PyObject *p1 = NULL;
>   	PyObject *p2 = NULL;
> +	PyObject *phaseset = NULL;
> +	PyObject *phasessetlist = NULL;
>   	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
>   	Py_ssize_t len = index_length(self) - 1;
>   	Py_ssize_t numphase = 0;
> @@ -1088,6 +1091,7 @@
>   	int parent_1, parent_2;
>   	char *phases = NULL;
>   	const char *data;
> +	long phase;
>
>   	if (!PyArg_ParseTuple(args, "O", &roots))
>   		goto release_none;
> @@ -1100,13 +1104,24 @@
>   	/* Put the phase information of all the roots in phases */
>   	numphase = PyList_GET_SIZE(roots)+1;
>   	minrevallphases = len + 1;
> +	phasessetlist = PyList_New(numphase);
> +	if (phasessetlist == NULL)
> +		goto release_none;
> +
> +	PyList_SET_ITEM(phasessetlist, 0, Py_None);
> +	Py_INCREF(Py_None);
> +
>   	for (i = 0; i < numphase-1; i++) {
>   		phaseroots = PyList_GET_ITEM(roots, i);
> +		phaseset = PySet_New(NULL);
> +		if (phaseset == NULL)
> +			goto release_phasesetlist;
> +		PyList_SET_ITEM(phasessetlist, i+1, phaseset);
>   		if (!PyList_Check(phaseroots))
> -			goto release_phases;
> +			goto release_phasesetlist;
>   		minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
>   		if (minrevphase == -2) /* Error from add_roots_get_min */
> -			goto release_phases;
> +			goto release_phasesetlist;
>   		minrevallphases = MIN(minrevallphases, minrevphase);
>   	}
>   	/* Propagate the phase information from the roots to the revs */
> @@ -1121,7 +1136,7 @@
>   			p2 = PyTuple_GET_ITEM(rev, 6);
>   			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
>   				PyErr_SetString(PyExc_TypeError, "revlog parents are invalid");
> -				goto release_phases;
> +				goto release_phasesetlist;
>   			}
>   			parent_1 = (int)PyInt_AS_LONG(p1);
>   			parent_2 = (int)PyInt_AS_LONG(p2);
> @@ -1131,14 +1146,35 @@
>   	/* Transform phase list to a python list */
>   	phaseslist = PyList_New(len);
>   	if (phaseslist == NULL)
> -		goto release_phases;
> -	for (i = 0; i < len; i++)
> -		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phases[i]));
> +		goto release_phasesetlist;
> +	for (i = 0; i < len; i++) {
> +		phase = phases[i];
> +		/* We only store the sets of phase for non public phase, the public phase
> +		 * is computed as a difference */
> +		if (phase != 0) {
> +			phaseset = PyList_GET_ITEM(phasessetlist, phase);
> +			PySet_Add(phaseset, PyInt_FromLong(i));
> +		}
> +		PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phase));
> +	}
> +	ret = PyList_New(2);
> +	if (ret == NULL)
> +		goto release_phaseslist;
>
> +	PyList_SET_ITEM(ret, 0, phaseslist);
> +	PyList_SET_ITEM(ret, 1, phasessetlist);
> +	/* We don't release phaseslist and phasessetlist as we return them to
> +	 * python */
> +	goto release_phases;
> +
> +release_phaseslist:
> +	Py_XDECREF(phaseslist);
> +release_phasesetlist:
> +	Py_XDECREF(phasessetlist);
>   release_phases:
>   	free(phases);
>   release_none:
> -	return phaseslist;
> +	return ret;
>   }
>
>   static PyObject *index_headrevs(indexObject *self, PyObject *args)
> @@ -2278,7 +2314,7 @@
>   	 "clear the index caches"},
>   	{"get", (PyCFunction)index_m_get, METH_VARARGS,
>   	 "get an index entry"},
> -	{"computephases", (PyCFunction)compute_phases, METH_VARARGS,
> +	{"computephases", (PyCFunction)compute_revs_phases, METH_VARARGS,
>   		"compute phases"},
>   	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
>   	 "get head revisions"}, /* Can do filtering since 3.2 */
> diff --git a/mercurial/phases.py b/mercurial/phases.py
> --- a/mercurial/phases.py
> +++ b/mercurial/phases.py
> @@ -99,7 +99,6 @@
>   - content is pushed as draft
>
>   """
> -

unrelated change

>   import os
>   import errno
>   from node import nullid, nullrev, bin, hex, short
> @@ -155,6 +154,7 @@
>               # Cheap trick to allow shallow-copy without copy module
>               self.phaseroots, self.dirty = _readroots(repo, phasedefaults)
>               self._phaserevs = None
> +            self._phasesets = None
>               self.filterunknown(repo)
>               self.opener = repo.svfs
>
> @@ -199,7 +199,8 @@
>                                         'nativephaseskillswitch'):
>                       self._computephaserevspure(repo)
>                   else:
> -                    self._phaserevs = self._getphaserevsnative(repo)
> +                    res = self._getphaserevsnative(repo)
> +                    self._phaserevs, self._phasesets = res

As pointed by matt, no change in the function python side, So old/new 
code will not recognised new/old C code.

>               except AttributeError:
>                   self._computephaserevspure(repo)

Unrelated to this patch, but explicitly testing for the attribute 
(util.safehasattr) would be safer. the current code can shadow other 
legitimate Attribute Error.

-- 
Pierre-Yves David