[PATCH 12 of 14] sparse-revlog: introduce native (C) implementation of slicechunktodensity
Boris FELD
lothiraldan at gmail.com
Tue Nov 13 10:35:06 EST 2018
On 12/11/2018 15:50, Yuya Nishihara wrote:
> On Mon, 12 Nov 2018 10:55:47 +0100, Boris Feld wrote:
>> # HG changeset patch
>> # User Boris Feld <boris.feld at octobus.net>
>> # Date 1541785632 -3600
>> # Fri Nov 09 18:47:12 2018 +0100
>> # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
>> # Parent 0ea42453fa491793d1e145f5093b65e84cb65e97
>> # EXP-Topic sparse-perf
>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4a1104eade1d
>> sparse-revlog: introduce native (C) implementation of slicechunktodensity
> Just quickly scanned this patch, no other patches in this series nor "sparse"
> logic is reviewed.
>
>> +struct Gap {
>> + long size;
>> + long idx;
>> +};
>> +
>> +static int gap_compare(const void *left, const void *right)
>> +{
>> + return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
>> +}
>> +static int long_compare(const void *left, const void *right)
>> +{
>> + return *(long *)left - *(long *)right;
>> +}
> Nit: (const <type> *)<left|right> as the argument is const void*.
>
> If we're sure 'left - right' never exceeds the int range, it might be better
> to explicitly cast the result to (int) to silence possible warning.
>
>> +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
>> +{
>> + /* method arguments */
>> + PyObject *list_revs = NULL; /* revisions in the chain */
>> + double targetdensity = 0.5; /* min density to achieve */
>> + long mingapsize = 0; /* threshold to ignore gaps */
>> +
>> + /* other core variables */
>> + long i; /* used for various iteration */
>> + PyObject *result = NULL; /* the final return of the function */
>> +
>> + /* generic information about the delta chain being slice */
>> + Py_ssize_t num_revs = 0; /* size of the full delta chain */
>> + Py_ssize_t *revs = NULL; /* native array of revision in the chain */
>> + long chainpayload = 0; /* sum of all delta in the chain */
>> + long deltachainspan = 0; /* distance from first byte to last byte */
>> +
>> + /* variable used for slicing the delta chain */
>> + long readdata = 0; /* amount of data currently planned to be read */
>> + double density = 0; /* ration of payload data compared to read ones */
>> + struct Gap *gaps = NULL; /* array of notable gap in the chain */
>> + long num_gaps = 0; /* total number of notable gap recorded so far */
>> + Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
>> + long num_selected = 0; /* number of gaps skipped */
> Maybe i, num_rgaps, num_selected should be Py_ssize_t? In general, long can
> be narrower than ssize_t.
>
>> + PyObject *chunk = NULL; /* individual slice */
>> + PyObject *allchunks = PyList_New(num_selected); /* all slices */
> Needs to make sure that allchunks isn't NULL.
>
> And it's probably better to just initialize the list with literal 0, since
> we have no for-loop to fill in the list items up to num_selected.
>
>> + /* parsing argument */
>> + if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
>> + &targetdensity, &mingapsize)) {
>> + goto bail;
>> + }
>> +
>> + /* If the delta chain contains a single element, we do not need slicing
>> + */
>> + num_revs = PyList_GET_SIZE(list_revs);
>> + if (num_revs <= 1) {
>> + result = PyTuple_Pack(1, list_revs);
>> + goto done;
>> + }
>> +
>> + /* Turn the python list into a native integer array (for efficiency) */
>> + revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));
> num_revs * sizeof(...) can overflow. Using calloc() is safer if the zeroing
> cost isn't significant.
>
>> + if (revs == NULL) {
>> + PyErr_NoMemory();
>> + goto bail;
>> + }
>> + for (i = 0; i < num_revs; i++) {
>> + Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
>> + if (revnum == -1 && PyErr_Occurred()) {
>> + goto bail;
>> + }
>> + revs[i] = revnum;
>> + }
> Are we sure revnum is in valid range?
What do you mean ?
>
>> +
>> + /* Compute and check various property of the unsliced delta chain */
>> + deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
>> +
>> + if (deltachainspan <= mingapsize) {
>> + result = PyTuple_Pack(1, list_revs);
>> + goto done;
>> + }
>> + chainpayload = 0;
>> + for (i = 0; i < num_revs; i++) {
>> + chainpayload += index_get_length(self, revs[i]);
>> + }
>> +
>> + readdata = deltachainspan;
>> + density = 1.0;
>> +
>> + if (0 < deltachainspan) {
>> + density = (double)chainpayload / (double)deltachainspan;
>> + };
>> +
>> + if (density >= targetdensity) {
>> + result = PyTuple_Pack(1, list_revs);
>> + goto done;
>> + }
>> +
>> + /* if chain is too sparse, look for relevant gaps */
>> + gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));
> This, too.
>> + if (gaps == NULL) {
>> + PyErr_NoMemory();
>> + goto bail;
>> + }
>> +
>> + Py_ssize_t previous_end = -1;
>> + for (i = 0; i < num_revs; i++) {
>> + Py_ssize_t revstart = index_get_start(self, revs[i]);
>> + Py_ssize_t revsize = index_get_length(self, revs[i]);
>> + if (revsize <= 0) {
>> + continue;
>> + }
>> + if (previous_end >= 0) {
>> + Py_ssize_t gapsize = revstart - previous_end;
>> + if (gapsize > mingapsize) {
>> + gaps[num_gaps].size = gapsize;
>> + gaps[num_gaps].idx = i;
>> + num_gaps += 1;
>> + }
>> + }
>> + previous_end = revstart + revsize;
>> + }
>> + if (num_gaps == 0) {
>> + result = PyTuple_Pack(1, list_revs);
>> + goto done;
>> + }
>> + qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
>> +
>> + /* Slice the largest gap first, they improve the density the most */
>> + selected_indices =
>> + (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
>> + if (selected_indices == NULL) {
>> + PyErr_NoMemory();
>> + goto bail;
>> + }
>> +
>> + for (i = num_gaps - 1; i >= 0; i--) {
>> + selected_indices[num_selected] = gaps[i].idx;
>> + readdata -= gaps[i].size;
>> + num_selected += 1;
>> + if (readdata <= 0) {
>> + density = 1.0;
>> + } else {
>> + density = (double)chainpayload / (double)readdata;
>> + }
>> + if (density >= targetdensity) {
>> + break;
>> + }
>> + }
>> + qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
>> + &long_compare);
> Py_ssize_t != long on Windows, IIRC.
>
>> + /* create the resulting slice */
>> + long previdx = 0;
>> + selected_indices[num_selected] = num_revs;
>> + for (i = 0; i <= num_selected; i++) {
>> + long idx = selected_indices[i];
>> + chunk = _trimchunk(self, list_revs, previdx, idx);
>> + if (chunk == NULL) {
>> + goto bail;
>> + }
>> + if (PyList_GET_SIZE(chunk) > 0) {
>> + if (PyList_Append(allchunks, chunk) == -1) {
>> + goto bail;
>> + }
>> + }
>> + Py_DECREF(chunk);
>> + chunk = NULL;
>> + previdx = idx;
>> + }
>> + result = allchunks;
>> + goto done;
>> +
>> +bail:
>> + if (allchunks != NULL) {
>> + Py_DECREF(allchunks);
>> + }
>> + if (chunk != NULL) {
>> + Py_DECREF(chunk);
>> + }
> Py_XDECREF() can be used instead of != NULL.
>
>> +done:
>> + if (revs != NULL) {
>> + free(revs);
>> + }
>> + if (gaps != NULL) {
>> + free(gaps);
>> + }
>> + if (selected_indices != NULL) {
>> + free(selected_indices);
>> + }
> Nit: free(NULL) is guaranteed to be noop. No need to check != NULL.
>
>> + if (result != NULL) {
>> + Py_INCREF(result);
>> + }
> Looks like an excessive incref as PyTuple_Pack() returns a new reference
> for example.
>
>> + return result;
>> +}
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel
mailing list