D1973: bdiff: write a native version of splitnewlines
durin42 (Augie Fackler)
phabricator at mercurial-scm.org
Thu Feb 1 21:58:34 UTC 2018
durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile before:
! wall 0.309280 comb 0.350000 user 0.290000 sys 0.060000 (best of 32)
./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile after:
! wall 0.241572 comb 0.260000 user 0.240000 sys 0.020000 (best of 39)
so it's about 20% faster. I hate Python. I wish we could usefully
write this in Rust, but it doesn't look like that's realistic without
using the cpython crate, which I'd still like to avoid.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D1973
AFFECTED FILES
mercurial/cext/bdiff.c
mercurial/mdiff.py
CHANGE DETAILS
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -40,6 +40,8 @@
lines[-1] = lines[-1][:-1]
return lines
+splitnewlines = getattr(bdiff, 'splitnewlines', splitnewlines)  # use bdiff's version when it has one, else keep the Python one above
+
class diffopts(object):
'''context is the number of context lines
text treats all files as text
diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c
--- a/mercurial/cext/bdiff.c
+++ b/mercurial/cext/bdiff.c
@@ -182,13 +182,64 @@
return result ? result : PyErr_NoMemory();
}
+/*
+ * Build a new string object from the len bytes starting at source and
+ * store it at index destidx of list.
+ *
+ * Returns true on success. Returns false, with a Python exception set,
+ * when the string cannot be created or cannot be stored in the list.
+ */
+static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
+                          const char *source, Py_ssize_t len)
+{
+	PyObject *sliced = PyString_FromStringAndSize(source, len);
+	if (sliced == NULL)
+		return false;
+	/* PyList_SetItem takes over the reference to sliced, even when it
+	 * fails, so there is nothing left to release on the error path. */
+	if (PyList_SetItem(list, destidx, sliced) < 0)
+		return false;
+	return true;
+}
+
+/*
+ * splitnewlines(text) -> list of strings
+ *
+ * Split text after every '\n', keeping the newline at the end of each
+ * piece. A newline in the very last byte does not start a new (empty)
+ * element, and empty input yields an empty list.
+ *
+ * Returns a new list, or NULL with a Python exception set on failure.
+ */
+static PyObject *splitnewlines(PyObject *self, PyObject *args)
+{
+	const char *text;
+	/* Offsets share the type of size so that inputs larger than
+	 * INT_MAX bytes cannot overflow them. */
+	Py_ssize_t nelts = 0, size, i, start = 0;
+	/* Must start out NULL: the abort path releases it unconditionally,
+	 * including when argument parsing fails before the list exists. */
+	PyObject *result = NULL;
+
+	/* NOTE(review): "s#" writes a Py_ssize_t only when PY_SSIZE_T_CLEAN
+	 * is defined before Python.h is included -- confirm for this file. */
+	if (!PyArg_ParseTuple(args, "s#", &text, &size))
+		goto abort;
+	if (!size) {
+		return PyList_New(0);
+	}
+	/* This loops to size-1 because if the last byte is a newline,
+	 * we don't want to perform a split there. */
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			++nelts;
+		}
+	}
+	/* One element per split point, plus the final piece. */
+	if ((result = PyList_New(nelts + 1)) == NULL)
+		goto abort;
+	nelts = 0;
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			if (!sliceintolist(result, nelts++, text + start,
+			                   i - start + 1))
+				goto abort;
+			start = i + 1;
+		}
+	}
+	/* Whatever follows the last split point becomes the last element. */
+	if (start < size) {
+		if (!sliceintolist(result, nelts++, text + start, size - start))
+			goto abort;
+	}
+	return result;
+abort:
+	Py_XDECREF(result);
+	return NULL;
+}
+
static char mdiff_doc[] = "Efficient binary diff.";
static PyMethodDef methods[] = {
{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
+ {"splitnewlines", splitnewlines, METH_VARARGS,
+ "like str.splitlines, but only split on newlines\n"}, /* mdiff.py looks this name up with getattr() */
{NULL, NULL}
};
To: durin42, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list