D1973: bdiff: write a native version of splitnewlines

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Thu Feb 1 21:58:34 UTC 2018


durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  ./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile before:
  ! wall 0.309280 comb 0.350000 user 0.290000 sys 0.060000 (best of 32)
  
  ./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile after:
  ! wall 0.241572 comb 0.260000 user 0.240000 sys 0.020000 (best of 39)
  
  so it's about 20% faster. I hate Python. I wish we could usefully
  write this in Rust, but it doesn't look like that's realistic without
  using the cpython crate, which I'd still like to avoid.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D1973

AFFECTED FILES
  mercurial/cext/bdiff.c
  mercurial/mdiff.py

CHANGE DETAILS

diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -40,6 +40,8 @@
             lines[-1] = lines[-1][:-1]
     return lines
 
+splitnewlines = getattr(bdiff, 'splitnewlines', splitnewlines)
+
 class diffopts(object):
     '''context is the number of context lines
     text treats all files as text
diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c
--- a/mercurial/cext/bdiff.c
+++ b/mercurial/cext/bdiff.c
@@ -182,13 +182,64 @@
 	return result ? result : PyErr_NoMemory();
 }
 
+/* Put a copy of source[0..len) at list[destidx]; false on failure. */
+static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
+			  const char *source, Py_ssize_t len) {
+	PyObject *sliced = PyString_FromStringAndSize(source, len);
+	if (sliced == NULL)
+		return false;
+	return PyList_SetItem(list, destidx, sliced) == 0; /* steals sliced */
+}
+
+/* splitnewlines(text) -> list of the lines of text, each keeping its
+ * trailing '\n'. Unlike str.splitlines, only '\n' ends a line. */
+static PyObject *splitnewlines(PyObject *self, PyObject *args)
+{
+	const char *text;
+	/* Py_ssize_t, not int: offsets must cover the whole buffer. */
+	Py_ssize_t i, start = 0, nelts = 0, size;
+	PyObject *result = NULL; /* abort: may run before allocation */
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &size))
+		goto abort;
+	if (!size)
+		return PyList_New(0);
+	/* This loops to size-1 because if the last byte is a newline,
+	 * we don't want to perform a split there. */
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n')
+			++nelts;
+	}
+	if ((result = PyList_New(nelts + 1)) == NULL)
+		goto abort;
+	nelts = 0;
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] != '\n')
+			continue;
+		if (!sliceintolist(result, nelts++, text + start,
+				   i - start + 1))
+			goto abort;
+		start = i + 1;
+	}
+	/* start <= size - 1 here, so a final (possibly newline-terminated)
+	 * piece always remains and fills the last slot. */
+	if (!sliceintolist(result, nelts, text + start, size - start))
+		goto abort;
+	return result;
+abort:
+	Py_XDECREF(result);
+	return NULL;
+}
+
 
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
 	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
+	{"splitnewlines", splitnewlines, METH_VARARGS,
+	 "like str.splitlines, but only split on newlines\n"},
 	{NULL, NULL}
 };
 



To: durin42, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list