[PATCH 1 of 2 STABLE] diffhelpers: add canstripcr=True to fix_newline

Phil Cohen phillco at fb.com
Mon Apr 24 20:10:00 UTC 2017


# HG changeset patch
# User Phil Cohen <phillco at fb.com>
# Date 1493055298 25200
#      Mon Apr 24 10:34:58 2017 -0700
# Branch stable
# Node ID 7f413064263ff1b3cf98d1b23ae35e4c098da482
# Parent  0537d2d94c0fb747e75272f9f950cc29b6e0dce8
diffhelpers: add canstripcr=True to fix_newline

When processing a patch ending in "\ No newline at end of file", we used to
remove the \n from the end of that line, and if a \r was present before the \n,
we would remove that as well, probably assuming the patch was from a Windows
system.

However, this isn't always correct: patches created on Unix can legitimately end
in a CR, and so this logic will falsely strip the CR and fail to import.

A better way to determine if the CR is part of the patch, or part of the newline
inserted by the patcher, is to see what the "\ No newline" line itself ends
with. If CRLF, then CRLFs may be stripped; if only LF, then only LFs may be.

This is needed to fix issue4042.

diff --git a/mercurial/diffhelpers.c b/mercurial/diffhelpers.c
--- a/mercurial/diffhelpers.c
+++ b/mercurial/diffhelpers.c
@@ -18,8 +18,9 @@
 
 
 /* fixup the last lines of a and b when the patch has no newline at eof */
-static void _fix_newline(PyObject *hunk, PyObject *a, PyObject *b)
-{
+static void _fix_newline(PyObject *hunk, PyObject *a, PyObject *b,
+			 bool canstripcr)
+ {
 	Py_ssize_t hunksz = PyList_Size(hunk);
 	PyObject *s = PyList_GET_ITEM(hunk, hunksz-1);
 	char *l = PyBytes_AsString(s);
@@ -29,7 +30,7 @@
 	PyObject *hline;
 	Py_ssize_t sz = PyBytes_GET_SIZE(s);
 
-	if (sz > 1 && l[sz-2] == '\r')
+	if (sz > 1 && l[sz-2] == '\r' && canstripcr)
 		/* tolerate CRLF in last line */
 		sz -= 1;
 
@@ -54,9 +55,10 @@
 fix_newline(PyObject *self, PyObject *args)
 {
 	PyObject *hunk, *a, *b;
-	if (!PyArg_ParseTuple(args, "OOO", &hunk, &a, &b))
+	bool canstripcr = true;
+	if (!PyArg_ParseTuple(args, "OOO|B", &hunk, &a, &b, &canstripcr))
 		return NULL;
-	_fix_newline(hunk, a, b);
+	_fix_newline(hunk, a, b, canstripcr);
 	return Py_BuildValue("l", 0);
 }
 
@@ -97,8 +99,18 @@
 			x = PyFile_GetLine(fp, 0);
 			s = PyBytes_AsString(x);
 			c = *s;
-			if (strcmp(s, "\\ No newline at end of file\n") == 0) {
-				_fix_newline(hunk, a, b);
+			char *prefix = "\\ No newline at end of file";
+
+			if (strncmp(s, prefix, strlen(prefix)) == 0) {
+				bool canstripcr = true;
+				size_t len = strlen(s);
+				/* The "\\ No newline" line must end in CRLF to
+				strip CRs -- see comment in diffhelpers.py */
+				if (len > 2 && s[len - 2] != '\r') {
+					canstripcr = false;
+				}
+
+				_fix_newline(hunk, a, b, canstripcr);
 				continue;
 			}
 			if (c == '\n') {
diff --git a/mercurial/pure/diffhelpers.py b/mercurial/pure/diffhelpers.py
--- a/mercurial/pure/diffhelpers.py
+++ b/mercurial/pure/diffhelpers.py
@@ -34,10 +34,18 @@
                 a.append(s)
     return 0
 
-def fix_newline(hunk, a, b):
+# fix_newline removes the newline from the last line of a patch that, while
+# currently ends in a newline, did not originally as indicated by a subsequent
+# "\ No newline at end of file" line.
+#
+# canstripcr should be true only if the "\ No newline" line itself ends with a
+# CRLF. If so, a CRLF in the last line of the patch can also be removed (in
+# addition to just a LF). If not, it is likely the CR was part of the original
+# patch and should remain.
+def fix_newline(hunk, a, b, canstripcr=True):
     l = hunk[-1]
     # tolerate CRLF in last line
-    if l.endswith('\r\n'):
+    if l.endswith('\r\n') and canstripcr:
         hline = l[:-2]
     else:
         hline = l[:-1]


More information about the Mercurial-devel mailing list