D2683: xdiff: remove whitespace related feature

quark (Jun Wu) phabricator at mercurial-scm.org
Mon Mar 5 00:50:20 UTC 2018


quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  In Mercurial, whitespace related handling are done at a higher level than
  the low-level diff algorithm so "ignore spaces". So it's not used by mdiff.
  Some of the upcoming optimizations would be more difficult with whitespace
  related features kept. So let's remove them.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2683

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -32,7 +32,6 @@
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
-int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
 unsigned long xdl_hash_record(char const **data, char const *top, long flags);
 unsigned int xdl_hashbits(unsigned int size);
diff --git a/mercurial/thirdparty/xdiff/xutils.c b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -143,168 +143,17 @@
 	return nl + 1;
 }
 
-int xdl_blankline(const char *line, long size, long flags)
-{
-	long i;
-
-	if (!(flags & XDF_WHITESPACE_FLAGS))
-		return (size <= 1);
-
-	for (i = 0; i < size && XDL_ISSPACE(line[i]); i++)
-		;
-
-	return (i == size);
-}
-
-/*
- * Have we eaten everything on the line, except for an optional
- * CR at the very end?
- */
-static int ends_with_optional_cr(const char *l, long s, long i)
-{
-	int complete = s && l[s-1] == '\n';
-
-	if (complete)
-		s--;
-	if (s == i)
-		return 1;
-	/* do not ignore CR at the end of an incomplete line */
-	if (complete && s == i + 1 && l[i] == '\r')
-		return 1;
-	return 0;
-}
-
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 {
-	int i1, i2;
-
 	if (s1 == s2 && !memcmp(l1, l2, s1))
 		return 1;
-	if (!(flags & XDF_WHITESPACE_FLAGS))
-		return 0;
-
-	i1 = 0;
-	i2 = 0;
-
-	/*
-	 * -w matches everything that matches with -b, and -b in turn
-	 * matches everything that matches with --ignore-space-at-eol,
-	 * which in turn matches everything that matches with --ignore-cr-at-eol.
-	 *
-	 * Each flavor of ignoring needs different logic to skip whitespaces
-	 * while we have both sides to compare.
-	 */
-	if (flags & XDF_IGNORE_WHITESPACE) {
-		goto skip_ws;
-		while (i1 < s1 && i2 < s2) {
-			if (l1[i1++] != l2[i2++])
-				return 0;
-		skip_ws:
-			while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-				i1++;
-			while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-				i2++;
-		}
-	} else if (flags & XDF_IGNORE_WHITESPACE_CHANGE) {
-		while (i1 < s1 && i2 < s2) {
-			if (XDL_ISSPACE(l1[i1]) && XDL_ISSPACE(l2[i2])) {
-				/* Skip matching spaces and try again */
-				while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-					i1++;
-				while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-					i2++;
-				continue;
-			}
-			if (l1[i1++] != l2[i2++])
-				return 0;
-		}
-	} else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL) {
-		while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
-			i1++;
-			i2++;
-		}
-	} else if (flags & XDF_IGNORE_CR_AT_EOL) {
-		/* Find the first difference and see how the line ends */
-		while (i1 < s1 && i2 < s2 && l1[i1] == l2[i2]) {
-			i1++;
-			i2++;
-		}
-		return (ends_with_optional_cr(l1, s1, i1) &&
-			ends_with_optional_cr(l2, s2, i2));
-	}
-
-	/*
-	 * After running out of one side, the remaining side must have
-	 * nothing but whitespace for the lines to match.  Note that
-	 * ignore-whitespace-at-eol case may break out of the loop
-	 * while there still are characters remaining on both lines.
-	 */
-	if (i1 < s1) {
-		while (i1 < s1 && XDL_ISSPACE(l1[i1]))
-			i1++;
-		if (s1 != i1)
-			return 0;
-	}
-	if (i2 < s2) {
-		while (i2 < s2 && XDL_ISSPACE(l2[i2]))
-			i2++;
-		return (s2 == i2);
-	}
-	return 1;
-}
-
-static unsigned long xdl_hash_record_with_whitespace(char const **data,
-		char const *top, long flags) {
-	unsigned long ha = 5381;
-	char const *ptr = *data;
-	int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
-
-	for (; ptr < top && *ptr != '\n'; ptr++) {
-		if (cr_at_eol_only) {
-			/* do not ignore CR at the end of an incomplete line */
-			if (*ptr == '\r' &&
-			    (ptr + 1 < top && ptr[1] == '\n'))
-				continue;
-		}
-		else if (XDL_ISSPACE(*ptr)) {
-			const char *ptr2 = ptr;
-			int at_eol;
-			while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
-					&& ptr[1] != '\n')
-				ptr++;
-			at_eol = (top <= ptr + 1 || ptr[1] == '\n');
-			if (flags & XDF_IGNORE_WHITESPACE)
-				; /* already handled */
-			else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
-				 && !at_eol) {
-				ha += (ha << 5);
-				ha ^= (unsigned long) ' ';
-			}
-			else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
-				 && !at_eol) {
-				while (ptr2 != ptr + 1) {
-					ha += (ha << 5);
-					ha ^= (unsigned long) *ptr2;
-					ptr2++;
-				}
-			}
-			continue;
-		}
-		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
-	}
-	*data = ptr < top ? ptr + 1: ptr;
-
-	return ha;
+	return 0;
 }
 
 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	unsigned long ha = 5381;
 	char const *ptr = *data;
 
-	if (flags & XDF_WHITESPACE_FLAGS)
-		return xdl_hash_record_with_whitespace(data, top, flags);
-
 	for (; ptr < top && *ptr != '\n'; ptr++) {
 		ha += (ha << 5);
 		ha ^= (unsigned long) *ptr;
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -1037,27 +1037,6 @@
 	return 0;
 }
 
-static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
-{
-	xdchange_t *xch;
-
-	for (xch = xscr; xch; xch = xch->next) {
-		int ignore = 1;
-		xrecord_t **rec;
-		long i;
-
-		rec = &xe->xdf1.recs[xch->i1];
-		for (i = 0; i < xch->chg1 && ignore; i++)
-			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
-
-		rec = &xe->xdf2.recs[xch->i2];
-		for (i = 0; i < xch->chg2 && ignore; i++)
-			ignore = xdl_blankline(rec[i]->ptr, rec[i]->size, flags);
-
-		xch->ignore = ignore;
-	}
-}
-
 int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
 	     xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
 	xdchange_t *xscr;
@@ -1076,8 +1055,6 @@
 		return -1;
 	}
 
-	if (xpp->flags & XDF_IGNORE_BLANK_LINES)
-		xdl_mark_ignorable(xscr, &xe, xpp->flags);
 	if (ef(&xe, xscr, ecb, xecfg) < 0) {
 		xdl_free_script(xscr);
 		xdl_free_env(&xe);
diff --git a/mercurial/thirdparty/xdiff/xdiff.h b/mercurial/thirdparty/xdiff/xdiff.h
--- a/mercurial/thirdparty/xdiff/xdiff.h
+++ b/mercurial/thirdparty/xdiff/xdiff.h
@@ -32,17 +32,6 @@
 /* xpparm_t.flags */
 #define XDF_NEED_MINIMAL (1 << 0)
 
-#define XDF_IGNORE_WHITESPACE (1 << 1)
-#define XDF_IGNORE_WHITESPACE_CHANGE (1 << 2)
-#define XDF_IGNORE_WHITESPACE_AT_EOL (1 << 3)
-#define XDF_IGNORE_CR_AT_EOL (1 << 4)
-#define XDF_WHITESPACE_FLAGS (XDF_IGNORE_WHITESPACE | \
-			      XDF_IGNORE_WHITESPACE_CHANGE | \
-			      XDF_IGNORE_WHITESPACE_AT_EOL | \
-			      XDF_IGNORE_CR_AT_EOL)
-
-#define XDF_IGNORE_BLANK_LINES (1 << 7)
-
 #define XDF_INDENT_HEURISTIC (1 << 23)
 
 /* xdemitconf_t.flags */



To: quark, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list