D2686: xdiff: add a preprocessing step that trims files
quark (Jun Wu)
phabricator at mercurial-scm.org
Tue Mar 6 01:09:37 EST 2018
quark updated this revision to Diff 6664.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D2686?vs=6650&id=6664
REVISION DETAIL
https://phab.mercurial-scm.org/D2686
AFFECTED FILES
mercurial/thirdparty/xdiff/xdiffi.c
mercurial/thirdparty/xdiff/xemit.c
mercurial/thirdparty/xdiff/xprepare.c
mercurial/thirdparty/xdiff/xprepare.h
mercurial/thirdparty/xdiff/xtypes.h
CHANGE DETAILS
diff --git a/mercurial/thirdparty/xdiff/xtypes.h b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -60,6 +60,7 @@
typedef struct s_xdfenv {
xdfile_t xdf1, xdf2;
+ long prefix_lines, suffix_lines;
} xdfenv_t;
diff --git a/mercurial/thirdparty/xdiff/xprepare.h b/mercurial/thirdparty/xdiff/xprepare.h
--- a/mercurial/thirdparty/xdiff/xprepare.h
+++ b/mercurial/thirdparty/xdiff/xprepare.h
@@ -26,7 +26,7 @@
int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
- xdfenv_t *xe);
+ xdfenv_t *xe, xdemitconf_t const *xecfg);
void xdl_free_env(xdfenv_t *xe);
diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -61,6 +61,8 @@
static void xdl_free_ctx(xdfile_t *xdf);
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+ xdfenv_t* xe, mmfile_t *out_mf1, mmfile_t *out_mf2);
static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
@@ -156,6 +158,99 @@
}
+/*
+ * Trim the common prefix and the common suffix from two files before diffing.
+ * Note: trimming common prefix and suffix as-is will break hunk shifting and
+ * context lines.
+ *
+ * a.py    | common | b.py    | common | diff
+ *         | p | s  |         | p | s  |
+ * -------------------------------------------
+ * try:    | Y      | try:    | Y      |
+ *   1     | Y      |   1     | Y      |
+ * except: | Y   y  | except: | Y      |
+ *   pass  | Y   y  |   pass  | Y      |
+ * try:    | Y   y  | try:    | Y      | +
+ *   3     |     Y  |   2     |        | +
+ * except: |     Y  | except: |     y  | +
+ *   pass  |     Y  |   pass  |     y  | +
+ *         |        | try:    |     y  |
+ *         |        |   3     |     Y  |
+ *         |        | except: |     Y  |
+ *         |        |   pass  |     Y  |
+ *
+ * Since the diff output tends to shift hunks towards the end, the common
+ * prefix is calculated first, and the common suffix is calculated in a way
+ * that it cannot overlap with the common prefix (ex. "y" in the above table is
+ * not considered as common suffix). Then "reserved" lines are removed from
+ * both the prefix and the suffix to make shifting and context lines work.
+ *
+ * mf1, mf2:         the input files; they are only read.
+ * reserved:         number of lines to keep untrimmed next to each boundary;
+ *                   negative values are treated as 0.
+ * xe:               receives the trimmed line counts in prefix_lines and
+ *                   suffix_lines.
+ * out_mf1, out_mf2: receive the trimmed ranges. They point into the buffers
+ *                   of mf1 and mf2 (nothing is copied), and the same number
+ *                   of bytes is dropped from both files at each end.
+ */
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+ xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
+ mmfile_t msmall, mlarge; /* copies of the smaller and the larger input descriptor */
+ long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i; /* prefix/suffix line and byte counts */
+ const char *pp1, *pp2, *ps1, *ps2; /* prefix (pp) and suffix (ps) cursors: 1 = msmall, 2 = mlarge */
+
+ /* reserved must be >= 0 for the line boundary adjustment to work */
+ if (reserved < 0)
+ reserved = 0;
+
+ if (mf1->size < mf2->size) { /* bound both scans by the smaller file */
+ memcpy(&msmall, mf1, sizeof(mmfile_t));
+ memcpy(&mlarge, mf2, sizeof(mmfile_t));
+ } else {
+ memcpy(&msmall, mf2, sizeof(mmfile_t));
+ memcpy(&mlarge, mf1, sizeof(mmfile_t));
+ }
+
+ pp1 = msmall.ptr, pp2 = mlarge.ptr; /* scan forward while bytes match, counting newlines */
+ for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
+ plines += (*pp1 == '\n');
+ pp1++, pp2++;
+ }
+
+ ps1 = msmall.ptr + msmall.size - 1, ps2 = mlarge.ptr + mlarge.size - 1; /* NOTE(review): points before the buffer when msmall.size is 0 - confirm this is acceptable */
+ for (; ps1 > pp1 && *ps1 == *ps2; ++i) { /* scan backward, never crossing the prefix cursor; NOTE(review): "++i" has no effect here, i is reset before its next use */
+ slines += (*ps1 == '\n');
+ ps1--, ps2--;
+ }
+
+ /* Retract the common prefix and suffix boundaries to keep the reserved lines */
+ if (plines <= reserved + 1) { /* not enough common prefix lines: trim nothing */
+ plines = 0;
+ } else {
+ for (i = 0; i <= reserved;) { /* step back over reserved + 1 newlines */
+ pp1--;
+ i += (*pp1 == '\n');
+ }
+ /* The new mmfile starts at the next char just after '\n' */
+ pbytes = pp1 - msmall.ptr + 1;
+ plines -= reserved;
+ }
+
+ if (slines <= reserved + 1) { /* not enough common suffix lines: trim nothing */
+ slines = 0;
+ } else {
+ for (i = 0; i <= reserved;) { /* step forward over reserved + 1 newlines */
+ ps1++;
+ i += (*ps1 == '\n');
+ }
+ /* The new mmfile includes this '\n' */
+ sbytes = msmall.ptr + msmall.size - ps1 - 1;
+ slines -= reserved;
+ if (msmall.ptr[msmall.size - 1] == '\n') /* a final '\n' ends the last line; it does not start another one */
+ slines -= 1;
+ }
+
+ xe->prefix_lines = plines;
+ xe->suffix_lines = slines;
+ out_mf1->ptr = mf1->ptr + pbytes; /* outputs alias the input buffers */
+ out_mf1->size = mf1->size - pbytes - sbytes;
+ out_mf2->ptr = mf2->ptr + pbytes;
+ out_mf2->size = mf2->size - pbytes - sbytes;
+}
+
+
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
@@ -254,10 +349,14 @@
xdl_cha_free(&xdf->rcha);
}
+/* Minimum number of lines left untrimmed at each end during file trimming.
+ * This is to leave room for hunk shifting. */
+#define MIN_RESERVED_LINES 100
int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
- xdfenv_t *xe) {
- long enl1, enl2, sample;
+ xdfenv_t *xe, xdemitconf_t const *xecfg) {
+ mmfile_t tmf1, tmf2;
+ long enl1, enl2, sample, reserved_lines = MIN_RESERVED_LINES;
xdlclassifier_t cf;
memset(&cf, 0, sizeof(cf));
@@ -270,12 +369,16 @@
if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
- if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+ if (xecfg)
+ reserved_lines += xecfg->ctxlen;
+ xdl_trim_files(mf1, mf2, reserved_lines, xe, &tmf1, &tmf2);
+
+ if (xdl_prepare_ctx(1, &tmf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
xdl_free_classifier(&cf);
return -1;
}
- if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+ if (xdl_prepare_ctx(2, &tmf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
diff --git a/mercurial/thirdparty/xdiff/xemit.c b/mercurial/thirdparty/xdiff/xemit.c
--- a/mercurial/thirdparty/xdiff/xemit.c
+++ b/mercurial/thirdparty/xdiff/xemit.c
@@ -169,6 +169,7 @@
long s1, s2, e1, e2, lctx;
xdchange_t *xch, *xche;
long funclineprev = -1;
+ long p = xe->prefix_lines;
struct func_line func_line = { 0 };
for (xch = xscr; xch; xch = xche->next) {
@@ -261,7 +262,7 @@
s1 - 1, funclineprev);
funclineprev = s1 - 1;
}
- if (xdl_emit_hunk_hdr(s1 + 1, e1 - s1, s2 + 1, e2 - s2,
+ if (xdl_emit_hunk_hdr(s1 + 1 + p, e1 - s1, s2 + 1 + p, e2 - s2,
func_line.buf, func_line.len, ecb) < 0)
return -1;
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -325,14 +325,14 @@
}
-int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
- xdfenv_t *xe) {
+int xdl_do_diff2(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe, xdemitconf_t const *xecfg) {
long ndiags;
long *kvd, *kvdf, *kvdb;
xdalgoenv_t xenv;
diffdata_t dd1, dd2;
- if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
+ if (xdl_prepare_env(mf1, mf2, xpp, xe, xecfg) < 0) {
return -1;
}
@@ -381,6 +381,12 @@
}
+int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
+ xdfenv_t *xe) { /* keeps the original 4-argument signature and forwards to xdl_do_diff2() */
+ return xdl_do_diff2(mf1, mf2, xpp, xe, NULL); /* NULL xecfg: xdl_prepare_env() adds no context length on top of MIN_RESERVED_LINES when trimming */
+}
+
+
static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2) {
xdchange_t *xch;
@@ -1010,30 +1016,38 @@
static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
xdemitconf_t const *xecfg)
{
+ long p = xe->prefix_lines, s = xe->suffix_lines;
xdchange_t *xch, *xche;
if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
long i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
for (xch = xscr; xch; xch = xche->next) {
xche = xdl_get_hunk(&xch, xecfg);
if (!xch)
break;
+ if (xch != xche)
+ xdl_bug("xch != xche");
+ xch->i1 += p;
+ xch->i2 += p;
if (xch->i1 > i1 || xch->i2 > i2) {
- if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2, ecb->priv) < 0)
+ if (xecfg->hunk_func(i1, xch->i1, i2, xch->i2,
+ ecb->priv) < 0)
return -1;
}
- i1 = xche->i1 + xche->chg1;
- i2 = xche->i2 + xche->chg2;
+ i1 = xch->i1 + xch->chg1;
+ i2 = xch->i2 + xch->chg2;
}
- if (xecfg->hunk_func(i1, n1, i2, n2, ecb->priv) < 0)
+ if (xecfg->hunk_func(i1, n1 + p + s, i2, n2 + p + s,
+ ecb->priv) < 0)
return -1;
} else {
for (xch = xscr; xch; xch = xche->next) {
xche = xdl_get_hunk(&xch, xecfg);
if (!xch)
break;
- if (xecfg->hunk_func(
- xch->i1, xche->i1 + xche->chg1 - xch->i1,
- xch->i2, xche->i2 + xche->chg2 - xch->i2,
+ if (xecfg->hunk_func(xch->i1 + p,
+ xche->i1 + xche->chg1 - xch->i1,
+ xch->i2 + p,
+ xche->i2 + xche->chg2 - xch->i2,
ecb->priv) < 0)
return -1;
}
@@ -1068,7 +1082,7 @@
xdfenv_t xe;
emit_func_t ef = xecfg->hunk_func ? xdl_call_hunk_func : xdl_emit_diff;
- if (xdl_do_diff(mf1, mf2, xpp, &xe) < 0) {
+ if (xdl_do_diff2(mf1, mf2, xpp, &xe, xecfg) < 0) {
return -1;
}
To: quark, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list