[PATCH 4 of 5 rfc] bdiff: give preference to longest matches in the middle of b side

Mads Kiilerich mads at kiilerich.com
Thu Nov 3 17:34:14 EDT 2016


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1478208837 -3600
#      Thu Nov 03 22:33:57 2016 +0100
# Node ID 3e0216b2a0995cb21946bc13fb21391013332c57
# Parent  a3e3c7075c3c4b92e6b8c27e28bef7b2c061008d
bdiff: give preference to longest matches in the middle of b side

j is iterating the b range backwards and we thus use a flag to stop searching
for more-in-the-middle matches when the first hit at or before the middle has
been seen.

This makes the test-bhalf diff "correct". It obviously also gives more
preference to balanced recursion than to appending to sequences. That is kind
of correct, but will also unfortunately make some bundles bigger. No doubt, we
can also create examples where it will make them smaller ...

diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c
--- a/mercurial/bdiff.c
+++ b/mercurial/bdiff.c
@@ -138,7 +138,7 @@ static int longest_match(struct bdiff_li
 			struct pos *pos,
 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
 {
-	int mi = a1, mj = b1, mk = 0, i, j, k, half;
+	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf, findbetterb;
 
 	/* window our search on large regions to better bound
 	   worst-case performance. by choosing a window at the end, we
@@ -147,6 +147,7 @@ static int longest_match(struct bdiff_li
 		a1 = a2 - 30000;
 
 	half = (a1 + a2 - 1) / 2;
+	bhalf = (b1 + b2 - 1) / 2;
 
 	for (i = a1; i < a2; i++) {
 		/* skip all lines in b after the current block */
@@ -154,6 +155,7 @@ static int longest_match(struct bdiff_li
 			;
 
 		/* loop through all lines match a[i] in b */
+		findbetterb = 1;
 		for (; j >= b1; j = b[j].n) {
 			/* does this extend an earlier match? */
 			for (k = 1; j - k >= b1 && i - k >= a1; k++) {
@@ -182,9 +184,11 @@ static int longest_match(struct bdiff_li
 					/* better i in first lower half */
 					mi = i;
 					mj = j;
-				} else if (i == mi) {
-					/* an earlier j is "better" */
+				} else if (i == mi && findbetterb) {
+					/* better j in first upper half */
 					mj = j;
+					if (j <= bhalf)
+						findbetterb = 0;
 				}
 			}
 		}
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -91,9 +91,9 @@ annotate (JSON)
 annotate -n b
 
   $ hg annotate -n b
+  1: a
   0: a
   1: a
-  1: a
   3: b4
   3: b5
   3: b6
@@ -111,8 +111,8 @@ annotate --no-follow b
 annotate -nl b
 
   $ hg annotate -nl b
+  1:1: a
   0:1: a
-  1:2: a
   1:3: a
   3:4: b4
   3:5: b5
@@ -121,9 +121,9 @@ annotate -nl b
 annotate -nf b
 
   $ hg annotate -nf b
+  1 a: a
   0 a: a
   1 a: a
-  1 a: a
   3 b: b4
   3 b: b5
   3 b: b6
@@ -131,8 +131,8 @@ annotate -nf b
 annotate -nlf b
 
   $ hg annotate -nlf b
+  1 a:1: a
   0 a:1: a
-  1 a:2: a
   1 a:3: a
   3 b:4: b4
   3 b:5: b5
@@ -156,9 +156,9 @@ annotate -nlf b
 annotate after merge
 
   $ hg annotate -nf b
+  1 a: a
   0 a: a
   1 a: a
-  1 a: a
   3 b: b4
   4 b: c
   3 b: b5
@@ -166,8 +166,8 @@ annotate after merge
 annotate after merge with -l
 
   $ hg annotate -nlf b
+  1 a:1: a
   0 a:1: a
-  1 a:2: a
   1 a:3: a
   3 b:4: b4
   4 b:5: c
@@ -198,7 +198,7 @@ annotate after merge with -l
 annotate after rename merge
 
   $ hg annotate -nf b
-  0 a: a
+  1 a: a
   6 b: z
   1 a: a
   3 b: b4
@@ -209,7 +209,7 @@ annotate after rename merge
 annotate after rename merge with -l
 
   $ hg annotate -nlf b
-  0 a:1: a
+  1 a:1: a
   6 b:2: z
   1 a:3: a
   3 b:4: b4
@@ -226,7 +226,7 @@ Issue2807: alignment of line numbers wit
   $ echo more >> b
   $ hg ci -mmore -d '7 0'
   $ hg annotate -nlf b
-   0 a: 1: a
+   1 a: 1: a
    6 b: 2: z
    1 a: 3: a
    3 b: 4: b4
@@ -240,15 +240,15 @@ Issue2807: alignment of line numbers wit
 linkrev vs rev
 
   $ hg annotate -r tip -n a
+  1: a
   0: a
   1: a
-  1: a
 
 linkrev vs rev with -l
 
   $ hg annotate -r tip -nl a
+  1:1: a
   0:1: a
-  1:2: a
   1:3: a
 
 Issue589: "undelete" sequence leads to crash
diff --git a/tests/test-bhalf.t b/tests/test-bhalf.t
--- a/tests/test-bhalf.t
+++ b/tests/test-bhalf.t
@@ -14,58 +14,48 @@ Diff of boring files:
   +++ b/this-is-the-filename
   @@ -1,31 +1,31 @@
   -once upon a time 1
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 2
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 3
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 4
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 5
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 6
-  -The quick brown fox jumps over the lazy dog
-  -once upon a time 7
   +twice upon a time 1
    The quick brown fox jumps over the lazy dog
+  -once upon a time 2
+  +twice upon a time 2
+   The quick brown fox jumps over the lazy dog
+  -once upon a time 3
+  +twice upon a time 3
+   The quick brown fox jumps over the lazy dog
+  -once upon a time 4
+  +twice upon a time 4
+   The quick brown fox jumps over the lazy dog
+  -once upon a time 5
+  +twice upon a time 5
+   The quick brown fox jumps over the lazy dog
+  -once upon a time 6
+  +twice upon a time 6
+   The quick brown fox jumps over the lazy dog
+  -once upon a time 7
+  +twice upon a time 7
+   The quick brown fox jumps over the lazy dog
   -once upon a time 8
-  -The quick brown fox jumps over the lazy dog
+  +twice upon a time 8
+   The quick brown fox jumps over the lazy dog
   -once upon a time 9
-  -The quick brown fox jumps over the lazy dog
+  +twice upon a time 9
+   The quick brown fox jumps over the lazy dog
   -once upon a time 10
-  -The quick brown fox jumps over the lazy dog
+  +twice upon a time 10
+   The quick brown fox jumps over the lazy dog
   -once upon a time 11
-  +twice upon a time 2
+  +twice upon a time 11
    The quick brown fox jumps over the lazy dog
   -once upon a time 12
-  -The quick brown fox jumps over the lazy dog
+  +twice upon a time 12
+   The quick brown fox jumps over the lazy dog
   -once upon a time 13
-  +twice upon a time 3
+  +twice upon a time 13
    The quick brown fox jumps over the lazy dog
   -once upon a time 14
-  +twice upon a time 4
+  +twice upon a time 14
    The quick brown fox jumps over the lazy dog
   -once upon a time 15
-  +twice upon a time 5
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 6
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 7
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 8
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 9
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 10
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 11
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 12
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 13
-  +The quick brown fox jumps over the lazy dog
-  +twice upon a time 14
-  +The quick brown fox jumps over the lazy dog
   +twice upon a time 15
    The quick brown fox jumps over the lazy dog
    
@@ -77,7 +67,7 @@ That's an odd diff for a trivial change!
   $ cd ..
 
   $ f --size bundle.hg
-  bundle.hg: size=870
+  bundle.hg: size=920
 
 Explore some bdiff implementation edge cases:
 
@@ -115,9 +105,9 @@ Explore some bdiff implementation edge c
   --- a/x
   +++ b/x
   @@ -1,1 +1,3 @@
+  +a
    a
   +a
-  +a
   diff --git a/y b/y
   --- a/y
   +++ b/y
diff --git a/tests/test-commit-amend.t b/tests/test-commit-amend.t
--- a/tests/test-commit-amend.t
+++ b/tests/test-commit-amend.t
@@ -47,9 +47,9 @@ Amending changeset with changes in worki
   --- a/a	Thu Jan 01 00:00:00 1970 +0000
   +++ b/a	Thu Jan 01 00:00:00 1970 +0000
   @@ -1,1 +1,3 @@
+  +a
    a
   +a
-  +a
   $ hg log
   changeset:   1:43f1ba15f28a
   tag:         tip
@@ -122,13 +122,13 @@ No changes, just a different message:
   uncompressed size of bundle content:
        254 (changelog)
        163 (manifests)
-       129  a
+       141  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/74609c7f506e-1bfde511-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        250 (changelog)
        163 (manifests)
-       129  a
+       141  a
   adding branch
   adding changesets
   adding manifests
@@ -140,9 +140,9 @@ No changes, just a different message:
   --- a/a	Thu Jan 01 00:00:00 1970 +0000
   +++ b/a	Thu Jan 01 00:00:00 1970 +0000
   @@ -1,1 +1,3 @@
+  +a
    a
   +a
-  +a
   $ hg log
   changeset:   1:1cd866679df8
   tag:         tip
@@ -266,13 +266,13 @@ then, test editing custom commit message
   uncompressed size of bundle content:
        249 (changelog)
        163 (manifests)
-       131  a
+       143  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/5f357c7560ab-e7c84ade-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        257 (changelog)
        163 (manifests)
-       131  a
+       143  a
   adding branch
   adding changesets
   adding manifests
@@ -309,13 +309,13 @@ Same, but with changes in working dir (d
   uncompressed size of bundle content:
        464 (changelog)
        322 (manifests)
-       249  a
+       261  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/7ab3bf440b54-8e3b5088-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        257 (changelog)
        163 (manifests)
-       133  a
+       145  a
   adding branch
   adding changesets
   adding manifests


More information about the Mercurial-devel mailing list