[PATCH 5 of 7 v2] bdiff: give slight preference to longest matches in the middle of the B side

Mads Kiilerich mads at kiilerich.com
Tue Nov 15 15:57:06 EST 2016


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1478626653 -3600
#      Tue Nov 08 18:37:33 2016 +0100
# Node ID 5ae7e8061a9671e8941a7a17e316254d228acf59
# Parent  5bb26f29b1509520ca3af4c540775cab50b4d6c0
bdiff: give slight preference to longest matches in the middle of the B side

We already have a slight preference for matches close to the middle on the A
side. Now, do the same on the B side.

j is iterating the b range backwards and we thus accept a new j if the previous
match was in the upper half.

This makes the test-bhalf diff "correct". It obviously also gives more
preference to balanced recursion than to appending to sequences. That is kind
of correct, but will also unfortunately make some bundles bigger. No doubt, we
can also create examples where it will make them smaller ...

The bundle size for 4.0 (hg bundle --base null -r 4.0 x.hg) happens to go from
22803824 to 22806817 bytes - an 0.01% increase.

diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c
--- a/mercurial/bdiff.c
+++ b/mercurial/bdiff.c
@@ -143,7 +143,7 @@ static int longest_match(struct bdiff_li
 			struct pos *pos,
 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
 {
-	int mi = a1, mj = b1, mk = 0, i, j, k, half;
+	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;
 
 	/* window our search on large regions to better bound
 	   worst-case performance. by choosing a window at the end, we
@@ -152,6 +152,7 @@ static int longest_match(struct bdiff_li
 		a1 = a2 - 30000;
 
 	half = (a1 + a2 - 1) / 2;
+	bhalf = (b1 + b2 - 1) / 2;
 
 	for (i = a1; i < a2; i++) {
 		/* skip all lines in b after the current block */
@@ -187,8 +188,8 @@ static int longest_match(struct bdiff_li
 					/* same match but closer to half */
 					mi = i;
 					mj = j;
-				} else if (i == mi) {
-					/* same i but earlier j */
+				} else if (i == mi && mj > bhalf) {
+					/* same i but best earlier j */
 					mj = j;
 				}
 			}
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -91,9 +91,9 @@ annotate (JSON)
 annotate -n b
 
   $ hg annotate -n b
+  1: a
   0: a
   1: a
-  1: a
   3: b4
   3: b5
   3: b6
@@ -111,8 +111,8 @@ annotate --no-follow b
 annotate -nl b
 
   $ hg annotate -nl b
+  1:1: a
   0:1: a
-  1:2: a
   1:3: a
   3:4: b4
   3:5: b5
@@ -121,9 +121,9 @@ annotate -nl b
 annotate -nf b
 
   $ hg annotate -nf b
+  1 a: a
   0 a: a
   1 a: a
-  1 a: a
   3 b: b4
   3 b: b5
   3 b: b6
@@ -131,8 +131,8 @@ annotate -nf b
 annotate -nlf b
 
   $ hg annotate -nlf b
+  1 a:1: a
   0 a:1: a
-  1 a:2: a
   1 a:3: a
   3 b:4: b4
   3 b:5: b5
@@ -156,9 +156,9 @@ annotate -nlf b
 annotate after merge
 
   $ hg annotate -nf b
+  1 a: a
   0 a: a
   1 a: a
-  1 a: a
   3 b: b4
   4 b: c
   3 b: b5
@@ -166,8 +166,8 @@ annotate after merge
 annotate after merge with -l
 
   $ hg annotate -nlf b
+  1 a:1: a
   0 a:1: a
-  1 a:2: a
   1 a:3: a
   3 b:4: b4
   4 b:5: c
@@ -198,7 +198,7 @@ annotate after merge with -l
 annotate after rename merge
 
   $ hg annotate -nf b
-  0 a: a
+  1 a: a
   6 b: z
   1 a: a
   3 b: b4
@@ -209,7 +209,7 @@ annotate after rename merge
 annotate after rename merge with -l
 
   $ hg annotate -nlf b
-  0 a:1: a
+  1 a:1: a
   6 b:2: z
   1 a:3: a
   3 b:4: b4
@@ -226,7 +226,7 @@ Issue2807: alignment of line numbers wit
   $ echo more >> b
   $ hg ci -mmore -d '7 0'
   $ hg annotate -nlf b
-   0 a: 1: a
+   1 a: 1: a
    6 b: 2: z
    1 a: 3: a
    3 b: 4: b4
@@ -240,15 +240,15 @@ Issue2807: alignment of line numbers wit
 linkrev vs rev
 
   $ hg annotate -r tip -n a
+  1: a
   0: a
   1: a
-  1: a
 
 linkrev vs rev with -l
 
   $ hg annotate -r tip -nl a
+  1:1: a
   0:1: a
-  1:2: a
   1:3: a
 
 Issue589: "undelete" sequence leads to crash
diff --git a/tests/test-bdiff.py b/tests/test-bdiff.py
--- a/tests/test-bdiff.py
+++ b/tests/test-bdiff.py
@@ -79,14 +79,14 @@ testfixws("", "", 0)
 
 print("done")
 
-print("Odd diff for a trivial change:")
+print("Nice diff for a trivial change:")
 showdiff(
     ''.join('<%s\n-\n' % i for i in range(5)),
     ''.join('>%s\n-\n' % i for i in range(5)))
 
-print("Diff 1 to 3 lines - preference for adding / removing at the end of sequences:")
+print("Diff 1 to 3 lines - preference for balanced recursion:")
 showdiff('a\n', 'a\n' * 3)
-print("Diff 1 to 5 lines - preference for adding / removing at the end of sequences:")
+print("Diff 1 to 5 lines - preference for balanced recursion:")
 showdiff('a\n', 'a\n' * 5)
 print("Diff 3 to 1 lines - preference for balanced recursion:")
 showdiff('a\n' * 3, 'a\n')
diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
--- a/tests/test-bdiff.py.out
+++ b/tests/test-bdiff.py.out
@@ -42,31 +42,34 @@ showdiff(
  'f\n'
 done
 done
-Odd diff for a trivial change:
+Nice diff for a trivial change:
 showdiff(
   '<0\n-\n<1\n-\n<2\n-\n<3\n-\n<4\n-\n',
   '>0\n-\n>1\n-\n>2\n-\n>3\n-\n>4\n-\n'):
- 0 8 '<0\n-\n<1\n' -> '>0\n'
- '-\n'
- 10 13 '<2\n' -> '>1\n'
+ 0 3 '<0\n' -> '>0\n'
  '-\n'
- 15 18 '<3\n' -> '>2\n'
+ 5 8 '<1\n' -> '>1\n'
  '-\n'
- 20 23 '<4\n' -> '>3\n'
+ 10 13 '<2\n' -> '>2\n'
  '-\n'
- 25 25 '' -> '>4\n-\n'
-Diff 1 to 3 lines - preference for adding / removing at the end of sequences:
+ 15 18 '<3\n' -> '>3\n'
+ '-\n'
+ 20 23 '<4\n' -> '>4\n'
+ '-\n'
+Diff 1 to 3 lines - preference for balanced recursion:
 showdiff(
   'a\n',
   'a\na\na\n'):
+ 0 0 '' -> 'a\n'
  'a\n'
- 2 2 '' -> 'a\na\n'
-Diff 1 to 5 lines - preference for adding / removing at the end of sequences:
+ 2 2 '' -> 'a\n'
+Diff 1 to 5 lines - preference for balanced recursion:
 showdiff(
   'a\n',
   'a\na\na\na\na\n'):
+ 0 0 '' -> 'a\na\n'
  'a\n'
- 2 2 '' -> 'a\na\na\na\n'
+ 2 2 '' -> 'a\na\n'
 Diff 3 to 1 lines - preference for balanced recursion:
 showdiff(
   'a\na\na\n',
diff --git a/tests/test-commit-amend.t b/tests/test-commit-amend.t
--- a/tests/test-commit-amend.t
+++ b/tests/test-commit-amend.t
@@ -47,9 +47,9 @@ Amending changeset with changes in worki
   --- a/a	Thu Jan 01 00:00:00 1970 +0000
   +++ b/a	Thu Jan 01 00:00:00 1970 +0000
   @@ -1,1 +1,3 @@
+  +a
    a
   +a
-  +a
   $ hg log
   changeset:   1:43f1ba15f28a
   tag:         tip
@@ -122,13 +122,13 @@ No changes, just a different message:
   uncompressed size of bundle content:
        254 (changelog)
        163 (manifests)
-       129  a
+       141  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/74609c7f506e-1bfde511-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        250 (changelog)
        163 (manifests)
-       129  a
+       141  a
   adding branch
   adding changesets
   adding manifests
@@ -140,9 +140,9 @@ No changes, just a different message:
   --- a/a	Thu Jan 01 00:00:00 1970 +0000
   +++ b/a	Thu Jan 01 00:00:00 1970 +0000
   @@ -1,1 +1,3 @@
+  +a
    a
   +a
-  +a
   $ hg log
   changeset:   1:1cd866679df8
   tag:         tip
@@ -266,13 +266,13 @@ then, test editing custom commit message
   uncompressed size of bundle content:
        249 (changelog)
        163 (manifests)
-       131  a
+       143  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/5f357c7560ab-e7c84ade-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        257 (changelog)
        163 (manifests)
-       131  a
+       143  a
   adding branch
   adding changesets
   adding manifests
@@ -309,13 +309,13 @@ Same, but with changes in working dir (d
   uncompressed size of bundle content:
        464 (changelog)
        322 (manifests)
-       249  a
+       261  a
   saved backup bundle to $TESTTMP/.hg/strip-backup/7ab3bf440b54-8e3b5088-amend-backup.hg (glob)
   1 changesets found
   uncompressed size of bundle content:
        257 (changelog)
        163 (manifests)
-       133  a
+       145  a
   adding branch
   adding changesets
   adding manifests
diff --git a/tests/test-mq-qfold.t b/tests/test-mq-qfold.t
--- a/tests/test-mq-qfold.t
+++ b/tests/test-mq-qfold.t
@@ -51,8 +51,8 @@ specified)
   --- a/a
   +++ b/a
   @@ -1,1 +1,3 @@
+  +a
    a
-  +a
   +b
 
 Fold with local changes:
@@ -67,8 +67,8 @@ Fold with local changes:
   --- a/a
   +++ b/a
   @@ -1,1 +1,3 @@
+  +a
    a
-  +a
   +b
 
   $ hg revert -a --no-backup


More information about the Mercurial-devel mailing list