# [PATCH 5 of 7 v2] bdiff: give slight preference to longest matches in the middle of the B side

Tue Nov 15 15:57:06 EST 2016

```# HG changeset patch
# Date 1478626653 -3600
#      Tue Nov 08 18:37:33 2016 +0100
# Node ID 5ae7e8061a9671e8941a7a17e316254d228acf59
# Parent  5bb26f29b1509520ca3af4c540775cab50b4d6c0
bdiff: give slight preference to longest matches in the middle of the B side

We already have a slight preference for matches close to the middle on the A
side. Now, do the same on the B side.

j is iterating the b range backwards and we thus accept a new j if the previous
match was in the upper half.

This makes the test-bhalf diff "correct". It obviously also gives more
preference to balanced recursion than to appending to sequences. That is kind
of correct, but will also unfortunately make some bundles bigger. No doubt, we
can also create examples where it will make them smaller ...

The bundle size for 4.0 (hg bundle --base null -r 4.0 x.hg) happens to go from
22803824 to 22806817 bytes - an 0.01% increase.

diff --git a/mercurial/bdiff.c b/mercurial/bdiff.c
--- a/mercurial/bdiff.c
+++ b/mercurial/bdiff.c
@@ -143,7 +143,7 @@ static int longest_match(struct bdiff_li
struct pos *pos,
int a1, int a2, int b1, int b2, int *omi, int *omj)
{
-	int mi = a1, mj = b1, mk = 0, i, j, k, half;
+	int mi = a1, mj = b1, mk = 0, i, j, k, half, bhalf;

/* window our search on large regions to better bound
worst-case performance. by choosing a window at the end, we
@@ -152,6 +152,7 @@ static int longest_match(struct bdiff_li
a1 = a2 - 30000;

half = (a1 + a2 - 1) / 2;
+	bhalf = (b1 + b2 - 1) / 2;

for (i = a1; i < a2; i++) {
/* skip all lines in b after the current block */
@@ -187,8 +188,8 @@ static int longest_match(struct bdiff_li
/* same match but closer to half */
mi = i;
mj = j;
-				} else if (i == mi) {
-					/* same i but earlier j */
+				} else if (i == mi && mj > bhalf) {
+					/* same i but best earlier j */
mj = j;
}
}
diff --git a/tests/test-annotate.t b/tests/test-annotate.t
--- a/tests/test-annotate.t
+++ b/tests/test-annotate.t
@@ -91,9 +91,9 @@ annotate (JSON)
annotate -n b

\$ hg annotate -n b
+  1: a
0: a
1: a
-  1: a
3: b4
3: b5
3: b6
@@ -111,8 +111,8 @@ annotate --no-follow b
annotate -nl b

\$ hg annotate -nl b
+  1:1: a
0:1: a
-  1:2: a
1:3: a
3:4: b4
3:5: b5
@@ -121,9 +121,9 @@ annotate -nl b
annotate -nf b

\$ hg annotate -nf b
+  1 a: a
0 a: a
1 a: a
-  1 a: a
3 b: b4
3 b: b5
3 b: b6
@@ -131,8 +131,8 @@ annotate -nf b
annotate -nlf b

\$ hg annotate -nlf b
+  1 a:1: a
0 a:1: a
-  1 a:2: a
1 a:3: a
3 b:4: b4
3 b:5: b5
@@ -156,9 +156,9 @@ annotate -nlf b
annotate after merge

\$ hg annotate -nf b
+  1 a: a
0 a: a
1 a: a
-  1 a: a
3 b: b4
4 b: c
3 b: b5
@@ -166,8 +166,8 @@ annotate after merge
annotate after merge with -l

\$ hg annotate -nlf b
+  1 a:1: a
0 a:1: a
-  1 a:2: a
1 a:3: a
3 b:4: b4
4 b:5: c
@@ -198,7 +198,7 @@ annotate after merge with -l
annotate after rename merge

\$ hg annotate -nf b
-  0 a: a
+  1 a: a
6 b: z
1 a: a
3 b: b4
@@ -209,7 +209,7 @@ annotate after rename merge
annotate after rename merge with -l

\$ hg annotate -nlf b
-  0 a:1: a
+  1 a:1: a
6 b:2: z
1 a:3: a
3 b:4: b4
@@ -226,7 +226,7 @@ Issue2807: alignment of line numbers wit
\$ echo more >> b
\$ hg ci -mmore -d '7 0'
\$ hg annotate -nlf b
-   0 a: 1: a
+   1 a: 1: a
6 b: 2: z
1 a: 3: a
3 b: 4: b4
@@ -240,15 +240,15 @@ Issue2807: alignment of line numbers wit

\$ hg annotate -r tip -n a
+  1: a
0: a
1: a
-  1: a

\$ hg annotate -r tip -nl a
+  1:1: a
0:1: a
-  1:2: a
1:3: a

Issue589: "undelete" sequence leads to crash
diff --git a/tests/test-bdiff.py b/tests/test-bdiff.py
--- a/tests/test-bdiff.py
+++ b/tests/test-bdiff.py
@@ -79,14 +79,14 @@ testfixws("", "", 0)

print("done")

-print("Odd diff for a trivial change:")
+print("Nice diff for a trivial change:")
showdiff(
''.join('<%s\n-\n' % i for i in range(5)),
''.join('>%s\n-\n' % i for i in range(5)))

-print("Diff 1 to 3 lines - preference for adding / removing at the end of sequences:")
+print("Diff 1 to 3 lines - preference for balanced recursion:")
showdiff('a\n', 'a\n' * 3)
-print("Diff 1 to 5 lines - preference for adding / removing at the end of sequences:")
+print("Diff 1 to 5 lines - preference for balanced recursion:")
showdiff('a\n', 'a\n' * 5)
print("Diff 3 to 1 lines - preference for balanced recursion:")
showdiff('a\n' * 3, 'a\n')
diff --git a/tests/test-bdiff.py.out b/tests/test-bdiff.py.out
--- a/tests/test-bdiff.py.out
+++ b/tests/test-bdiff.py.out
@@ -42,31 +42,34 @@ showdiff(
'f\n'
done
done
-Odd diff for a trivial change:
+Nice diff for a trivial change:
showdiff(
'<0\n-\n<1\n-\n<2\n-\n<3\n-\n<4\n-\n',
'>0\n-\n>1\n-\n>2\n-\n>3\n-\n>4\n-\n'):
- 0 8 '<0\n-\n<1\n' -> '>0\n'
- '-\n'
- 10 13 '<2\n' -> '>1\n'
+ 0 3 '<0\n' -> '>0\n'
'-\n'
- 15 18 '<3\n' -> '>2\n'
+ 5 8 '<1\n' -> '>1\n'
'-\n'
- 20 23 '<4\n' -> '>3\n'
+ 10 13 '<2\n' -> '>2\n'
'-\n'
- 25 25 '' -> '>4\n-\n'
-Diff 1 to 3 lines - preference for adding / removing at the end of sequences:
+ 15 18 '<3\n' -> '>3\n'
+ '-\n'
+ 20 23 '<4\n' -> '>4\n'
+ '-\n'
+Diff 1 to 3 lines - preference for balanced recursion:
showdiff(
'a\n',
'a\na\na\n'):
+ 0 0 '' -> 'a\n'
'a\n'
- 2 2 '' -> 'a\na\n'
-Diff 1 to 5 lines - preference for adding / removing at the end of sequences:
+ 2 2 '' -> 'a\n'
+Diff 1 to 5 lines - preference for balanced recursion:
showdiff(
'a\n',
'a\na\na\na\na\n'):
+ 0 0 '' -> 'a\na\n'
'a\n'
- 2 2 '' -> 'a\na\na\na\n'
+ 2 2 '' -> 'a\na\n'
Diff 3 to 1 lines - preference for balanced recursion:
showdiff(
'a\na\na\n',
diff --git a/tests/test-commit-amend.t b/tests/test-commit-amend.t
--- a/tests/test-commit-amend.t
+++ b/tests/test-commit-amend.t
@@ -47,9 +47,9 @@ Amending changeset with changes in worki
--- a/a	Thu Jan 01 00:00:00 1970 +0000
+++ b/a	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +1,3 @@
+  +a
a
+a
-  +a
\$ hg log
changeset:   1:43f1ba15f28a
tag:         tip
@@ -122,13 +122,13 @@ No changes, just a different message:
uncompressed size of bundle content:
254 (changelog)
163 (manifests)
-       129  a
+       141  a
saved backup bundle to \$TESTTMP/.hg/strip-backup/74609c7f506e-1bfde511-amend-backup.hg (glob)
1 changesets found
uncompressed size of bundle content:
250 (changelog)
163 (manifests)
-       129  a
+       141  a
@@ -140,9 +140,9 @@ No changes, just a different message:
--- a/a	Thu Jan 01 00:00:00 1970 +0000
+++ b/a	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +1,3 @@
+  +a
a
+a
-  +a
\$ hg log
changeset:   1:1cd866679df8
tag:         tip
@@ -266,13 +266,13 @@ then, test editing custom commit message
uncompressed size of bundle content:
249 (changelog)
163 (manifests)
-       131  a
+       143  a
saved backup bundle to \$TESTTMP/.hg/strip-backup/5f357c7560ab-e7c84ade-amend-backup.hg (glob)
1 changesets found
uncompressed size of bundle content:
257 (changelog)
163 (manifests)
-       131  a
+       143  a
@@ -309,13 +309,13 @@ Same, but with changes in working dir (d
uncompressed size of bundle content:
464 (changelog)
322 (manifests)
-       249  a
+       261  a
saved backup bundle to \$TESTTMP/.hg/strip-backup/7ab3bf440b54-8e3b5088-amend-backup.hg (glob)
1 changesets found
uncompressed size of bundle content:
257 (changelog)
163 (manifests)
-       133  a
+       145  a
diff --git a/tests/test-mq-qfold.t b/tests/test-mq-qfold.t
--- a/tests/test-mq-qfold.t
+++ b/tests/test-mq-qfold.t
@@ -51,8 +51,8 @@ specified)
--- a/a
+++ b/a
@@ -1,1 +1,3 @@
+  +a
a
-  +a
+b

Fold with local changes:
@@ -67,8 +67,8 @@ Fold with local changes:
--- a/a
+++ b/a
@@ -1,1 +1,3 @@
+  +a
a
-  +a
+b

\$ hg revert -a --no-backup
```