[PATCH 3 of 6 STABLE V2] bdiff: fix latent normalization bug

Matt Mackall mpm at selenic.com
Mon Apr 25 18:10:19 EDT 2016


# HG changeset patch
# User Matt Mackall <mpm at selenic.com>
# Date 1461293598 18000
#      Thu Apr 21 21:53:18 2016 -0500
# Branch stable
# Node ID 7f6d156943543c0c5ce48a3bf7cb0770ae1f0aef
# Parent  e6294e0367777d1cfd5121db40559e89a7e71b68
bdiff: fix latent normalization bug

This bug is hidden by the current bias towards matches at the
beginning of the file. When this bias is tweaked later to address
recursion balancing, the normalization code could cause the next block
to shrink to a negative length, thus creating invalid delta chunks. We
add checks here to disallow that.

This bug requires test cases that are an awkwardly large size for the test
suite, but is very rapidly picked up by the included torture tester.

diff -r e6294e036777 -r 7f6d15694354 contrib/bdiff-torture.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/bdiff-torture.py	Thu Apr 21 21:53:18 2016 -0500
@@ -0,0 +1,98 @@
+# Randomized torture test generation for bdiff
+
+from __future__ import absolute_import, print_function
+import random, sys
+from mercurial import (
+    bdiff,
+    mpatch,
+)
+
+def reducetest(a, b):
+    tries = 0
+    reductions = 0
+    print("reducing...")
+    while tries < 1000:
+        a2 = "\n".join(l for l in a.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        b2 = "\n".join(l for l in b.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        if a2 == a and b2 == b:
+            continue
+        if a2 == b2:
+            continue
+        tries += 1
+
+        try:
+            test1(a, b)
+        except Exception as inst:
+            reductions += 1
+            tries = 0
+            a = a2
+            b = b2
+
+    print("reduced:", reductions, len(a) + len(b),
+          repr(a), repr(b))
+    try:
+        test1(a, b)
+    except Exception as inst:
+        print("failed:", inst)
+
+    sys.exit(0)
+
+def test1(a, b):
+    d = bdiff.bdiff(a, b)
+    if not d:
+        raise ValueError("empty")
+    c = mpatch.patches(a, [d])
+    if c != b:
+        raise ValueError("bad")
+
+def testwrap(a, b):
+    try:
+        test1(a, b)
+        return
+    except Exception as inst:
+        pass
+    print("exception:", inst)
+    reducetest(a, b)
+
+def test(a, b):
+    testwrap(a, b)
+    testwrap(b, a)
+
+def rndtest(size, noise):
+    a = []
+    src = "                aaaaaaaabbbbccd"
+    for x in xrange(size):
+        a.append(src[random.randint(0, len(src) - 1)])
+
+    while True:
+        b = [c for c in a if random.randint(0, 99) > noise]
+        b2 = []
+        for c in b:
+            b2.append(c)
+            while random.randint(0, 99) < noise:
+                b2.append(src[random.randint(0, len(src) - 1)])
+        if b2 != a:
+            break
+
+    a = "\n".join(a) + "\n"
+    b = "\n".join(b2) + "\n"
+
+    test(a, b)
+
+maxvol = 10000
+startsize = 2
+while True:
+    size = startsize
+    count = 0
+    while size < maxvol:
+        print(size)
+        volume = 0
+        while volume < maxvol:
+            rndtest(size, 2)
+            volume += size
+            count += 2
+        size *= 2
+    maxvol *= 4
+    startsize *= 4
diff -r e6294e036777 -r 7f6d15694354 mercurial/bdiff.c
--- a/mercurial/bdiff.c	Thu Apr 21 21:46:31 2016 -0500
+++ b/mercurial/bdiff.c	Thu Apr 21 21:53:18 2016 -0500
@@ -265,6 +265,8 @@
 
 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
 			while (curr->a2 < an && curr->b2 < bn
+			       && next->a1 < next->a2
+			       && next->b1 < next->b2
 			       && !cmp(a + curr->a2, b + curr->b2)) {
 				curr->a2++;
 				next->a1++;


More information about the Mercurial-devel mailing list