[PATCH 5 of 6] pure Python implementation of bdiff.c

Benoit Boissinot benoit.boissinot at ens-lyon.org
Mon Jan 12 13:12:22 CST 2009


On Mon, Jan 12, 2009 at 06:39:39PM +0100, Martin Geisler wrote:
> # HG changeset patch
> # User Martin Geisler <mg at daimi.au.dk>
> # Date 1231781066 -3600
> # Node ID 211568685b2a7f6a9916f57d9a53b8c53a0b06f0
> # Parent  e5ef97c26d1b1cba1c090c2ba95fef426d425884
> pure Python implementation of bdiff.c
> The bdiff.blocks functions is not quite correct here, it gives one
> error in the test suite:
> 
>   ERROR: test-bdiff output changed
>   --- Expected output
>   +++ Test output
>   @@ -17,7 +17,7 @@
>    *** 'abc' 'abc'
>    *** 'a\n' 'a\n'
>    *** 'a\nb' 'a\nb'
>   +5 5 '\ny\n'
>    6 6 'y\n\n'
>   -6 6 'y\n\n'
>   -9 9 'y\n\n'
>   +8 8 '\ny\n'
>    done
>   !.
>   Failed test-bdiff: output changed

Working implementation below:

diff --git a/mercurial/bdiff.py b/mercurial/bdiff.py
new file mode 100644
--- /dev/null
+++ b/mercurial/bdiff.py
@@ -0,0 +1,70 @@
+# bdiff.py - Python implementation of bdiff.c
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import re, struct, difflib, util, mpatch
+# mdiff import moved to bottom due to import cycle
+
+def _get_matching_blocks(a, b):
+    blocks = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+
+    # normalize the blocks (push towards the end)
+    prev = None
+    for curr in blocks:
+        if prev is None:
+            prev = curr
+            continue
+        shift = 0
+
+        a1, b1, l1 = prev
+        a1end = a1 + l1
+        b1end = b1 + l1
+
+        a2, b2, l2 = curr
+        a2end = a2 + l2
+        b2end = b2 + l2
+        if a1end == a2:
+            while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
+                shift += 1
+        elif b1end == b2:
+            while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
+                shift += 1
+        yield a1, b1, l1+shift
+        prev = a2+shift, b2+shift, l2-shift
+    yield prev
+
+def diff(a, b):
+    if not a:
+        s = "".join(b)
+        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+
+    bin = []
+    p = [0]
+    for i in a: p.append(p[-1] + len(i))
+
+    d = _get_matching_blocks(a, b)
+    la = 0
+    lb = 0
+    for am, bm, size in d:
+        s = "".join(b[lb:bm])
+        if am > la or s:
+            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+
+    return "".join(bin)
+
+def bdiff(a, b):
+    return diff(str(a).splitlines(1), str(b).splitlines(1))
+
+def blocks(a, b):
+    an = mdiff.splitnewlines(a)
+    bn = mdiff.splitnewlines(b)
+    d = _get_matching_blocks(an, bn)
+    return [(i, i + n, j, j + n) for (i, j, n) in d]
+
+# this breaks an import cycle
+import mdiff

-- 
:wq


More information about the Mercurial-devel mailing list