[PATCH 5 of 6] pure Python implementation of bdiff.c
Benoit Boissinot
benoit.boissinot at ens-lyon.org
Mon Jan 12 13:12:22 CST 2009
On Mon, Jan 12, 2009 at 06:39:39PM +0100, Martin Geisler wrote:
> # HG changeset patch
> # User Martin Geisler <mg at daimi.au.dk>
> # Date 1231781066 -3600
> # Node ID 211568685b2a7f6a9916f57d9a53b8c53a0b06f0
> # Parent e5ef97c26d1b1cba1c090c2ba95fef426d425884
> pure Python implementation of bdiff.c
> The bdiff.blocks functions is not quite correct here, it gives one
> error in the test suite:
>
> ERROR: test-bdiff output changed
> --- Expected output
> +++ Test output
> @@ -17,7 +17,7 @@
> *** 'abc' 'abc'
> *** 'a\n' 'a\n'
> *** 'a\nb' 'a\nb'
> +5 5 '\ny\n'
> 6 6 'y\n\n'
> -6 6 'y\n\n'
> -9 9 'y\n\n'
> +8 8 '\ny\n'
> done
> !.
> Failed test-bdiff: output changed
Working implementation below:
diff --git a/mercurial/bdiff.py b/mercurial/bdiff.py
new file mode 100644
--- /dev/null
+++ b/mercurial/bdiff.py
@@ -0,0 +1,70 @@
+# bdiff.py - Python implementation of bdiff.c
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import re, struct, difflib, util, mpatch
+# mdiff import moved to bottom due to import cycle
+
+def _get_matching_blocks(a, b):
+ blocks = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+
+ # normalize the blocks (push towards the end)
+ prev = None
+ for curr in blocks:
+ if prev is None:
+ prev = curr
+ continue
+ shift = 0
+
+ a1, b1, l1 = prev
+ a1end = a1 + l1
+ b1end = b1 + l1
+
+ a2, b2, l2 = curr
+ a2end = a2 + l2
+ b2end = b2 + l2
+ if a1end == a2:
+ while a1end+shift < a2end and a[a1end+shift] == b[b1end+shift]:
+ shift += 1
+ elif b1end == b2:
+ while b1end+shift < b2end and a[a1end+shift] == b[b1end+shift]:
+ shift += 1
+ yield a1, b1, l1+shift
+ prev = a2+shift, b2+shift, l2-shift
+ yield prev
+
+def diff(a, b):
+ if not a:
+ s = "".join(b)
+ return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+
+ bin = []
+ p = [0]
+ for i in a: p.append(p[-1] + len(i))
+
+ d = _get_matching_blocks(a, b)
+ la = 0
+ lb = 0
+ for am, bm, size in d:
+ s = "".join(b[lb:bm])
+ if am > la or s:
+ bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+ la = am + size
+ lb = bm + size
+
+ return "".join(bin)
+
+def bdiff(a, b):
+ return diff(str(a).splitlines(1), str(b).splitlines(1))
+
+def blocks(a, b):
+ an = mdiff.splitnewlines(a)
+ bn = mdiff.splitnewlines(b)
+ d = _get_matching_blocks(an, bn)
+ return [(i, i + n, j, j + n) for (i, j, n) in d]
+
+# this breaks an import cycle
+import mdiff
--
:wq
More information about the Mercurial-devel
mailing list