[PATCH 5 of 6] pure Python implementation of bdiff.c

Martin Geisler mg at daimi.au.dk
Mon Jan 12 10:12:56 CST 2009


# HG changeset patch
# User Martin Geisler <mg at daimi.au.dk>
# Date 1231775752 -3600
# Node ID bf345daa61bcabbde8d73e547bf8b345fcbdba4f
# Parent  60b77f666c007cffdc213f1ada50a38e6e16dfce
pure Python implementation of bdiff.c
The bdiff.blocks function is not quite correct here; it gives one
error in the test suite:

  ERROR: test-bdiff output changed
  --- Expected output
  +++ Test output
  @@ -17,7 +17,7 @@
   *** 'abc' 'abc'
   *** 'a\n' 'a\n'
   *** 'a\nb' 'a\nb'
  +5 5 '\ny\n'
   6 6 'y\n\n'
  -6 6 'y\n\n'
  -9 9 'y\n\n'
  +8 8 '\ny\n'
   done
  !.
  Failed test-bdiff: output changed

diff -r 60b77f666c00 -r bf345daa61bc mercurial/bdiff.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/bdiff.py	Mon Jan 12 16:55:52 2009 +0100
@@ -0,0 +1,80 @@
+# bdiff.py - Python implementation of bdiff.c
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import re, struct, difflib, util, mpatch
+# mdiff import moved to bottom due to import cycle
+
+def sortdiff(a, b):
+    la = lb = 0
+    lena = len(a)
+    lenb = len(b)
+
+    while 1:
+        am, bm, = la, lb
+
+        # walk over matching lines
+        while lb < lenb and la < lena and a[la] == b[lb]:
+            la += 1
+            lb += 1
+
+        if la > am:
+            yield (am, bm, la - am) # return a match
+
+        # skip mismatched lines from b
+        while la < lena and lb < lenb and b[lb] < a[la]:
+            lb += 1
+
+        if lb >= lenb:
+            break
+
+        # skip mismatched lines from a
+        while la < lena and lb < lenb and b[lb] > a[la]:
+            la += 1
+
+        if la >= lena:
+            break
+
+    yield (lena, lenb, 0)
+
+def diff(a, b, sorted=0):
+    if not a:
+        s = "".join(b)
+        return s and (struct.pack(">lll", 0, 0, len(s)) + s)
+
+    bin = []
+    p = [0]
+    for i in a: p.append(p[-1] + len(i))
+
+    if sorted:
+        try:
+            d = sortdiff(a, b)
+        except:
+            raise
+    else:
+        d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
+    la = 0
+    lb = 0
+    for am, bm, size in d:
+        s = "".join(b[lb:bm])
+        if am > la or s:
+            bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+
+    return "".join(bin)
+
+def bdiff(a, b):
+    return diff(str(a).splitlines(1), str(b).splitlines(1))
+
+def blocks(a, b):
+    an = mdiff.splitnewlines(a)
+    bn = mdiff.splitnewlines(b)
+    d = difflib.SequenceMatcher(None, an, bn)
+    return [(i, i + n, j, j + n) for (i, j, n) in d.get_matching_blocks()]
+
+# this breaks an import cycle
+import mdiff


More information about the Mercurial-devel mailing list