[PATCH 1 of 6] pure Python implementation of mpatch.c

Martin Geisler mg at daimi.au.dk
Mon Jan 12 10:12:52 CST 2009


# HG changeset patch
# User Martin Geisler <mg at daimi.au.dk>
# Date 1231775708 -3600
# Node ID 5a00f261dba21481b19bfb2a3dc5408bd1ac2395
# Parent  f2fa1a9eede60ef1e07bd45a014609e14198bbfb
pure Python implementation of mpatch.c

diff -r f2fa1a9eede6 -r 5a00f261dba2 mercurial/mpatch.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/mpatch.py	Mon Jan 12 16:55:08 2009 +0100
@@ -0,0 +1,103 @@
+# mpatch.py - Python implementation of mpatch.c
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import struct, mmap, difflib
+
+devzero = file("/dev/zero")
+
+# This attempts to apply a series of patches in time proportional to
+# the total size of the patches, rather than patches * len(text). This
+# means rather than shuffling strings around, we shuffle around
+# pointers to fragments with fragment lists.
+#
+# When the fragment lists get too long, we collapse them. To do this
+# efficiently, we do all our operations inside a buffer created by
+# mmap and simply use memmove. This avoids creating a bunch of large
+# temporary string buffers.
+
+def patches(a, bins):
+    if not bins: return a
+
+    plens = [len(x) for x in bins]
+    pl = sum(plens)
+    bl = len(a) + pl
+    tl = bl + bl + pl # enough for the patches and two working texts
+    b1, b2 = 0, bl
+
+    if not tl: return a
+
+    m = mmap.mmap(devzero.fileno(), tl, mmap.MAP_PRIVATE)
+
+    # load our original text
+    m.write(a)
+    frags = [(len(a), b1)]
+
+    # copy all the patches into our segment so we can memmove from them
+    pos = b2 + bl
+    m.seek(pos)
+    for p in bins: m.write(p)
+
+    def pull(dst, src, l): # pull l bytes from src
+        while l:
+            f = src.pop(0)
+            if f[0] > l: # do we need to split?
+                src.insert(0, (f[0] - l, f[1] + l))
+                dst.append((l, f[1]))
+                return
+            dst.append(f)
+            l -= f[0]
+
+    def collect(buf, list):
+        start = buf
+        for l, p in list:
+            m.move(buf, p, l)
+            buf += l
+        return (buf - start, start)
+
+    for plen in plens:
+        # if our list gets too long, execute it
+        if len(frags) > 128:
+            b2, b1 = b1, b2
+            frags = [collect(b1, frags)]
+
+        new = []
+        end = pos + plen
+        last = 0
+        while pos < end:
+            p1, p2, l = struct.unpack(">lll", m[pos:pos + 12])
+            pull(new, frags, p1 - last) # what didn't change
+            pull([], frags, p2 - p1)    # what got deleted
+            new.append((l, pos + 12))        # what got added
+            pos += l + 12
+            last = p2
+        frags = new + frags                    # what was left at the end
+
+    t = collect(b2, frags)
+
+    return m[t[1]:t[1] + t[0]]
+
+def patchedsize(orig, delta):
+    outlen, last, bin = 0, 0, 0
+    binend = len(delta)
+    data = 12
+
+    while data <= binend:
+        decode = delta[bin:bin + 12]
+        start, end, length = struct.unpack(">lll", decode)
+        if start > end:
+            break
+        bin = data + length
+        data = bin + 12
+        outlen += start - last
+        last = end
+        outlen += length
+
+    if bin != binend:
+        raise Exception("patch cannot be decoded")
+
+    outlen += orig - last
+    return outlen


More information about the Mercurial-devel mailing list