[PATCH] refactoring patch.diff

Adrian Buehlmann adrian at cadifra.com
Mon Apr 14 09:57:33 CDT 2008


After spending quite some time analyzing patch.diff, I have refactored it
by moving most of the for loop contents into two new classes.

I believe the code is better readable with this change and would like
to propose it for inclusion (or learn why it is bad, if anybody of the
hg gurus is willing to donate a timeslot for Python newbie teaching :-).

I also pushed the patch here for review:
http://www.cadifra.com/cgi-bin/repos/mercurial-abuehl/rev/964a3a4e894d

Excerpt from changed patch.diff (starting at line 1283):

'''
    if opts.git:
        d = gitdiffer(repo, ctx1, ctx2, added, removed, node1, node2)
    else:
        d = differ(repo, ctx1, ctx2, added, removed)

    all = modified + added + removed
    all.sort()

    for f in all:
        header, a, b, to, tn, dodiff = d.next(f)
        if dodiff:
            if dodiff == 'binary':
                text = b85diff(to, tn)
            else:
                text = mdiff.unidiff(to, date1,
                                    # ctx2 date may be dynamic
                                    tn, util.datestr(ctx2.date()),
                                    a, b, r, opts=opts)
            if text or len(header) > 1:
                fp.write(''.join(header))
            fp.write(text)
'''

The line

        header, a, b, to, tn, dodiff = d.next(f)

replaces a larger pile of code lines (see diff in patch below or link above),
which I have moved into new classes "differ" (base class) and "gitdiffer".

Regression tests run on FreeBSD 6.2:

%python run-tests.py
[dots omitted]
Skipped test-convert-baz: missing feature: GNU Arch baz client
Skipped test-convert-darcs: missing feature: darcs client
Skipped test-convert-mtn: missing feature: monotone client (> 0.31)
Skipped test-highlight: missing feature: Pygments source highlighting library
Skipped test-imerge: not executable
Skipped test-merge-internal-tools-pattern: not executable
Skipped test-no-symlinks: system supports symbolic links
# Ran 255 tests, 7 skipped, 0 failed.

Here is the full patch:

# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1207865114 -7200
# Node ID 8d23b100a8d4116f8009ef3bfd2efaea396cf052
# Parent  65f1b97484be50f3565a9ca59e9fbe01b3d143b8
refactoring patch.diff

New classes "differ" and "gitdiffer"

diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -1152,6 +1152,93 @@
     ret.append('\n')
     return ''.join(ret)

+class differ:
+    def __init__(self, repo, ctx1, ctx2, added, removed):
+        self.ctx1 = ctx1
+        self.ctx2 = ctx2
+        self.man1 = ctx1.manifest()
+        self.added, self.removed = added, removed
+        self.filelogcache = {}
+
+    def _getfilectx(self, f, ctx):
+        flctx = ctx.filectx(f, filelog=self.filelogcache.get(f))
+        if f not in self.filelogcache:
+            self.filelogcache[f] = flctx._filelog
+        return flctx
+
+    def next(self, f):
+        header = []
+        dodiff = True
+        to, tn = None, None
+        if f in self.man1:
+            to = self._getfilectx(f, self.ctx1).data()
+        if f not in self.removed:
+            tn = self._getfilectx(f, self.ctx2).data()
+        a, b = f, f
+        return header, a, b, to, tn, dodiff
+
+class gitdiffer(differ):
+    def __init__(self, repo, ctx1, ctx2, added, removed, node1, node2):
+        # node1 is intentionally unused
+        differ.__init__(self, repo, ctx1, ctx2, added, removed)
+        self.gone = {}
+        self.copy, diverge = copies.copies(repo, ctx1, ctx2, repo.changectx(nullid))
+        for k, v in self.copy.items():
+            self.copy[v] = k
+        if node2:
+            man2 = ctx2.manifest()
+            self.execf2 = man2.execf
+            self.linkf2 = man2.linkf
+        else:
+            self.execf2 = util.execfunc(repo.root, None)
+            self.linkf2 = util.linkfunc(repo.root, None)
+            if self.execf2 is None:
+                mc = ctx2.parents()[0].manifest().copy()
+                self.execf2 = mc.execf
+                self.linkf2 = mc.linkf
+
+    def next(self, f):
+        def gitmode(x, l):
+            return l and '120000' or (x and '100755' or '100644')
+        def addmode(header, omode, nmode):
+            if omode != nmode:
+                header.append('old mode %s\n' % omode)
+                header.append('new mode %s\n' % nmode)
+        header, a, b, to, tn, dodiff = differ.next(self, f)
+        if f in self.added:
+            mode = gitmode(self.execf2(f), self.linkf2(f))
+            if f in self.copy:
+                a = self.copy[f]
+                omode = gitmode(self.man1.execf(a), self.man1.linkf(a))
+                addmode(header, omode, mode)
+                if a in self.removed and a not in self.gone:
+                    op = 'rename'
+                    self.gone[a] = 1
+                else:
+                    op = 'copy'
+                header.append('%s from %s\n' % (op, a))
+                header.append('%s to %s\n' % (op, b))
+                to = self._getfilectx(a, self.ctx1).data()
+            else:
+                header.append('new file mode %s\n' % mode)
+            if util.binary(tn):
+                dodiff = 'binary'
+        elif f in self.removed:
+            # have we already reported a copy above?
+            if f in self.copy and self.copy[f] in self.added and self.copy[self.copy[f]] == f:
+                dodiff = False
+            else:
+                mode = gitmode(self.man1.execf(f), self.man1.linkf(f))
+                header.append('deleted file mode %s\n' % mode)
+        else:
+            omode = gitmode(self.man1.execf(a), self.man1.linkf(a))
+            nmode = gitmode(self.execf2(b), self.linkf2(b))
+            addmode(header, omode, nmode)
+            if util.binary(to) or util.binary(tn):
+                dodiff = 'binary'
+        header.insert(0, 'diff --git a/%s b/%s\n' % (a, b))
+        return header, a, b, to, tn, dodiff
+
 def diff(repo, node1=None, node2=None, files=None, match=util.always,
          fp=None, changes=None, opts=None):
     '''print diff of changes to files between two nodes, or node and
@@ -1167,13 +1254,6 @@

     if not node1:
         node1 = repo.dirstate.parents()[0]
-
-    flcache = {}
-    def getfilectx(f, ctx):
-        flctx = ctx.filectx(f, filelog=flcache.get(f))
-        if f not in flcache:
-            flcache[f] = flctx._filelog
-        return flctx

     # reading the data for node1 early allows it to play nicely
     # with repo.status and the revlog cache.
@@ -1191,83 +1271,25 @@

     if node2:
         ctx2 = context.changectx(repo, node2)
-        execf2 = ctx2.manifest().execf
-        linkf2 = ctx2.manifest().linkf
     else:
         ctx2 = context.workingctx(repo)
-        execf2 = util.execfunc(repo.root, None)
-        linkf2 = util.linkfunc(repo.root, None)
-        if execf2 is None:
-            mc = ctx2.parents()[0].manifest().copy()
-            execf2 = mc.execf
-            linkf2 = mc.linkf

-    if repo.ui.quiet:
+    if repo.ui.quiet or opts.git:
         r = None
     else:
         hexfunc = repo.ui.debugflag and hex or short
         r = [hexfunc(node) for node in [node1, node2] if node]

     if opts.git:
-        copy, diverge = copies.copies(repo, ctx1, ctx2, repo.changectx(nullid))
-        for k, v in copy.items():
-            copy[v] = k
+        d = gitdiffer(repo, ctx1, ctx2, added, removed, node1, node2)
+    else:
+        d = differ(repo, ctx1, ctx2, added, removed)

     all = modified + added + removed
     all.sort()
-    gone = {}

     for f in all:
-        to = None
-        tn = None
-        dodiff = True
-        header = []
-        if f in man1:
-            to = getfilectx(f, ctx1).data()
-        if f not in removed:
-            tn = getfilectx(f, ctx2).data()
-        a, b = f, f
-        if opts.git:
-            def gitmode(x, l):
-                return l and '120000' or (x and '100755' or '100644')
-            def addmodehdr(header, omode, nmode):
-                if omode != nmode:
-                    header.append('old mode %s\n' % omode)
-                    header.append('new mode %s\n' % nmode)
-
-            if f in added:
-                mode = gitmode(execf2(f), linkf2(f))
-                if f in copy:
-                    a = copy[f]
-                    omode = gitmode(man1.execf(a), man1.linkf(a))
-                    addmodehdr(header, omode, mode)
-                    if a in removed and a not in gone:
-                        op = 'rename'
-                        gone[a] = 1
-                    else:
-                        op = 'copy'
-                    header.append('%s from %s\n' % (op, a))
-                    header.append('%s to %s\n' % (op, f))
-                    to = getfilectx(a, ctx1).data()
-                else:
-                    header.append('new file mode %s\n' % mode)
-                if util.binary(tn):
-                    dodiff = 'binary'
-            elif f in removed:
-                # have we already reported a copy above?
-                if f in copy and copy[f] in added and copy[copy[f]] == f:
-                    dodiff = False
-                else:
-                    mode = gitmode(man1.execf(f), man1.linkf(f))
-                    header.append('deleted file mode %s\n' % mode)
-            else:
-                omode = gitmode(man1.execf(f), man1.linkf(f))
-                nmode = gitmode(execf2(f), linkf2(f))
-                addmodehdr(header, omode, nmode)
-                if util.binary(to) or util.binary(tn):
-                    dodiff = 'binary'
-            r = None
-            header.insert(0, 'diff --git a/%s b/%s\n' % (a, b))
+        header, a, b, to, tn, dodiff = d.next(f)
         if dodiff:
             if dodiff == 'binary':
                 text = b85diff(to, tn)


More information about the Mercurial-devel mailing list