[PATCH v2] patch: fix patch hunk/metdata synchronization (issue3384)

Patrick Mezard patrick at mezard.eu
Sat Apr 21 14:46:29 CDT 2012


# HG changeset patch
# User Patrick Mezard <patrick at mezard.eu>
# Date 1335037225 -7200
# Branch stable
# Node ID 2be89f9bbb7532d36b1edb044d26a2fe1ea2fd07
# Parent  cbf2ea2f5ca169d22e0729cb71a21b808574b90e
patch: fix patch hunk/metdata synchronization (issue3384)

Git patches are parsed in two phases: 1) extract metadata, 2) parse actual
deltas and merge them with the previous metadata. We do this to avoid
dependency issues like "modify a; copy a to b", where "b" must be copied from
the unmodified "a".

Issue3384 is caused by flaky code I wrote to synchronize the patch metadata
with the emitted hunk:

 if (gitpatches and
     (gitpatches[-1][0] == afile or gitpatches[-1][1] == bfile)):
     gp = gitpatches.pop()[2]

With a patch like:

 diff --git a/a b/c
 copy from a
 copy to c
 --- a/a
 +++ b/c
 @@ -1,1 +1,2 @@
  a
 +a
 @@ -2,1 +2,2 @@
  a
 +a
 diff --git a/a b/a
 --- a/a
 +++ b/a
 @@ -1,1 +1,2 @@
  a
 +b

the first hunk of the first block is matched with the metadata for the block
"diff --git a/a b/c", then the second hunk of the first block is matched with
the metadata of the second block "diff --git a/a b/a", because of the "or" in
the code paste above. Turning the "or" into an "and" is not enough as we have
to deal with /dev/null cases for each file.

We I remove this broken piece of code:

 # copy/rename + modify should modify target, not source
 if gp.op in ('COPY', 'DELETE', 'RENAME', 'ADD') or gp.mode:
     afile = bfile

because "afile = bfile" set "afile" to stuff like "b/file" instead of "a/file",
and because this only happens for git patches, which afile/bfile are ignored
anyway by applydiff().

v2:
- Avoid a traceback on git metadata desynchronization

diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -290,6 +290,19 @@
         other.binary = self.binary
         return other
 
+    def _ispatchinga(self, afile):
+        if afile == '/dev/null':
+            return self.op == 'ADD'
+        return afile == 'a/' + (self.oldpath or self.path)
+
+    def _ispatchingb(self, bfile):
+        if bfile == '/dev/null':
+            return self.op == 'DELETE'
+        return bfile == 'b/' + self.path
+
+    def ispatching(self, afile, bfile):
+        return self._ispatchinga(afile) and self._ispatchingb(bfile)
+
     def __repr__(self):
         return "<patchmeta %s %r>" % (self.op, self.path)
 
@@ -1180,8 +1193,8 @@
             or x.startswith('GIT binary patch')):
             gp = None
             if (gitpatches and
-                (gitpatches[-1][0] == afile or gitpatches[-1][1] == bfile)):
-                gp = gitpatches.pop()[2]
+                gitpatches[-1].ispatching(afile, bfile)):
+                gp = gitpatches.pop()
             if x.startswith('GIT binary patch'):
                 h = binhunk(lr)
             else:
@@ -1197,22 +1210,21 @@
             m = gitre.match(x)
             if not m:
                 continue
-            if not gitpatches:
+            if gitpatches is None:
                 # scan whole input for git metadata
-                gitpatches = [('a/' + gp.path, 'b/' + gp.path, gp) for gp
-                              in scangitpatch(lr, x)]
-                yield 'git', [g[2].copy() for g in gitpatches
-                              if g[2].op in ('COPY', 'RENAME')]
+                gitpatches = scangitpatch(lr, x)
+                yield 'git', [g.copy() for g in gitpatches
+                              if g.op in ('COPY', 'RENAME')]
                 gitpatches.reverse()
             afile = 'a/' + m.group(1)
             bfile = 'b/' + m.group(2)
-            while afile != gitpatches[-1][0] and bfile != gitpatches[-1][1]:
-                gp = gitpatches.pop()[2]
+            while gitpatches and not gitpatches[-1].ispatching(afile, bfile):
+                gp = gitpatches.pop()
                 yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
-            gp = gitpatches[-1][2]
-            # copy/rename + modify should modify target, not source
-            if gp.op in ('COPY', 'DELETE', 'RENAME', 'ADD') or gp.mode:
-                afile = bfile
+            if not gitpatches:
+                raise PatchError(_('failed to synchronize metadata for "%s"')
+                                 % afile[2:])
+            gp = gitpatches[-1]
             newfile = True
         elif x.startswith('---'):
             # check for a unified diff
@@ -1247,7 +1259,7 @@
             hunknum = 0
 
     while gitpatches:
-        gp = gitpatches.pop()[2]
+        gp = gitpatches.pop()
         yield 'file', ('a/' + gp.path, 'b/' + gp.path, None, gp.copy())
 
 def applydiff(ui, fp, backend, store, strip=1, eolmode='strict'):
diff --git a/tests/test-import-git.t b/tests/test-import-git.t
--- a/tests/test-import-git.t
+++ b/tests/test-import-git.t
@@ -483,4 +483,28 @@
   ? b.rej
   ? linkb.rej
 
+Test corner case involving copies and multiple hunks (issue3384)
+
+  $ hg revert -qa
+  $ hg import --no-commit - <<EOF
+  > diff --git a/a b/c
+  > copy from a
+  > copy to c
+  > --- a/a
+  > +++ b/c
+  > @@ -1,1 +1,2 @@
+  >  a
+  > +a
+  > @@ -2,1 +2,2 @@
+  >  a
+  > +a
+  > diff --git a/a b/a
+  > --- a/a
+  > +++ b/a
+  > @@ -1,1 +1,2 @@
+  >  a
+  > +b
+  > EOF
+  applying patch from stdin
+
   $ cd ..


More information about the Mercurial-devel mailing list