[PATCH 6 of 6 v2] convert: introduce --full for converting all files

Mads Kiilerich mads at kiilerich.com
Tue Aug 26 15:04:06 CDT 2014


# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1409083412 -7200
#      Tue Aug 26 22:03:32 2014 +0200
# Node ID 8813e0874fdda6ce4178a24c1f756b4c043a045a
# Parent  9022fa68bb471e2b8fcc136f4e371aad45c81b0e
convert: introduce --full for converting all files

Convert will normally only process files that were changed in each source
revision, apply the filemap, and record it as a change in the target
repository. (If it ends up not really changing anything, nothing changes.)

That means that _if_ the filemap is changed before continuing an incremental
convert, the change will only kick in when the files it affects are modified in
a source revision and thus processed.

With --full, convert will make a full conversion every time and process
all files in the source repo and remove target repo files that shouldn't be
there. Filemap changes will thus kick in on the first converted revision, no
matter what is changed.

This flag should in most cases not make any difference but will make convert
significantly slower.

Other names have been considered for this feature, such as "resync", "sync",
"checkunmodified", "all" or "allfiles", but I found that they were less obvious
and required more explanation than "full" and were harder to describe
consistently.

diff --git a/hgext/convert/__init__.py b/hgext/convert/__init__.py
--- a/hgext/convert/__init__.py
+++ b/hgext/convert/__init__.py
@@ -29,6 +29,8 @@ testedwith = 'internal'
     ('A', 'authormap', '', _('remap usernames using this file'), _('FILE')),
     ('', 'filemap', '', _('remap file names using contents of file'),
      _('FILE')),
+    ('', 'full', None,
+     _('apply filemap changes by converting all files again')),
     ('', 'splicemap', '', _('splice synthesized history into place'),
      _('FILE')),
     ('', 'branchmap', '', _('change branch names while converting'),
@@ -131,6 +133,14 @@ def convert(ui, src, dest=None, revmapfi
     it is converted. To rename from a subdirectory into the root of
     the repository, use ``.`` as the path to rename to.
 
+    ``--full`` will make sure the converted changesets contain exactly
+    the right files with the right content. It will make a full
+    conversion of all files, not just the ones that have
+    changed. Files that already are correct will not be changed. This
+    can be used to apply filemap changes when converting
+    incrementally. This is currently only supported for Mercurial and
+    Subversion.
+
     The splicemap is a file that allows insertion of synthetic
     history, letting you specify the parents of a revision. This is
     useful if you want to e.g. give a Subversion merge two parents, or
diff --git a/hgext/convert/bzr.py b/hgext/convert/bzr.py
--- a/hgext/convert/bzr.py
+++ b/hgext/convert/bzr.py
@@ -134,8 +134,9 @@ class bzr_source(converter_source):
             sio = revtree.get_file(fileid)
             return sio.read(), mode
 
-    def getchanges(self, version):
-        # set up caches: modecache and revtree
+    def getchanges(self, version, full):
+        if full:
+            raise util.Abort(_("convert from cvs do not support --full"))
         self._modecache = {}
         self._revtree = self.sourcerepo.revision_tree(version)
         # get the parentids from the cache
diff --git a/hgext/convert/common.py b/hgext/convert/common.py
--- a/hgext/convert/common.py
+++ b/hgext/convert/common.py
@@ -93,12 +93,13 @@ class converter_source(object):
         """
         raise NotImplementedError
 
-    def getchanges(self, version):
+    def getchanges(self, version, full):
         """Returns a tuple of (files, copies).
 
         files is a sorted list of (filename, id) tuples for all files
         changed between version and its first parent returned by
-        getcommit(). id is the source revision id of the file.
+        getcommit(). If full, all files in that revision is returned.
+        id is the source revision id of the file.
 
         copies is a dictionary of dest: source
         """
@@ -204,7 +205,7 @@ class converter_sink(object):
         mapping equivalent authors identifiers for each system."""
         return None
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         """Create a revision with all changed files listed in 'files'
         and having listed parents. 'commit' is a commit object
         containing at a minimum the author, date, and message for this
@@ -212,7 +213,8 @@ class converter_sink(object):
         'copies' is a dictionary mapping destinations to sources,
         'source' is the source repository, and 'revmap' is a mapfile
         of source revisions to converted revisions. Only getfile() and
-        lookuprev() should be called on 'source'.
+        lookuprev() should be called on 'source'. 'full' means that 'files'
+        is complete and all other files should be removed.
 
         Note that the sink repository is not told to update itself to
         a particular revision (or even what that revision would be)
diff --git a/hgext/convert/convcmd.py b/hgext/convert/convcmd.py
--- a/hgext/convert/convcmd.py
+++ b/hgext/convert/convcmd.py
@@ -386,8 +386,8 @@ class converter(object):
 
     def copy(self, rev):
         commit = self.commitcache[rev]
-
-        changes = self.source.getchanges(rev)
+        full = self.opts.get('full')
+        changes = self.source.getchanges(rev, full)
         if isinstance(changes, basestring):
             if changes == SKIPREV:
                 dest = SKIPREV
@@ -413,7 +413,7 @@ class converter(object):
             parents = [b[0] for b in pbranches]
         source = progresssource(self.ui, self.source, len(files))
         newnode = self.dest.putcommit(files, copies, parents, commit,
-                                      source, self.map)
+                                      source, self.map, full)
         source.close()
         self.source.converted(rev, newnode)
         self.map[rev] = newnode
diff --git a/hgext/convert/cvs.py b/hgext/convert/cvs.py
--- a/hgext/convert/cvs.py
+++ b/hgext/convert/cvs.py
@@ -258,7 +258,9 @@ class convert_cvs(converter_source):
                 else:
                     raise util.Abort(_("unknown CVS response: %s") % line)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from cvs do not support --full"))
         self._parse()
         return sorted(self.files[rev].iteritems()), {}
 
diff --git a/hgext/convert/darcs.py b/hgext/convert/darcs.py
--- a/hgext/convert/darcs.py
+++ b/hgext/convert/darcs.py
@@ -156,7 +156,9 @@ class darcs_source(converter_source, com
             output, status = self.run('revert', all=True, repodir=self.tmppath)
             self.checkexit(status, output)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from darcs do not support --full"))
         copies = {}
         changes = []
         man = None
diff --git a/hgext/convert/filemap.py b/hgext/convert/filemap.py
--- a/hgext/convert/filemap.py
+++ b/hgext/convert/filemap.py
@@ -304,7 +304,7 @@ class filemap_source(converter_source):
         wrev.add(rev)
         self.wantedancestors[rev] = wrev
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         parents = self.commits[rev].parents
         if len(parents) > 1:
             self.rebuild()
@@ -384,7 +384,7 @@ class filemap_source(converter_source):
         # Get the real changes and do the filtering/mapping. To be
         # able to get the files later on in getfile, we hide the
         # original filename in the rev part of the return value.
-        changes, copies = self.base.getchanges(rev)
+        changes, copies = self.base.getchanges(rev, full)
         files = {}
         for f, r in changes:
             newf = self.filemapper(f)
diff --git a/hgext/convert/git.py b/hgext/convert/git.py
--- a/hgext/convert/git.py
+++ b/hgext/convert/git.py
@@ -180,7 +180,9 @@ class convert_git(converter_source):
                 continue
             m.node = node.strip()
 
-    def getchanges(self, version):
+    def getchanges(self, version, full):
+        if full:
+            raise util.Abort(_("convert from git do not support --full"))
         self.modecache = {}
         fh = self.gitopen("git diff-tree -z --root -m -r %s" % version)
         changes = []
diff --git a/hgext/convert/gnuarch.py b/hgext/convert/gnuarch.py
--- a/hgext/convert/gnuarch.py
+++ b/hgext/convert/gnuarch.py
@@ -142,7 +142,9 @@ class gnuarch_source(converter_source, c
 
         return self._getfile(name, rev)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from arch do not support --full"))
         self._update(rev)
         changes = []
         copies = {}
diff --git a/hgext/convert/hg.py b/hgext/convert/hg.py
--- a/hgext/convert/hg.py
+++ b/hgext/convert/hg.py
@@ -128,11 +128,13 @@ class mercurial_sink(converter_sink):
             fp.write('%s %s\n' % (revid, s[1]))
         return fp.getvalue()
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
-
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         files = dict(files)
         def getfilectx(repo, memctx, f):
-            v = files[f]
+            try:
+                v = files[f]
+            except KeyError:
+                return None
             data, mode = source.getfile(f, v)
             if data is None:
                 return None
@@ -193,7 +195,10 @@ class mercurial_sink(converter_sink):
         while parents:
             p1 = p2
             p2 = parents.pop(0)
-            ctx = context.memctx(self.repo, (p1, p2), text, files.keys(),
+            fileset = set(files)
+            if full:
+                fileset.update(self.repo[p1], self.repo[p2])
+            ctx = context.memctx(self.repo, (p1, p2), text, fileset,
                                  getfilectx, commit.author, commit.date, extra)
             self.repo.commitctx(ctx)
             text = "(octopus merge fixup)\n"
@@ -356,17 +361,18 @@ class mercurial_source(converter_source)
         except error.LookupError:
             return None, None
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         ctx = self.changectx(rev)
         parents = self.parents(ctx)
-        if not parents:
+        if full or not parents:
             files = copyfiles = ctx.manifest()
-        else:
+        if parents:
             if self._changescache[0] == rev:
                 m, a, r = self._changescache[1]
             else:
                 m, a, r = self.repo.status(parents[0].node(), ctx.node())[:3]
-            files = m + a + r
+            if not full:
+                files = m + a + r
             copyfiles = m + a
         # getcopies() is also run for roots and before filtering so missing
         # revlogs are detected early
diff --git a/hgext/convert/monotone.py b/hgext/convert/monotone.py
--- a/hgext/convert/monotone.py
+++ b/hgext/convert/monotone.py
@@ -224,7 +224,9 @@ class monotone_source(converter_source, 
         else:
             return [self.rev]
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from monotone do not support --full"))
         revision = self.mtnrun("get_revision", rev).split("\n\n")
         files = {}
         ignoremove = {}
diff --git a/hgext/convert/p4.py b/hgext/convert/p4.py
--- a/hgext/convert/p4.py
+++ b/hgext/convert/p4.py
@@ -190,7 +190,9 @@ class p4_source(converter_source):
 
         return contents, mode
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
+        if full:
+            raise util.Abort(_("convert from p4 do not support --full"))
         return self.files[rev], {}
 
     def getcommit(self, rev):
diff --git a/hgext/convert/subversion.py b/hgext/convert/subversion.py
--- a/hgext/convert/subversion.py
+++ b/hgext/convert/subversion.py
@@ -444,37 +444,37 @@ class svn_source(converter_source):
 
         return self.heads
 
-    def _getchanges(self, rev):
+    def _getchanges(self, rev, full):
         (paths, parents) = self.paths[rev]
+        copies = {}
         if parents:
             files, self.removed, copies = self.expandpaths(rev, paths, parents)
-        else:
+        if full or not parents:
             # Perform a full checkout on roots
             uuid, module, revnum = revsplit(rev)
             entries = svn.client.ls(self.baseurl + quote(module),
                                     optrev(revnum), True, self.ctx)
             files = [n for n, e in entries.iteritems()
                      if e.kind == svn.core.svn_node_file]
-            copies = {}
             self.removed = set()
 
         files.sort()
         files = zip(files, [rev] * len(files))
         return (files, copies)
 
-    def getchanges(self, rev):
+    def getchanges(self, rev, full):
         # reuse cache from getchangedfiles
-        if self._changescache[0] == rev:
+        if self._changescache[0] == rev and not full:
             (files, copies) = self._changescache[1]
         else:
-            (files, copies) = self._getchanges(rev)
+            (files, copies) = self._getchanges(rev, full)
             # caller caches the result, so free it here to release memory
             del self.paths[rev]
         return (files, copies)
 
     def getchangedfiles(self, rev, i):
         # called from filemap - cache computed values for reuse in getchanges
-        (files, copies) = self._getchanges(rev)
+        (files, copies) = self._getchanges(rev, False)
         self._changescache = (rev, (files, copies))
         return [f[0] for f in files]
 
@@ -1222,7 +1222,7 @@ class svn_sink(converter_sink, commandli
     def revid(self, rev):
         return u"svn:%s@%s" % (self.uuid, rev)
 
-    def putcommit(self, files, copies, parents, commit, source, revmap):
+    def putcommit(self, files, copies, parents, commit, source, revmap, full):
         for parent in parents:
             try:
                 return self.revid(self.childmap[parent])
@@ -1238,6 +1238,8 @@ class svn_sink(converter_sink, commandli
                 self.putfile(f, mode, data)
                 if f in copies:
                     self.copies.append([copies[f], f])
+        if full:
+            self.delete.extend(sorted(self.manifest.difference(files)))
         files = [f[0] for f in files]
 
         entries = set(self.delete)
diff --git a/tests/test-convert-hg-sink.t b/tests/test-convert-hg-sink.t
--- a/tests/test-convert-hg-sink.t
+++ b/tests/test-convert-hg-sink.t
@@ -537,3 +537,16 @@ Conversion after rollback
   |
   o  0 0 (a-only f)
   
+Convert with --full adds and removes files that didn't change
+
+  $ echo f >> 0/f
+  $ hg -R 0 ci -m "f"
+  $ hg convert --filemap filemap-b --full 0 a --config convert.hg.revs=1::
+  scanning source...
+  sorting...
+  converting...
+  0 f
+  $ hg -R a status --change tip
+  M f
+  A b-only
+  R a-only
diff --git a/tests/test-convert-svn-sink.t b/tests/test-convert-svn-sink.t
--- a/tests/test-convert-svn-sink.t
+++ b/tests/test-convert-svn-sink.t
@@ -247,6 +247,31 @@ Symlinks
 
 #endif
 
+Convert with --full adds and removes files that didn't change
+
+  $ touch a/f
+  $ hg -R a ci -Aqmf
+  $ echo "rename c d" > filemap
+  $ hg convert -d svn a --filemap filemap --full
+  assuming destination a-hg
+  initializing svn working copy 'a-hg-wc'
+  scanning source...
+  sorting...
+  converting...
+  0 f
+  $ svnupanddisplay a-hg-wc 1
+   9 9 test .
+   9 9 test d
+   9 9 test f
+  revision: 9
+  author: test
+  msg: f
+   D /c
+   A /d
+   D /d1
+   A /f
+   D /newlink
+
   $ rm -rf a a-hg a-hg-wc
 
 
diff --git a/tests/test-convert-svn-source.t b/tests/test-convert-svn-source.t
--- a/tests/test-convert-svn-source.t
+++ b/tests/test-convert-svn-source.t
@@ -168,6 +168,27 @@ Test filemap
   |
   o  0 second letter files: letter2.txt
   
+Convert with --full adds and removes files that didn't change
+
+  $ cd B
+  $ echo >> "letter .txt"
+  $ svn ci -m 'nothing'
+  Sending        letter .txt
+  Transmitting file data .
+  Committed revision 9.
+  $ cd ..
+
+  $ echo 'rename letter2.txt letter3.txt' > filemap
+  $ hg convert --filemap filemap --full "$SVNREPOURL/proj%20B/mytrunk" fmap
+  scanning source...
+  sorting...
+  converting...
+  0 nothing
+  $ hg -R fmap st --change tip
+  A letter .txt
+  A letter3.txt
+  R letter2.txt
+
 test invalid splicemap1
 
   $ cat > splicemap <<EOF
diff --git a/tests/test-convert.t b/tests/test-convert.t
--- a/tests/test-convert.t
+++ b/tests/test-convert.t
@@ -91,6 +91,13 @@
       directory if it is converted. To rename from a subdirectory into the root
       of the repository, use "." as the path to rename to.
   
+      "--full" will make sure the converted changesets contain exactly the right
+      files with the right content. It will make a full conversion of all files,
+      not just the ones that have changed. Files that already are correct will
+      not be changed. This can be used to apply filemap changes when converting
+      incrementally. This is currently only supported for Mercurial and
+      Subversion.
+  
       The splicemap is a file that allows insertion of synthetic history,
       letting you specify the parents of a revision. This is useful if you want
       to e.g. give a Subversion merge two parents, or graft two disconnected
@@ -265,6 +272,7 @@
    -r --rev REV          import up to source revision REV
    -A --authormap FILE   remap usernames using this file
       --filemap FILE     remap file names using contents of file
+      --full             apply filemap changes by converting all files again
       --splicemap FILE   splice synthesized history into place
       --branchmap FILE   change branch names while converting
       --branchsort       try to sort changesets by branches


More information about the Mercurial-devel mailing list