[PATCH 1 of 5] convert: add support for --includerevs, --excluderevs

Yury Sulsky yury.sulsky at gmail.com
Sat Nov 26 15:26:46 CST 2011


# HG changeset patch
# User Yury Sulsky <yury.sulsky at gmail.com>
# Date 1322282534 18000
# Branch stable
# Node ID 84c30a6c1760a6302f99daf45a992bf59be1a29a
# Parent  8a7f1722b28e50f8e1690870b0caa38e6a6def59
convert: add support for --includerevs, --excluderevs

diff -r 8a7f1722b28e -r 84c30a6c1760 hgext/convert/__init__.py
--- a/hgext/convert/__init__.py	Sat Oct 15 14:31:29 2011 -0500
+++ b/hgext/convert/__init__.py	Fri Nov 25 23:42:14 2011 -0500
@@ -306,6 +306,10 @@
            _('splice synthesized history into place'), _('FILE')),
           ('', 'branchmap', '',
            _('change branch names while converting'), _('FILE')),
+          ('', 'includerevs', '',
+           _('convert only these revisions'), _('FILE')),
+          ('', 'excluderevs', '',
+           _('exclude these revisions from the conversion'), _('FILE')),
           ('', 'branchsort', None, _('try to sort changesets by branches')),
           ('', 'datesort', None, _('try to sort changesets by date')),
           ('', 'sourcesort', None, _('preserve source changesets order'))],
diff -r 8a7f1722b28e -r 84c30a6c1760 hgext/convert/convcmd.py
--- a/hgext/convert/convcmd.py	Sat Oct 15 14:31:29 2011 -0500
+++ b/hgext/convert/convcmd.py	Fri Nov 25 23:42:14 2011 -0500
@@ -15,7 +15,7 @@
 from gnuarch import gnuarch_source
 from bzr import bzr_source
 from p4 import p4_source
-import filemap
+import filtermap
 
 import os, shutil
 from mercurial import hg, util, encoding
@@ -430,10 +430,15 @@
     if sortmode == 'sourcesort' and not srcc.hasnativeorder():
         raise util.Abort(_('--sourcesort is not supported by this data source'))
 
-    fmap = opts.get('filemap')
-    if fmap:
-        srcc = filemap.filemap_source(ui, srcc, fmap)
-        destc.setfilemapmode(True)
+    inrevs = opts.get('includerevs')
+    outrevs = opts.get('excluderevs')
+    filemap = opts.get('filemap')
+    if filemap or inrevs or outrevs:
+        srcc = filtermap.filtermap_source(ui, srcc, inrevs=inrevs,
+                                          outrevs=outrevs, filemap=filemap)
+        # An unset option may be '' rather than None, so test truthiness:
+        # filemap mode must only be enabled when a filemap was really given.
+        destc.setfilemapmode(bool(filemap))
 
     if not revmapfile:
         try:
diff -r 8a7f1722b28e -r 84c30a6c1760 hgext/convert/filemap.py
--- a/hgext/convert/filemap.py	Sat Oct 15 14:31:29 2011 -0500
+++ b/hgext/convert/filemap.py	Fri Nov 25 23:42:14 2011 -0500
@@ -7,7 +7,6 @@
 import shlex
 from mercurial.i18n import _
 from mercurial import util
-from common import SKIPREV, converter_source
 
 def rpairs(name):
     e = len(name)
@@ -105,276 +104,3 @@
 
     def active(self):
         return bool(self.include or self.exclude or self.rename)
-
-# This class does two additional things compared to a regular source:
-#
-# - Filter and rename files.  This is mostly wrapped by the filemapper
-#   class above. We hide the original filename in the revision that is
-#   returned by getchanges to be able to find things later in getfile.
-#
-# - Return only revisions that matter for the files we're interested in.
-#   This involves rewriting the parents of the original revision to
-#   create a graph that is restricted to those revisions.
-#
-#   This set of revisions includes not only revisions that directly
-#   touch files we're interested in, but also merges that merge two
-#   or more interesting revisions.
-
-class filemap_source(converter_source):
-    def __init__(self, ui, baseconverter, filemap):
-        super(filemap_source, self).__init__(ui)
-        self.base = baseconverter
-        self.filemapper = filemapper(ui, filemap)
-        self.commits = {}
-        # if a revision rev has parent p in the original revision graph, then
-        # rev will have parent self.parentmap[p] in the restricted graph.
-        self.parentmap = {}
-        # self.wantedancestors[rev] is the set of all ancestors of rev that
-        # are in the restricted graph.
-        self.wantedancestors = {}
-        self.convertedorder = None
-        self._rebuilt = False
-        self.origparents = {}
-        self.children = {}
-        self.seenchildren = {}
-
-    def before(self):
-        self.base.before()
-
-    def after(self):
-        self.base.after()
-
-    def setrevmap(self, revmap):
-        # rebuild our state to make things restartable
-        #
-        # To avoid calling getcommit for every revision that has already
-        # been converted, we rebuild only the parentmap, delaying the
-        # rebuild of wantedancestors until we need it (i.e. until a
-        # merge).
-        #
-        # We assume the order argument lists the revisions in
-        # topological order, so that we can infer which revisions were
-        # wanted by previous runs.
-        self._rebuilt = not revmap
-        seen = {SKIPREV: SKIPREV}
-        dummyset = set()
-        converted = []
-        for rev in revmap.order:
-            mapped = revmap[rev]
-            wanted = mapped not in seen
-            if wanted:
-                seen[mapped] = rev
-                self.parentmap[rev] = rev
-            else:
-                self.parentmap[rev] = seen[mapped]
-            self.wantedancestors[rev] = dummyset
-            arg = seen[mapped]
-            if arg == SKIPREV:
-                arg = None
-            converted.append((rev, wanted, arg))
-        self.convertedorder = converted
-        return self.base.setrevmap(revmap)
-
-    def rebuild(self):
-        if self._rebuilt:
-            return True
-        self._rebuilt = True
-        self.parentmap.clear()
-        self.wantedancestors.clear()
-        self.seenchildren.clear()
-        for rev, wanted, arg in self.convertedorder:
-            if rev not in self.origparents:
-                self.origparents[rev] = self.getcommit(rev).parents
-            if arg is not None:
-                self.children[arg] = self.children.get(arg, 0) + 1
-
-        for rev, wanted, arg in self.convertedorder:
-            parents = self.origparents[rev]
-            if wanted:
-                self.mark_wanted(rev, parents)
-            else:
-                self.mark_not_wanted(rev, arg)
-            self._discard(arg, *parents)
-
-        return True
-
-    def getheads(self):
-        return self.base.getheads()
-
-    def getcommit(self, rev):
-        # We want to save a reference to the commit objects to be able
-        # to rewrite their parents later on.
-        c = self.commits[rev] = self.base.getcommit(rev)
-        for p in c.parents:
-            self.children[p] = self.children.get(p, 0) + 1
-        return c
-
-    def _cachedcommit(self, rev):
-        if rev in self.commits:
-            return self.commits[rev]
-        return self.base.getcommit(rev)
-
-    def _discard(self, *revs):
-        for r in revs:
-            if r is None:
-                continue
-            self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
-            if self.seenchildren[r] == self.children[r]:
-                del self.wantedancestors[r]
-                del self.parentmap[r]
-                del self.seenchildren[r]
-                if self._rebuilt:
-                    del self.children[r]
-
-    def wanted(self, rev, i):
-        # Return True if we're directly interested in rev.
-        #
-        # i is an index selecting one of the parents of rev (if rev
-        # has no parents, i is None).  getchangedfiles will give us
-        # the list of files that are different in rev and in the parent
-        # indicated by i.  If we're interested in any of these files,
-        # we're interested in rev.
-        try:
-            files = self.base.getchangedfiles(rev, i)
-        except NotImplementedError:
-            raise util.Abort(_("source repository doesn't support --filemap"))
-        for f in files:
-            if self.filemapper(f):
-                return True
-        return False
-
-    def mark_not_wanted(self, rev, p):
-        # Mark rev as not interesting and update data structures.
-
-        if p is None:
-            # A root revision. Use SKIPREV to indicate that it doesn't
-            # map to any revision in the restricted graph.  Put SKIPREV
-            # in the set of wanted ancestors to simplify code elsewhere
-            self.parentmap[rev] = SKIPREV
-            self.wantedancestors[rev] = set((SKIPREV,))
-            return
-
-        # Reuse the data from our parent.
-        self.parentmap[rev] = self.parentmap[p]
-        self.wantedancestors[rev] = self.wantedancestors[p]
-
-    def mark_wanted(self, rev, parents):
-        # Mark rev ss wanted and update data structures.
-
-        # rev will be in the restricted graph, so children of rev in
-        # the original graph should still have rev as a parent in the
-        # restricted graph.
-        self.parentmap[rev] = rev
-
-        # The set of wanted ancestors of rev is the union of the sets
-        # of wanted ancestors of its parents. Plus rev itself.
-        wrev = set()
-        for p in parents:
-            wrev.update(self.wantedancestors[p])
-        wrev.add(rev)
-        self.wantedancestors[rev] = wrev
-
-    def getchanges(self, rev):
-        parents = self.commits[rev].parents
-        if len(parents) > 1:
-            self.rebuild()
-
-        # To decide whether we're interested in rev we:
-        #
-        # - calculate what parents rev will have if it turns out we're
-        #   interested in it.  If it's going to have more than 1 parent,
-        #   we're interested in it.
-        #
-        # - otherwise, we'll compare it with the single parent we found.
-        #   If any of the files we're interested in is different in the
-        #   the two revisions, we're interested in rev.
-
-        # A parent p is interesting if its mapped version (self.parentmap[p]):
-        # - is not SKIPREV
-        # - is still not in the list of parents (we don't want duplicates)
-        # - is not an ancestor of the mapped versions of the other parents
-        mparents = []
-        wp = None
-        for i, p1 in enumerate(parents):
-            mp1 = self.parentmap[p1]
-            if mp1 == SKIPREV or mp1 in mparents:
-                continue
-            for p2 in parents:
-                if p1 == p2 or mp1 == self.parentmap[p2]:
-                    continue
-                if mp1 in self.wantedancestors[p2]:
-                    break
-            else:
-                mparents.append(mp1)
-                wp = i
-
-        if wp is None and parents:
-            wp = 0
-
-        self.origparents[rev] = parents
-
-        closed = False
-        if 'close' in self.commits[rev].extra:
-            # A branch closing revision is only useful if one of its
-            # parents belong to the branch being closed
-            branch = self.commits[rev].branch
-            pbranches = [self._cachedcommit(p).branch for p in mparents]
-            if branch in pbranches:
-                closed = True
-
-        if len(mparents) < 2 and not closed and not self.wanted(rev, wp):
-            # We don't want this revision.
-            # Update our state and tell the convert process to map this
-            # revision to the same revision its parent as mapped to.
-            p = None
-            if parents:
-                p = parents[wp]
-            self.mark_not_wanted(rev, p)
-            self.convertedorder.append((rev, False, p))
-            self._discard(*parents)
-            return self.parentmap[rev]
-
-        # We want this revision.
-        # Rewrite the parents of the commit object
-        self.commits[rev].parents = mparents
-        self.mark_wanted(rev, parents)
-        self.convertedorder.append((rev, True, None))
-        self._discard(*parents)
-
-        # Get the real changes and do the filtering/mapping. To be
-        # able to get the files later on in getfile, we hide the
-        # original filename in the rev part of the return value.
-        changes, copies = self.base.getchanges(rev)
-        newnames = {}
-        files = []
-        for f, r in changes:
-            newf = self.filemapper(f)
-            if newf:
-                files.append((newf, (f, r)))
-                newnames[f] = newf
-
-        ncopies = {}
-        for c in copies:
-            newc = self.filemapper(c)
-            if newc:
-                newsource = self.filemapper(copies[c])
-                if newsource:
-                    ncopies[newc] = newsource
-
-        return files, ncopies
-
-    def getfile(self, name, rev):
-        realname, realrev = rev
-        return self.base.getfile(realname, realrev)
-
-    def gettags(self):
-        return self.base.gettags()
-
-    def hasnativeorder(self):
-        return self.base.hasnativeorder()
-
-    def lookuprev(self, rev):
-        return self.base.lookuprev(rev)
-
-    def getbookmarks(self):
-        return self.base.getbookmarks()
diff -r 8a7f1722b28e -r 84c30a6c1760 hgext/convert/filtermap.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/convert/filtermap.py	Fri Nov 25 23:42:14 2011 -0500
@@ -0,0 +1,340 @@
+# Copyright 2007 Bryan O'Sullivan <bos at serpentine.com>
+# Copyright 2007 Alexis S. L. Carvalho <alexis at cecm.usp.br>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from mercurial.i18n import _
+from mercurial import util
+from common import SKIPREV, converter_source
+from filemap import filemapper
+
+# This class serves as the base for filtering and mapping revisions
+# between the source and the sink. A subclass should override the
+# "wanted" function to determine if it cares about a certain revision
+# (see "filtermap_source" below).
+#
+# All such revisions, along with merge points or revisions that close
+# branches are transferred to the converter sink.
+
+class filtermap_base(converter_source):
+
+    def wanted(self, rev, i):
+        """Return True if we want to keep this revision"""
+        return True
+
+    def __init__(self, ui, baseconverter):
+        super(filtermap_base, self).__init__(ui)
+        self.base = baseconverter
+        self.commits = {}
+        # if a revision rev has parent p in the original revision graph, then
+        # rev will have parent self.parentmap[p] in the restricted graph.
+        self.parentmap = {}
+        # self.wantedancestors[rev] is the set of all ancestors of rev that
+        # are in the restricted graph.
+        self.wantedancestors = {}
+        self.convertedorder = None
+        self._rebuilt = False
+        self.origparents = {}
+        self.children = {}
+        self.seenchildren = {}
+
+    def before(self):
+        self.base.before()
+
+    def after(self):
+        self.base.after()
+
+    def setrevmap(self, revmap):
+        # rebuild our state to make things restartable
+        #
+        # To avoid calling getcommit for every revision that has already
+        # been converted, we rebuild only the parentmap, delaying the
+        # rebuild of wantedancestors until we need it (i.e. until a
+        # merge).
+        #
+        # We assume the order argument lists the revisions in
+        # topological order, so that we can infer which revisions were
+        # wanted by previous runs.
+        self._rebuilt = not revmap
+        seen = {SKIPREV: SKIPREV}
+        dummyset = set()
+        converted = []
+        for rev in revmap.order:
+            mapped = revmap[rev]
+            wanted = mapped not in seen
+            if wanted:
+                seen[mapped] = rev
+                self.parentmap[rev] = rev
+            else:
+                self.parentmap[rev] = seen[mapped]
+            self.wantedancestors[rev] = dummyset
+            arg = seen[mapped]
+            if arg == SKIPREV:
+                arg = None
+            converted.append((rev, wanted, arg))
+        self.convertedorder = converted
+        return self.base.setrevmap(revmap)
+
+    def rebuild(self):
+        if self._rebuilt:
+            return True
+        self._rebuilt = True
+        self.parentmap.clear()
+        self.wantedancestors.clear()
+        self.seenchildren.clear()
+        for rev, wanted, arg in self.convertedorder:
+            if rev not in self.origparents:
+                self.origparents[rev] = self.getcommit(rev).parents
+            if arg is not None:
+                self.children[arg] = self.children.get(arg, 0) + 1
+
+        for rev, wanted, arg in self.convertedorder:
+            parents = self.origparents[rev]
+            if wanted:
+                self.mark_wanted(rev, parents)
+            else:
+                self.mark_not_wanted(rev, arg)
+            self._discard(arg, *parents)
+
+        return True
+
+    def getheads(self):
+        return self.base.getheads()
+
+    def getcommit(self, rev):
+        # We want to save a reference to the commit objects to be able
+        # to rewrite their parents later on.
+        c = self.commits[rev] = self.base.getcommit(rev)
+        for p in c.parents:
+            self.children[p] = self.children.get(p, 0) + 1
+        return c
+
+    def _cachedcommit(self, rev):
+        if rev in self.commits:
+            return self.commits[rev]
+        return self.base.getcommit(rev)
+
+    def _discard(self, *revs):
+        for r in revs:
+            if r is None:
+                continue
+            self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
+            if self.seenchildren[r] == self.children[r]:
+                del self.wantedancestors[r]
+                del self.parentmap[r]
+                del self.seenchildren[r]
+                if self._rebuilt:
+                    del self.children[r]
+
+    def mark_not_wanted(self, rev, p):
+        # Mark rev as not interesting and update data structures.
+
+        if p is None:
+            # A root revision. Use SKIPREV to indicate that it doesn't
+            # map to any revision in the restricted graph.  Put SKIPREV
+            # in the set of wanted ancestors to simplify code elsewhere
+            self.parentmap[rev] = SKIPREV
+            self.wantedancestors[rev] = set((SKIPREV,))
+            return
+
+        # Reuse the data from our parent.
+        self.parentmap[rev] = self.parentmap[p]
+        self.wantedancestors[rev] = self.wantedancestors[p]
+
+    def mark_wanted(self, rev, parents):
+        # Mark rev as wanted and update data structures.
+
+        # rev will be in the restricted graph, so children of rev in
+        # the original graph should still have rev as a parent in the
+        # restricted graph.
+        self.parentmap[rev] = rev
+
+        # The set of wanted ancestors of rev is the union of the sets
+        # of wanted ancestors of its parents. Plus rev itself.
+        wrev = set()
+        for p in parents:
+            wrev.update(self.wantedancestors[p])
+        wrev.add(rev)
+        self.wantedancestors[rev] = wrev
+
+    def getchanges(self, rev):
+        parents = self.commits[rev].parents
+        if len(parents) > 1:
+            self.rebuild()
+
+        # To decide whether we're interested in rev we:
+        #
+        # - calculate what parents rev will have if it turns out we're
+        #   interested in it.  If it's going to have more than 1 parent,
+        #   we're interested in it.
+        #
+        # - otherwise, we call the "wanted" function to tell us if we're
+        #   still interested.
+
+        # A parent p is interesting if its mapped version (self.parentmap[p]):
+        # - is not SKIPREV
+        # - is still not in the list of parents (we don't want duplicates)
+        # - is not an ancestor of the mapped versions of the other parents
+        mparents = []
+        wp = None
+        for i, p1 in enumerate(parents):
+            mp1 = self.parentmap[p1]
+            if mp1 == SKIPREV or mp1 in mparents:
+                continue
+            for p2 in parents:
+                if p1 == p2 or mp1 == self.parentmap[p2]:
+                    continue
+                if mp1 in self.wantedancestors[p2]:
+                    break
+            else:
+                mparents.append(mp1)
+                wp = i
+
+        if wp is None and parents:
+            wp = 0
+
+        self.origparents[rev] = parents
+
+        closed = False
+        if 'close' in self.commits[rev].extra:
+            # A branch closing revision is only useful if one of its
+            # parents belong to the branch being closed
+            branch = self.commits[rev].branch
+            pbranches = [self._cachedcommit(p).branch for p in mparents]
+            if branch in pbranches:
+                closed = True
+
+        if len(mparents) < 2 and not closed and not self.wanted(rev, wp):
+            # We don't want this revision.
+            # Update our state and tell the convert process to map this
+            # revision to the same revision its parent was mapped to.
+            p = None
+            if parents:
+                p = parents[wp]
+            self.mark_not_wanted(rev, p)
+            self.convertedorder.append((rev, False, p))
+            self._discard(*parents)
+            return self.parentmap[rev]
+
+        # We want this revision.
+        # Rewrite the parents of the commit object
+        self.commits[rev].parents = mparents
+        self.mark_wanted(rev, parents)
+        self.convertedorder.append((rev, True, None))
+        self._discard(*parents)
+        return self.base.getchanges(rev)
+
+    def getfile(self, name, rev):
+        return self.base.getfile(name, rev)
+
+    def gettags(self):
+        return self.base.gettags()
+
+    def hasnativeorder(self):
+        return self.base.hasnativeorder()
+
+    def lookuprev(self, rev):
+        return self.base.lookuprev(rev)
+
+    def getbookmarks(self):
+        return self.base.getbookmarks()
+
+# This class does two additional things compared to a regular source:
+#
+# - Filter and rename files.  This is mostly wrapped by the filemapper
+#   class (see filemap.py). We hide the original filename in the revision
+#   returned by getchanges to be able to find things later in getfile.
+#
+# - Return only revisions that matter for the files we're interested in.
+#   This involves rewriting the parents of the original revision to
+#   create a graph that is restricted to those revisions.
+#
+#   This set of revisions includes not only revisions that directly
+#   touch files we're interested in, but also merges that merge two
+#   or more interesting revisions.
+
+class filtermap_source(filtermap_base):
+    def __init__(self, ui, base,
+                 inrevs  = None,
+                 outrevs = None,
+                 filemap = None,
+                 modify_file = None):
+        super(filtermap_source, self).__init__(ui, base)
+        self.inrevs  = self.read_revset(inrevs)  if inrevs  else None
+        self.outrevs = self.read_revset(outrevs) if outrevs else None
+        self.filemapper  = filemapper(ui, filemap) if filemap else None
+        self.modify_file = modify_file
+
+    def read_revset(self, fname):
+        """Resolve each line of fname with base.lookuprev; return the set."""
+        def parse(line):
+            ret = self.base.lookuprev(line.strip())
+            if ret is None:
+                self.ui.warn(_("Can't find revision %s.\n") % line.strip())
+            return ret
+        with open(fname, 'r') as fp:
+            return set([parse(line) for line in fp])
+
+    def want_rev(self, rev):
+        if self.inrevs  is not None and rev not in self.inrevs:
+            return False
+        if self.outrevs is not None and rev     in self.outrevs:
+            return False
+        return True
+
+    def want_files(self, rev, i):
+        if self.filemapper is None:
+            return True
+        try:
+            files = self.base.getchangedfiles(rev, i)
+        except NotImplementedError:
+            raise util.Abort(_("source repository doesn't support --filemap"))
+        for f in files:
+            if self.filemapper(f):
+                return True
+        return False
+
+    def wanted(self, rev, i):
+        return self.want_rev(rev) and self.want_files(rev, i)
+
+    def filemap_changes(self, changes):
+        if self.filemapper is None:
+            return changes
+
+        # Get the real changes and do the filtering/mapping. To be
+        # able to get the files later on in getfile, we hide the
+        # original filename in the rev part of the return value.
+
+        files, copies = changes
+
+        nfiles = []
+        for f, r in files:
+            newf = self.filemapper(f)
+            if newf:
+                nfiles.append((newf, (f, r)))
+
+        ncopies = {}
+        for c in copies:
+            newc = self.filemapper(c)
+            if newc:
+                newsource = self.filemapper(copies[c])
+                if newsource:
+                    ncopies[newc] = newsource
+
+        return nfiles, ncopies
+
+    def getchanges(self, rev):
+        changes = super(filtermap_source, self).getchanges(rev)
+        if isinstance(changes, basestring):
+            return changes
+        return self.filemap_changes(changes)
+
+    def getfile(self, name, rev):
+        if self.filemapper is not None:
+            name, rev = rev
+        file = super(filtermap_source, self).getfile(name, rev)
+        if self.modify_file is not None:
+            return self.modify_file(name, file)
+        return file
+


More information about the Mercurial-devel mailing list