[PATCH] convert: better support for CVS branchpoints (issue1447) [revised]

Henrik Stuart hg at hstuart.dk
Mon Jun 8 01:29:53 CDT 2009


# HG changeset patch
# User Henrik Stuart <hg at hstuart.dk>
# Date 1244441586 -7200
# Node ID 4cbf1494ba82f84c37b7a9b5cc4c18ad8f42ac07
# Parent  6e41d3c5619f3d981f7c444a0f01619cb10932fd
convert: better support for CVS branchpoints (issue1447)

This records the branches starting at individual CVS file revisions,
using the symbolic names map rather than just the branch
information.  This information is used to generate Mercurial
changesets. Despite the changes, the CVS conversion still suffers
heavily from cvsps' deficiencies in generating a correct
representation of the CVS repository history.

diff -r 6e41d3c5619f -r 4cbf1494ba82 hgext/convert/cvsps.py
--- a/hgext/convert/cvsps.py	Sat Jun 06 15:38:03 2009 -0700
+++ b/hgext/convert/cvsps.py	Mon Jun 08 08:13:06 2009 +0200
@@ -36,6 +36,7 @@
         .synthetic - is this a synthetic "file ... added on ..." revision?
         .mergepoint- the branch that has been merged from
                      (if present in rlog output)
+        .branchpoints- the branches that start at the current entry
     '''
     def __init__(self, **entries):
         self.__dict__.update(entries)
@@ -400,6 +401,19 @@
             else:
                 e.branch = None
 
+            # find the branches starting from this revision
+            branchpoints = set()
+            for branch, revision in branchmap.iteritems():
+                revparts = tuple([int(i) for i in revision.split('.')])
+                if revparts[-2] == 0 and revparts[-1] % 2 == 0:
+                    # normal branch
+                    if revparts[:-2] == e.revision:
+                        branchpoints.add(branch)
+                elif revparts == (1,1,1): # vendor branch
+                    if revparts in e.branches:
+                        branchpoints.add(branch)
+            e.branchpoints = branchpoints
+
             log.append(e)
 
             if len(log) % 100 == 0:
@@ -453,6 +467,7 @@
         .synthetic - from synthetic revision "file ... added on branch ..."
         .mergepoint- the branch that has been merged from
                      (if present in rlog output)
+        .branchpoints- the branches that start at the current entry
     '''
     def __init__(self, **entries):
         self.__dict__.update(entries)
@@ -477,17 +492,34 @@
     for i, e in enumerate(log):
 
         # Check if log entry belongs to the current changeset or not.
+
+        # Since CVS is file centric, two different file revisions with
+        # different branchpoints should be treated as belonging to two
+        # different changesets (and the ordering is important and not
+        # honoured by cvsps at this point).
+        #
+        # Consider the following case:
+        # foo 1.1 branchpoints: [MYBRANCH]
+        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
+        #
+        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
+        # later version of foo may be in MYBRANCH2, so foo should be the
+        # first changeset and bar the next and MYBRANCH and MYBRANCH2
+        # should both start off of the bar changeset. No provisions are
+        # made to ensure that this is, in fact, what happens.
         if not (c and
                   e.comment == c.comment and
                   e.author == c.author and
                   e.branch == c.branch and
+                  e.branchpoints == c.branchpoints and
                   ((c.date[0] + c.date[1]) <=
                    (e.date[0] + e.date[1]) <=
                    (c.date[0] + c.date[1]) + fuzz) and
                   e.file not in files):
             c = changeset(comment=e.comment, author=e.author,
                           branch=e.branch, date=e.date, entries=[],
-                          mergepoint=getattr(e, 'mergepoint', None))
+                          mergepoint=getattr(e, 'mergepoint', None),
+                          branchpoints=getattr(e, 'branchpoints', set()))
             changesets.append(c)
             files = set()
             if len(changesets) % 100 == 0:
@@ -613,8 +645,23 @@
         if c.branch in branches:
             p = branches[c.branch]
         else:
-            for f in c.entries:
-                p = max(p, versions.get((f.rcs, f.parent), None))
+            # first changeset on a new branch
+            # the parent is a changeset with the branch in its
+            # branchpoints such that it is the latest possible
+            # commit without any intervening, unrelated commits.
+
+            candidates = []
+            for candidate in xrange(i):
+                if c.branch in changesets[candidate].branchpoints:
+                    candidates.append(candidate)
+            if candidates:
+                candidates = sorted(candidates)
+                p = candidates[0]
+                for q in candidates[1:]:
+                    if p == q - 1:
+                        p = q
+                    else:
+                        break
 
         c.parents = []
         if p is not None:
@@ -753,6 +800,9 @@
             ui.write('Branch: %s\n' % (cs.branch or 'HEAD'))
             ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags)>1],
                                   ','.join(cs.tags) or '(none)'))
+            branchpoints = getattr(cs, 'branchpoints', None)
+            if branchpoints:
+                ui.write('Branchpoints: %s \n' % ', '.join(branchpoints))
             if opts["parents"] and cs.parents:
                 if len(cs.parents)>1:
                     ui.write('Parents: %s\n' % (','.join([str(p.id) for p in cs.parents])))
diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-branch
--- a/tests/test-convert-cvs-branch	Sat Jun 06 15:38:03 2009 -0700
+++ b/tests/test-convert-cvs-branch	Mon Jun 08 08:13:06 2009 +0200
@@ -1,6 +1,7 @@
 #!/bin/sh
 
 # This is http://www.selenic.com/mercurial/bts/issue1148
+#     and http://www.selenic.com/mercurial/bts/issue1447
 
 "$TESTDIR/hghave" cvs || exit 80
 
@@ -62,3 +63,56 @@
 echo % Check the result
 
 hg -R src-hg glog --template '{rev} ({branches}) {desc} files: {files}\n'
+
+echo ""
+
+echo % issue 1447
+cvscall()
+{
+    echo cvs -f "$@"
+    cvs -f "$@"
+    sleep 1
+}
+
+cvsci()
+{
+    echo cvs -f ci "$@"
+    cvs -f ci "$@" >/dev/null 2>&1
+    sleep 1
+}
+
+echo "[extensions]" >> $HGRCPATH
+echo "convert = " >> $HGRCPATH
+echo "graphlog = " >> $HGRCPATH
+echo "[convert]" >> $HGRCPATH
+echo "cvsps=builtin" >> $HGRCPATH
+
+cvscall -Q -d `pwd`/cvsmaster2 init >/dev/null 2>&1
+cd cvsmaster2
+export CVSROOT=`pwd`
+mkdir foo
+cd ..
+cvscall -Q co -d cvswork2 foo
+
+cd cvswork2
+echo foo > a.txt
+echo bar > b.txt
+cvscall -Q add a.txt b.txt
+cvsci -m "Initial commit"
+
+echo foo > b.txt
+cvsci -m "Fix b on HEAD"
+
+echo bar > a.txt
+cvsci -m "Small fix in a on HEAD"
+
+cvscall -Q tag -b BRANCH
+cvscall -Q up -P -rBRANCH
+
+echo baz > b.txt
+cvsci -m "Change on BRANCH in b"
+
+hg debugcvsps -x --parents foo | sed -e 's/Author:.*/Author:/' -e 's/Date:.*/Date:/'
+
+cd ..
+
diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-branch.out
--- a/tests/test-convert-cvs-branch.out	Sat Jun 06 15:38:03 2009 -0700
+++ b/tests/test-convert-cvs-branch.out	Mon Jun 08 08:13:06 2009 +0200
@@ -51,3 +51,71 @@
 |/
 o  0 () Initial revision files: a b
 
+
+% issue 1447
+cvs -f -Q co -d cvswork2 foo
+cvs -f -Q add a.txt b.txt
+cvs -f ci -m Initial commit
+cvs -f ci -m Fix b on HEAD
+cvs -f ci -m Small fix in a on HEAD
+cvs -f -Q tag -b BRANCH
+cvs -f -Q up -P -rBRANCH
+cvs -f ci -m Change on BRANCH in b
+collecting CVS rlog
+5 log entries
+creating changesets
+4 changeset entries
+---------------------
+PatchSet 1 
+Date:
+Author:
+Branch: HEAD
+Tag: (none) 
+Log:
+Initial commit
+
+Members: 
+	a.txt:INITIAL->1.1 
+	b.txt:INITIAL->1.1 
+
+---------------------
+PatchSet 2 
+Date:
+Author:
+Branch: HEAD
+Tag: (none) 
+Branchpoints: BRANCH 
+Parent: 1
+Log:
+Fix b on HEAD
+
+Members: 
+	b.txt:1.1->1.2 
+
+---------------------
+PatchSet 3 
+Date:
+Author:
+Branch: HEAD
+Tag: (none) 
+Branchpoints: BRANCH 
+Parent: 2
+Log:
+Small fix in a on HEAD
+
+Members: 
+	a.txt:1.1->1.2 
+
+---------------------
+PatchSet 4 
+Date:
+Author:
+Branch: BRANCH
+Tag: (none) 
+Parent: 3
+Log:
+Change on BRANCH in b
+
+Members: 
+	b.txt:1.2->1.2.2.1 
+
diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-builtincvsps
--- a/tests/test-convert-cvs-builtincvsps	Sat Jun 06 15:38:03 2009 -0700
+++ b/tests/test-convert-cvs-builtincvsps	Mon Jun 08 08:13:06 2009 +0200
@@ -94,7 +94,7 @@
 
 echo % convert again
 hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
-hgcat a
+#hgcat a
 hgcat b/c
 
 echo % convert again with --filemap
diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-builtincvsps.out
--- a/tests/test-convert-cvs-builtincvsps.out	Sat Jun 06 15:38:03 2009 -0700
+++ b/tests/test-convert-cvs-builtincvsps.out	Mon Jun 08 08:13:06 2009 +0200
@@ -104,7 +104,6 @@
 sorting...
 converting...
 0 ci2
-a
 c
 d
 % convert again with --filemap
@@ -142,13 +141,13 @@
 |  ----------------------------
 |  log message files: a
 o  5 (branch) ci2 files: b/c
+
+o  4 () ci1 files: a b/c
 |
-| o  4 () ci1 files: a b/c
-| |
-| o  3 () update tags files: .hgtags
-| |
-| o  2 () ci0 files: b/c
-|/
+o  3 () update tags files: .hgtags
+|
+o  2 () ci0 files: b/c
+|
 | o  1 (INITIAL) import files:
 |/
 o  0 () Initial revision files: a b/c
@@ -157,22 +156,35 @@
 collecting CVS rlog
 9 log entries
 creating changesets
-6 changeset entries
+8 changeset entries
 ---------------------
 PatchSet 1 
 Date:
 Author:
 Branch: HEAD
 Tag: (none) 
+Branchpoints: INITIAL 
 Log:
 Initial revision
 
 Members: 
 	a:INITIAL->1.1 
+
+---------------------
+PatchSet 2 
+Date:
+Author:
+Branch: HEAD
+Tag: (none) 
+Branchpoints: INITIAL, branch 
+Log:
+Initial revision
+
+Members: 
 	b/c:INITIAL->1.1 
 
 ---------------------
-PatchSet 2 
+PatchSet 3 
 Date:
 Author:
 Branch: INITIAL
@@ -185,7 +197,7 @@
 	b/c:1.1->1.1.1.1 
 
 ---------------------
-PatchSet 3 
+PatchSet 4 
 Date:
 Author:
 Branch: HEAD
@@ -197,7 +209,20 @@
 	b/c:1.1->1.2 
 
 ---------------------
-PatchSet 4 
+PatchSet 5 
+Date:
+Author:
+Branch: HEAD
+Tag: (none) 
+Branchpoints: branch 
+Log:
+ci1
+
+Members: 
+	a:1.1->1.2 
+
+---------------------
+PatchSet 6 
 Date:
 Author:
 Branch: HEAD
@@ -206,11 +231,10 @@
 ci1
 
 Members: 
-	a:1.1->1.2 
 	b/c:1.2->1.3 
 
 ---------------------
-PatchSet 5 
+PatchSet 7 
 Date:
 Author:
 Branch: branch
@@ -222,7 +246,7 @@
 	b/c:1.1->1.1.2.1 
 
 ---------------------
-PatchSet 6 
+PatchSet 8 
 Date:
 Author:
 Branch: branch


More information about the Mercurial-devel mailing list