[PATCH] convert: better support for CVS branchpoints (issue1447) [revised]

Patrick Mézard pmezard at gmail.com
Mon Jun 8 16:21:18 CDT 2009


Henrik Stuart a écrit :
> # HG changeset patch
> # User Henrik Stuart <hg at hstuart.dk>
> # Date 1244441586 -7200
> # Node ID 4cbf1494ba82f84c37b7a9b5cc4c18ad8f42ac07
> # Parent  6e41d3c5619f3d981f7c444a0f01619cb10932fd
> convert: better support for CVS branchpoints (issue1447)
> 
> This records the branches starting at individual CVS file revisions,
> using the symbolic names map rather than just the branches
> information.  This information is used to generate Mercurial
> changesets. Despite the changes, the CVS conversion still suffers
> heavily from cvsps' deficiencies in generating a correct
> representation of the CVS repository history.
> 
> diff -r 6e41d3c5619f -r 4cbf1494ba82 hgext/convert/cvsps.py
> --- a/hgext/convert/cvsps.py	Sat Jun 06 15:38:03 2009 -0700
> +++ b/hgext/convert/cvsps.py	Mon Jun 08 08:13:06 2009 +0200
> @@ -36,6 +36,7 @@
>          .synthetic - is this a synthetic "file ... added on ..." revision?
>          .mergepoint- the branch that has been merged from
>                       (if present in rlog output)
> +        .branchpoints- the branches that start at the current entry
>      '''
>      def __init__(self, **entries):
>          self.__dict__.update(entries)
> @@ -400,6 +401,19 @@
>              else:
>                  e.branch = None
>  
> +            # find the branches starting from this revision
> +            branchpoints = set()
> +            for branch, revision in branchmap.iteritems():
> +                revparts = tuple([int(i) for i in revision.split('.')])
> +                if revparts[-2] == 0 and revparts[-1] % 2 == 0:

Just curious: is the "revparts[-1] % 2 == 0" check necessary?

> +                    # normal branch
> +                    if revparts[:-2] == e.revision:
> +                        branchpoints.add(branch)
> +                elif revparts == (1,1,1): # vendor branch
> +                    if revparts in e.branches:
> +                        branchpoints.add(branch)
> +            e.branchpoints = branchpoints
> +
>              log.append(e)
>  
>              if len(log) % 100 == 0:
> @@ -453,6 +467,7 @@
>          .synthetic - from synthetic revision "file ... added on branch ..."
>          .mergepoint- the branch that has been merged from
>                       (if present in rlog output)
> +        .branchpoints- the branches that start at the current entry
>      '''
>      def __init__(self, **entries):
>          self.__dict__.update(entries)
> @@ -477,17 +492,34 @@
>      for i, e in enumerate(log):
>  
>          # Check if log entry belongs to the current changeset or not.
> +
> +        # Since CVS is file centric, two different file revisions with
> +        # different branchpoints should be treated as belonging to two
> +        # different changesets (and the ordering is important and not
> +        # honoured by cvsps at this point).
> +        #
> +        # Consider the following case:
> +        # foo 1.1 branchpoints: [MYBRANCH]
> +        # bar 1.1 branchpoints: [MYBRANCH, MYBRANCH2]
> +        #
> +        # Here foo is part only of MYBRANCH, but not MYBRANCH2, e.g. a
> +        # later verison of foo may be in MYBRANCH2, so foo should be the
> +        # first changeset and bar the next and MYBRANCH and MYBRANCH2
> +        # should both start off of the bar changeset. No provisions are
> +        # made to ensure that this is, in fact, what happens.
>          if not (c and
>                    e.comment == c.comment and
>                    e.author == c.author and
>                    e.branch == c.branch and
> +                  e.branchpoints == c.branchpoints and
>                    ((c.date[0] + c.date[1]) <=
>                     (e.date[0] + e.date[1]) <=
>                     (c.date[0] + c.date[1]) + fuzz) and
>                    e.file not in files):
>              c = changeset(comment=e.comment, author=e.author,
>                            branch=e.branch, date=e.date, entries=[],
> -                          mergepoint=getattr(e, 'mergepoint', None))
> +                          mergepoint=getattr(e, 'mergepoint', None),
> +                          branchpoints=getattr(e, 'branchpoints', set()))
>              changesets.append(c)
>              files = set()
>              if len(changesets) % 100 == 0:
> @@ -613,8 +645,23 @@
>          if c.branch in branches:
>              p = branches[c.branch]
>          else:
> -            for f in c.entries:
> -                p = max(p, versions.get((f.rcs, f.parent), None))
> +            # first changeset on a new branch
> +            # the parent is a changeset with the branch in its
> +            # branchpoints such that it is the latest possible
> +            # commit without any intervening, unrelated commits.
> +
> +            candidates = []
> +            for candidate in xrange(i):
> +                if c.branch in changesets[candidate].branchpoints:
> +                    candidates.append(candidate)
> +            if candidates:
> +                candidates = sorted(candidates)
> +                p = candidates[0]
> +                for q in candidates[1:]:
> +                    if p == q - 1:
> +                        p = q
> +                    else:
> +                        break

The sorted() call looks unnecessary: the first loop already collects candidates in ascending order. We can probably get rid of the candidates list entirely and do everything in a single loop, like:

for candidate in xrange(i):
    if c.branch not in changesets[candidate].branchpoints:
        if p is not None:
            break
        continue
    p = candidate

>  
>          c.parents = []
>          if p is not None:
> @@ -753,6 +800,9 @@
>              ui.write('Branch: %s\n' % (cs.branch or 'HEAD'))
>              ui.write('Tag%s: %s \n' % (['', 's'][len(cs.tags)>1],
>                                    ','.join(cs.tags) or '(none)'))
> +            branchpoints = getattr(cs, 'branchpoints', None)
> +            if branchpoints:
> +                ui.write('Branchpoints: %s \n' % ', '.join(branchpoints))
>              if opts["parents"] and cs.parents:
>                  if len(cs.parents)>1:
>                      ui.write('Parents: %s\n' % (','.join([str(p.id) for p in cs.parents])))
> diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-branch
> --- a/tests/test-convert-cvs-branch	Sat Jun 06 15:38:03 2009 -0700
> +++ b/tests/test-convert-cvs-branch	Mon Jun 08 08:13:06 2009 +0200
> @@ -1,6 +1,7 @@
>  #!/bin/sh
>  
>  # This is http://www.selenic.com/mercurial/bts/issue1148
> +#     and http://www.selenic.com/mercurial/bts/issue1447
>  
>  "$TESTDIR/hghave" cvs || exit 80
>  
> @@ -62,3 +63,56 @@
>  echo % Check the result
>  
>  hg -R src-hg glog --template '{rev} ({branches}) {desc} files: {files}\n'
> +
> +echo ""
> +
> +echo % issue 1447
> +cvscall()
> +{
> +    echo cvs -f "$@"
> +    cvs -f "$@"
> +    sleep 1
> +}
> +
> +cvsci()
> +{
> +    echo cvs -f ci "$@"
> +    cvs -f ci "$@" >/dev/null 2>&1
> +    sleep 1
> +}
> +
> +echo "[extensions]" >> $HGRCPATH
> +echo "convert = " >> $HGRCPATH
> +echo "graphlog = " >> $HGRCPATH
> +echo "[convert]" >> $HGRCPATH
> +echo "cvsps=builtin" >> $HGRCPATH

Do you explicitly enable the cvsps cache here? Otherwise, these lines look a bit unnecessary.

> +cvscall -Q -d `pwd`/cvsmaster2 init >/dev/null 2>&1
> +cd cvsmaster2
> +export CVSROOT=`pwd`
> +mkdir foo
> +cd ..
> +cvscall -Q co -d cvswork2 foo
> +
> +cd cvswork2
> +echo foo > a.txt
> +echo bar > b.txt
> +cvscall -Q add a.txt b.txt
> +cvsci -m "Initial commit"
> +
> +echo foo > b.txt
> +cvsci -m "Fix b on HEAD"
> +
> +echo bar > a.txt
> +cvsci -m "Small fix in a on HEAD"
> +
> +cvscall -Q tag -b BRANCH
> +cvscall -Q up -P -rBRANCH
> +
> +echo baz > b.txt
> +cvsci -m "Change on BRANCH in b"
> +
> +hg debugcvsps -x --parents foo | sed -e 's/Author:.*/Author:/' -e 's/Date:.*/Date:/'
> +
> +cd ..
> +
> diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-branch.out
> --- a/tests/test-convert-cvs-branch.out	Sat Jun 06 15:38:03 2009 -0700
> +++ b/tests/test-convert-cvs-branch.out	Mon Jun 08 08:13:06 2009 +0200
> @@ -51,3 +51,71 @@
>  |/
>  o  0 () Initial revision files: a b
>  
> +
> +% issue 1447
> +cvs -f -Q co -d cvswork2 foo
> +cvs -f -Q add a.txt b.txt
> +cvs -f ci -m Initial commit
> +cvs -f ci -m Fix b on HEAD
> +cvs -f ci -m Small fix in a on HEAD
> +cvs -f -Q tag -b BRANCH
> +cvs -f -Q up -P -rBRANCH
> +cvs -f ci -m Change on BRANCH in b
> +collecting CVS rlog
> +5 log entries
> +creating changesets
> +4 changeset entries
> +---------------------
> +PatchSet 1 
> +Date:
> +Author:
> +Branch: HEAD
> +Tag: (none) 
> +Log:
> +Initial commit
> +
> +Members: 
> +	a.txt:INITIAL->1.1 
> +	b.txt:INITIAL->1.1 
> +
> +---------------------
> +PatchSet 2 
> +Date:
> +Author:
> +Branch: HEAD
> +Tag: (none) 
> +Branchpoints: BRANCH 
> +Parent: 1
> +Log:
> +Fix b on HEAD
> +
> +Members: 
> +	b.txt:1.1->1.2 
> +
> +---------------------
> +PatchSet 3 
> +Date:
> +Author:
> +Branch: HEAD
> +Tag: (none) 
> +Branchpoints: BRANCH 
> +Parent: 2
> +Log:
> +Small fix in a on HEAD
> +
> +Members: 
> +	a.txt:1.1->1.2 
> +
> +---------------------
> +PatchSet 4 
> +Date:
> +Author:
> +Branch: BRANCH
> +Tag: (none) 
> +Parent: 3
> +Log:
> +Change on BRANCH in b
> +
> +Members: 
> +	b.txt:1.2->1.2.2.1 
> +
> diff -r 6e41d3c5619f -r 4cbf1494ba82 tests/test-convert-cvs-builtincvsps
> --- a/tests/test-convert-cvs-builtincvsps	Sat Jun 06 15:38:03 2009 -0700
> +++ b/tests/test-convert-cvs-builtincvsps	Mon Jun 08 08:13:06 2009 +0200
> @@ -94,7 +94,7 @@
>  
>  echo % convert again
>  hg convert src src-hg | sed -e 's/connecting to.*cvsrepo/connecting to cvsrepo/g'
> -hgcat a
> +#hgcat a

Debugging leftover?

--
Patrick Mézard


More information about the Mercurial-devel mailing list