[PATCH 1 of 8] convert: add the --descmap option

Patrick Mézard pmezard at gmail.com
Tue Nov 29 10:36:17 CST 2011


Le 28/11/11 07:25, Yury Sulsky a écrit :
> # HG changeset patch
> # User Yury Sulsky <yury.sulsky at gmail.com>
> # Date 1322451544 18000
> # Node ID cc4a7bb5d0827631a1c67d195aec514a673b7478
> # Parent  3e13ade423f08031045c2d5a2ef2a87a863a2614
> convert: add the --descmap option

The intent is good, I would prefer to start with some kind of python hook (idem for "preprocess" in the next patches), then possibly add a "preprocess" like external command and finally offer the --descmap possibility for people who do not know python and do not have proper shells (windows folks mostly). We can do all this later, this part of the interface wouldn't change.

> diff -r 3e13ade423f0 -r cc4a7bb5d082 hgext/convert/__init__.py
> --- a/hgext/convert/__init__.py	Mon Nov 21 01:49:20 2011 +0100
> +++ b/hgext/convert/__init__.py	Sun Nov 27 22:39:04 2011 -0500
> @@ -79,6 +79,23 @@
>  
>      Empty lines and lines starting with a ``#`` are ignored.
>  
> +    The descmap option allows you to rewrite commit messages in the
> +    destination repository. The format uses indentation to distinguish
> +    between revisions (specified on a line by themselves) and the
> +    corresponding commit message::
> +
> +      revision
> +
> +        commit message line 1
> +        commit message line 2 ...

OK. I found the format a little hackish at first but it works and is simple to generate. The implementation looks too complicated, simpler rules would help documenting what "The format uses indentation to distinguish between revisions" really mean and would simplify the parser too. What about:

1- If the line starts with a #, we ignore it
2- If the line does not start with a space this is a revision identifier
3- If the line starts with 2 (or 4) spaces this is a comment line
4- Else, syntax error

In particular, we avoid all the business with tabs.

I am not fond of 'DEFAULT' either, we avoid capital letter in general. Maybe "default" or "*"?

> +
> +    If revision is the string ``DEFAULT``, all commits will have the
> +    following description unless another one is specified in the
> +    descmap file.
> +
> +    Lines starting with ``#'' are ignored and the commit messages
> +    are stripped of whitespace at the beginning and at the end.
> +
>      The filemap is a file that allows filtering and remapping of files
>      and directories. Each line can contain one of the following
>      directives::
> @@ -300,6 +317,8 @@
>             _('import up to target revision REV'), _('REV')),
>            ('A', 'authormap', '',
>             _('remap usernames using this file'), _('FILE')),
> +          ('', 'descmap', '',
> +           _('remap commit messages using this file'), _('FILE')),
>            ('', 'filemap', '',
>             _('remap file names using contents of file'), _('FILE')),
>            ('', 'splicemap', '',
> diff -r 3e13ade423f0 -r cc4a7bb5d082 hgext/convert/common.py
> --- a/hgext/convert/common.py	Mon Nov 21 01:49:20 2011 +0100
> +++ b/hgext/convert/common.py	Sun Nov 27 22:39:04 2011 -0500
> @@ -37,6 +37,7 @@
>      pass
>  
>  SKIPREV = 'SKIP'
> +DEFAULT = 'DEFAULT'

Why not in descmapper?

>  class commit(object):
>      def __init__(self, author, date, desc, parents, branch=None, rev=None,
> diff -r 3e13ade423f0 -r cc4a7bb5d082 hgext/convert/convcmd.py
> --- a/hgext/convert/convcmd.py	Mon Nov 21 01:49:20 2011 +0100
> +++ b/hgext/convert/convcmd.py	Sun Nov 27 22:39:04 2011 -0500
> @@ -5,7 +5,7 @@
>  # This software may be used and distributed according to the terms of the
>  # GNU General Public License version 2 or any later version.
>  
> -from common import NoRepo, MissingTool, SKIPREV, mapfile
> +from common import NoRepo, MissingTool, SKIPREV, DEFAULT, mapfile

Is it necessary?

>  from cvs import convert_cvs
>  from darcs import darcs_source
>  from git import convert_git
> @@ -15,6 +15,7 @@
>  from gnuarch import gnuarch_source
>  from bzr import bzr_source
>  from p4 import p4_source
> +from descmap import descmapper
>  import filemap
>  
>  import os, shutil
> @@ -103,6 +104,7 @@
>          self.commitcache = {}
>          self.authors = {}
>          self.authorfile = None
> +        self.descmapper = None
>  
>          # Record converted revisions persistently: maps source revision
>          # ID to target revision ID (both strings).  (This is how
> @@ -118,6 +120,9 @@
>              self.readauthormap(opts.get('authormap'))
>              self.authorfile = self.dest.authorfile()
>  
> +        if opts.get('descmap'):
> +            self.descmapper = descmapper(ui, source, opts.get('descmap'))
> +
>          self.splicemap = mapfile(ui, opts.get('splicemap'))
>          self.branchmap = mapfile(ui, opts.get('branchmap'))
>  
> @@ -295,6 +300,8 @@
>          commit = self.source.getcommit(rev)
>          commit.author = self.authors.get(commit.author, commit.author)
>          commit.branch = self.branchmap.get(commit.branch, commit.branch)
> +        if self.descmapper is not None:
> +            commit.desc = self.descmapper(rev, commit.desc)
>          self.commitcache[rev] = commit
>          return commit
>  
> diff -r 3e13ade423f0 -r cc4a7bb5d082 hgext/convert/descmap.py
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/hgext/convert/descmap.py	Sun Nov 27 22:39:04 2011 -0500
> @@ -0,0 +1,68 @@
> +from mercurial.i18n import _
> +from common import DEFAULT

Just define DEFAULT here.

> +
> +def indentation(line):
> +    x = line.expandtabs()
> +    return len(x) - len(x.lstrip())
> +
> +def unindent(line, amt):
> +    x = ((indentation(line) - amt)*' ' + line.lstrip())
> +    if not x or x.isspace():
> +        # Keep the newline
> +        x += '\n'
> +    return x
> +
> +class descmapper(object):
> +    def __init__(self, ui, source, path):
> +        self.descmap = {}
> +        self.default = None
> +
> +        dfile = open(path, 'r')
> +        crev  = None
> +
> +        def pushrev():
> +            if crev is None: return

We pass a line after ":", even for short statements.

> +            rev, lines = crev
> +            indents = [indentation(line) for line in lines
> +                       if line and not line.isspace()]
> +            if indents:
> +                amt = min(indents)
> +                lines = [unindent(line, amt) for line in lines]
> +            desc = ''.join(lines).strip()

No need to clean the description here, this will be done in changelog.add().

> +            if rev == DEFAULT:
> +                self.default = desc

Since we cannot tell "DEFAULT" from a revision named "DEFAULT", this can be put in descmap directly.

> +            else:
> +                self.descmap[rev] = desc

With the whitespace rules described above, pushrev() probably simplifies into one or two lines of code and may be unnecessary.

> +
> +        for line in dfile:
> +
> +            if line.startswith('#'):
> +                continue
> +
> +            sline = line.strip()
> +            isrev = line and not line[0].isspace()
> +            if isrev:
> +                pushrev()
> +                rev = (sline==DEFAULT) and DEFAULT or source.lookuprev(sline)

Spaces in comparisons: "(sline == DEFAULT)"

> +                if rev is None:
> +                    msg = _('Unknown revision in description map %s: %s')

Please keep it lower case

> +                    ui.warn(msg % (path, sline))
> +                    crev = None
> +                else:
> +                    crev = (rev, [])
> +            elif crev is not None:
> +                crev[1].append(line)
> +            elif sline:
> +                msg = _('Ignoring indented line in description map %s: %s\n')
> +                ui.warn(msg % (path, sline))

Shouldn't we fail if any warning have been emitted? And make them errors?

> +
> +        pushrev()
> +        dfile.close()
> +
> +    def __call__(self, rev, curdesc):
> +        ret = self.descmap.get(rev, self.default)
> +        if ret is None:
> +            return curdesc
> +        else:
> +            return ret

or:

           return self.descmap.get(rev, self.default or curdesc)

> +
> diff -r 3e13ade423f0 -r cc4a7bb5d082 tests/test-convert-descmap.t
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ b/tests/test-convert-descmap.t	Sun Nov 27 22:39:04 2011 -0500

Woohoo a test, thanks!

> @@ -0,0 +1,108 @@
> +
> +  $ cat >> $HGRCPATH <<EOF
> +  > [extensions]
> +  > convert=
> +  > EOF
> +
> +Prepare orig repo
> +
> +  $ hg init orig
> +  $ cd orig
> +  $ echo foo > foo
> +  $ hg ci -qAm 'foo'
> +  $ echo bar > bar
> +  $ hg ci -qAm 'bar'
> +  $ cd ..
> +
> +--descmap without DEFAULT
> +
> +  $ cat > descmap.txt <<EOF
> +  > 1
> +  > 
> +  > # comment 1
> +  >   a b c
> +  > # no blank line
> +  > 	foo bar
> +  > 
> +  >   # comment in description
> +  > 
> +  > EOF
> +  $ hg convert --descmap descmap.txt orig new
> +  initializing destination new repository
> +  scanning source...
> +  sorting...
> +  converting...
> +  1 foo
> +  0 a b c
> +
> +  $ hg log --debug new

--verbose is enough to display the message and would bring less clutter.

> +  changeset:   1:e8cf4d710178cb247f793e8dca54b495e29ad233
> +  tag:         tip
> +  parent:      0:e63c23eaa88ae77967edcf4ea194d31167c478b0
> +  parent:      -1:0000000000000000000000000000000000000000
> +  manifest:    1:1828a3314f79aad3bdcdbea29512835b8d9ed1a7
> +  user:        test
> +  date:        Thu Jan 01 00:00:00 1970 +0000
> +  files+:      bar
> +  extra:       branch=default
> +  description:
> +  a b c
> +        foo bar
> +  
> +  # comment in description
> +  
> +  
> +  changeset:   0:e63c23eaa88ae77967edcf4ea194d31167c478b0
> +  parent:      -1:0000000000000000000000000000000000000000
> +  parent:      -1:0000000000000000000000000000000000000000
> +  manifest:    0:9091aa5df980aea60860a2e39c95182e68d1ddec
> +  user:        test
> +  date:        Thu Jan 01 00:00:00 1970 +0000
> +  files+:      foo
> +  extra:       branch=default
> +  description:
> +  foo
> +  
> +  
> +--filemap with DEFAULT  
> +
> +  $ cat >> descmap.txt <<EOF
> +  > DEFAULT
> +  >   the default comment
> +  > EOF
> +  $ hg convert --descmap descmap.txt orig new2
> +  initializing destination new2 repository
> +  scanning source...
> +  sorting...
> +  converting...
> +  1 the default comment
> +  0 a b c
> +  $ hg log --debug new2
> +  changeset:   1:adcea90a81171d7a38bd9eb131ea6e82227b2fbd
> +  tag:         tip
> +  parent:      0:b054069f11efab0cbca49f9d3e90a50b38362512
> +  parent:      -1:0000000000000000000000000000000000000000
> +  manifest:    1:1828a3314f79aad3bdcdbea29512835b8d9ed1a7
> +  user:        test
> +  date:        Thu Jan 01 00:00:00 1970 +0000
> +  files+:      bar
> +  extra:       branch=default
> +  description:
> +  a b c
> +        foo bar
> +  
> +  # comment in description
> +  
> +  
> +  changeset:   0:b054069f11efab0cbca49f9d3e90a50b38362512
> +  parent:      -1:0000000000000000000000000000000000000000
> +  parent:      -1:0000000000000000000000000000000000000000
> +  manifest:    0:9091aa5df980aea60860a2e39c95182e68d1ddec
> +  user:        test
> +  date:        Thu Jan 01 00:00:00 1970 +0000
> +  files+:      foo
> +  extra:       branch=default
> +  description:
> +  the default comment
> +  
> +  
> diff -r 3e13ade423f0 -r cc4a7bb5d082 tests/test-convert.t
> --- a/tests/test-convert.t	Mon Nov 21 01:49:20 2011 +0100
> +++ b/tests/test-convert.t	Sun Nov 27 22:39:04 2011 -0500
> @@ -69,6 +69,22 @@
>    
>        Empty lines and lines starting with a "#" are ignored.
>    
> +      The descmap option allows you to rewrite commit messages in the
> +      destination repository. The format uses indentation to distinguish between
> +      revisions (specified on a line by themselves) and the corresponding commit
> +      message:
> +  
> +        revision
> +  
> +          commit message line 1
> +          commit message line 2 ...
> +  
> +      If revision is the string "DEFAULT", all commits will have the following
> +      description unless another one is specified in the descmap file.
> +  
> +      Lines starting with "#'' are ignored and the commit messages are stripped
> +      of whitespace at the beginning and at the end.
> +  
>        The filemap is a file that allows filtering and remapping of files and
>        directories. Each line can contain one of the following directives:
>    
> @@ -254,6 +270,7 @@
>     -d --dest-type TYPE   destination repository type
>     -r --rev REV          import up to target revision REV
>     -A --authormap FILE   remap usernames using this file
> +      --descmap FILE     remap commit messages using this file
>        --filemap FILE     remap file names using contents of file
>        --splicemap FILE   splice synthesized history into place
>        --branchmap FILE   change branch names while converting
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
> 



More information about the Mercurial-devel mailing list