[PATCH] add `hg guessrenames` extension

Peter Arrenbrecht peter.arrenbrecht at gmail.com
Thu Jun 12 08:13:06 CDT 2008


The background for this extension is that I very often either forget
to specify -s to `hg addrem` entirely, or only discover too late that
the similarity level wasn't quite right. (Yes, I know about -n, but I
usually forget it too.)

Then I try to revert, and that is a mess. Recently I even lost work
to such a `hg revert --no-backup` (yes, silly). So this extension
changes the workflow so that guessing renames is a separate, easily
repeatable operation.

-parren

On Thu, Jun 12, 2008 at 3:09 PM, Peter Arrenbrecht
<peter.arrenbrecht at gmail.com> wrote:
> # HG changeset patch
> # User Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
> # Date 1213276092 -7200
> # Node ID 1bec06a715c5d22045cd235d1d1e47d451bfd8df
> # Parent  549bd6d7da7f0b7b97e47b659868951c60e677ee
> add `hg guessrenames` extension
>
> Does a job similar to `hg addrem -s`, but on files already in the dirstate.
> Also has an option `--all` to first forget about all recorded copies and
> start afresh. Unlike `hg addrem -s`, this command can easily be rerun
> to fix earlier mistakes (for example, trying a different similarity).
>
> diff --git a/hgext/guessrenames.py b/hgext/guessrenames.py
> new file mode 100644
> --- /dev/null
> +++ b/hgext/guessrenames.py
> @@ -0,0 +1,97 @@
> +# Mercurial extension to provide the 'hg guessrenames' command
> +#
> +# Copyright 2008 by Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
> +# Author(s):
> +# Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
> +#
> +# This software may be used and distributed according to the terms
> +# of the GNU General Public License, incorporated herein by reference.
> +
> +from mercurial import cmdutil, commands
> +from mercurial.i18n import _
> +
> +
> +def guessrenames(ui, repo, *pats, **opts):
> +    """guess renamed files by similarity
> +
> +    Compares every removed file with every added file and records those
> +    similar enough as renames. The -s option takes a percentage between
> +    1 (disabled) and 100 (files must be identical) as its parameter.
> +    Detecting renamed files this way can be expensive.
> +
> +    The command normally does not compare files already recorded as a
> +    copy source or target. With the --all option, first forgets about
> +    all existing recorded copies and starts afresh.
> +
> +    This is similar to `hg addremove -s`, but operates on files already
> +    added/removed, rather than untracked/missing files. It can be used
> +    when, for example, you have forgotten -s for addrem.
> +    """
> +    try:
> +        sim = float(opts.get('similarity') or 90)
> +    except ValueError:
> +        raise util.Abort(_('similarity must be a number'))
> +    if sim < 1 or sim > 100:
> +        raise util.Abort(_('similarity must be between 1 and 100'))
> +    sim = sim / 100.0
> +    all = opts.get('all') or False
> +    dryrun = opts.get('dry_run') or False
> +
> +    ds = repo.dirstate
> +    srcs, tgts, copysrcs = [], [], []
> +    mapping = {}
> +    m = cmdutil.match(repo, pats, opts)
> +    for abs in repo.walk(m):
> +        state = ds[abs]
> +        target = m.rel(abs)
> +        if state == 'a':
> +            copied = ds.copied(abs)
> +            if copied:
> +                if all:
> +                    repo.ui.status(_('forgetting %s -> %s\n') % (copied, abs))
> +                    if not dryrun:
> +                        ds.forget(abs)
> +                        ds.add(abs)
> +                    copied = None
> +                else:
> +                    copysrcs.append(copied)
> +            if not copied:
> +                tgts.append(abs)
> +                mapping[abs] = m.rel(abs), m.exact(abs)
> +        elif state == 'r':
> +            srcs.append(abs)
> +            mapping[abs] = m.rel(abs), m.exact(abs)
> +
> +    if not all:
> +        srcs = [src for src in srcs if not src in copysrcs]
> +
> +    if not srcs or not tgts:
> +        repo.ui.status(_('no added/removed files to match\n'))
> +        return
> +
> +    if repo.ui.verbose:
> +        repo.ui.status(_('potential sources (removed files):\n'))
> +        for f in srcs:
> +            repo.ui.status(_('  %s\n') % f)
> +        repo.ui.status(_('potential targets (added files):\n'))
> +        for f in tgts:
> +            repo.ui.status(_('  %s\n') % f)
> +
> +    for old, new, score in cmdutil.findrenames(repo, tgts, srcs, sim):
> +        oldrel, oldexact = mapping[old]
> +        newrel, newexact = mapping[new]
> +        repo.ui.status(_('recording %s -> %s (%d%% similar)\n') %
> +                       (oldrel, newrel, score * 100))
> +        if not dryrun:
> +            repo.copy(old, new)
> +
> +
> +cmdtable = {  # command name -> (function, option list, synopsis)
> +    "guessrenames":
> +        (guessrenames,
> +         [('s', 'similarity', '90',  # same default as the fallback in guessrenames
> +           _('similarity needed for assuming rename (1<=s<=100)')),
> +          ('a', 'all', None, _('forget all recorded renames/copies and start over')),
> +         ] + commands.walkopts + commands.dryrunopts,  # dry_run is read via opts
> +         _('hg guessrenames [OPTION]... [FILE]...')),
> +}
> diff --git a/tests/test-guessrenames b/tests/test-guessrenames
> new file mode 100755
> --- /dev/null
> +++ b/tests/test-guessrenames
> @@ -0,0 +1,52 @@
> +#!/bin/sh
> +
> +dohg() {
> +       echo "#------------ hg $@"  # banner marks each command in the test output
> +       hg "$@"  # quoted so arguments are passed through without re-splitting
> +}
> +
> +echo "[extensions]" >> $HGRCPATH  # enable the extension under test
> +echo "hgext.guessrenames=" >> $HGRCPATH
> +
> +dohg init repo
> +cd repo
> +
> +python -c 'for x in range(50): print x' > one
> +python -c 'for x in range(70): print x' > two
> +cp two three
> +echo "Something" >four
> +dohg ci -Am1
> +
> +mv two twoA  # rename outside of hg, then modify slightly
> +echo "And then some" >>twoA
> +rm four
> +echo "Some totally other thing" >five
> +dohg addrem  # no -s: records plain adds/removes only
> +dohg mv one oneA  # explicit rename, already recorded as a copy
> +cp twoA untracked  # never added; should stay untracked throughout
> +dohg stat -C
> +
> +dohg guessrenames -vn  # dry run: status below must be unchanged
> +dohg stat -C
> +
> +dohg guessrenames -vna  # dry run with --all
> +dohg stat -C
> +
> +dohg guessrenames -v  # real run
> +dohg stat -C
> +
> +dohg guessrenames -va  # real run with --all
> +dohg stat -C
> +
> +mkdir a
> +mkdir b
> +echo one >a/one
> +echo two >b/two
> +dohg ci -Amdirs a b
> +mv a/one a/oneA
> +mv b/two b/twoA
> +dohg addrem a b
> +dohg guessrenames -vn
> +dohg guessrenames -v a  # restrict matching to directory a
> +dohg guessrenames -v b  # restrict matching to directory b
> +
> diff --git a/tests/test-guessrenames.out b/tests/test-guessrenames.out
> new file mode 100644
> --- /dev/null
> +++ b/tests/test-guessrenames.out
> @@ -0,0 +1,131 @@
> +#------------ hg init repo
> +#------------ hg ci -Am1
> +adding four
> +adding one
> +adding three
> +adding two
> +#------------ hg addrem
> +adding five
> +adding twoA
> +removing four
> +removing two
> +#------------ hg mv one oneA
> +#------------ hg stat -C
> +A five
> +A oneA
> +  one
> +A twoA
> +R four
> +R one
> +R two
> +? untracked
> +#------------ hg guessrenames -vn
> +potential sources (removed files):
> +  four
> +  two
> +potential targets (added files):
> +  five
> +  twoA
> +recording two -> twoA (96% similar)
> +#------------ hg stat -C
> +A five
> +A oneA
> +  one
> +A twoA
> +R four
> +R one
> +R two
> +? untracked
> +#------------ hg guessrenames -vna
> +forgetting one -> oneA
> +potential sources (removed files):
> +  four
> +  one
> +  two
> +potential targets (added files):
> +  five
> +  oneA
> +  twoA
> +recording one -> oneA (100% similar)
> +recording two -> twoA (96% similar)
> +#------------ hg stat -C
> +A five
> +A oneA
> +  one
> +A twoA
> +R four
> +R one
> +R two
> +? untracked
> +#------------ hg guessrenames -v
> +potential sources (removed files):
> +  four
> +  two
> +potential targets (added files):
> +  five
> +  twoA
> +recording two -> twoA (96% similar)
> +#------------ hg stat -C
> +A five
> +A oneA
> +  one
> +A twoA
> +  two
> +R four
> +R one
> +R two
> +? untracked
> +#------------ hg guessrenames -va
> +forgetting one -> oneA
> +forgetting two -> twoA
> +potential sources (removed files):
> +  four
> +  one
> +  two
> +potential targets (added files):
> +  five
> +  oneA
> +  twoA
> +recording one -> oneA (100% similar)
> +recording two -> twoA (96% similar)
> +#------------ hg stat -C
> +A five
> +A oneA
> +  one
> +A twoA
> +  two
> +R four
> +R one
> +R two
> +? untracked
> +#------------ hg ci -Amdirs a b
> +adding a/one
> +adding b/two
> +#------------ hg addrem a b
> +adding a/oneA
> +adding b/twoA
> +removing a/one
> +removing b/two
> +#------------ hg guessrenames -vn
> +potential sources (removed files):
> +  a/one
> +  b/two
> +  four
> +potential targets (added files):
> +  a/oneA
> +  b/twoA
> +  five
> +recording a/one -> a/oneA (100% similar)
> +recording b/two -> b/twoA (100% similar)
> +#------------ hg guessrenames -v a
> +potential sources (removed files):
> +  a/one
> +potential targets (added files):
> +  a/oneA
> +recording a/one -> a/oneA (100% similar)
> +#------------ hg guessrenames -v b
> +potential sources (removed files):
> +  b/two
> +potential targets (added files):
> +  b/twoA
> +recording b/two -> b/twoA (100% similar)
>


More information about the Mercurial-devel mailing list