[PATCH] add `hg guessrenames` extension

Peter Arrenbrecht peter.arrenbrecht at gmail.com
Thu Jun 12 13:09:40 UTC 2008


# HG changeset patch
# User Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
# Date 1213276092 -7200
# Node ID 1bec06a715c5d22045cd235d1d1e47d451bfd8df
# Parent  549bd6d7da7f0b7b97e47b659868951c60e677ee
add `hg guessrenames` extension

Does a job similar to `hg addrem -s`, but on files already in the dirstate.
Also has an option `--all` to first forget about all recorded copies and
start afresh. Unlike `hg addrem -s`, this command can easily be rerun
to fix earlier mistakes (for example, trying a different similarity).

diff --git a/hgext/guessrenames.py b/hgext/guessrenames.py
new file mode 100644
--- /dev/null
+++ b/hgext/guessrenames.py
@@ -0,0 +1,97 @@
+# Mercurial extension to provide the 'hg guessrenames' command
+#
+# Copyright 2008 by Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
+# Author(s):
+# Peter Arrenbrecht <peter.arrenbrecht at gmail.com>
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+from mercurial import cmdutil, commands, util
+from mercurial.i18n import _
+
+
+def guessrenames(ui, repo, *pats, **opts):
+    """guess renamed files by similarity
+
+    Compares every removed file with every added file and records those
+    similar enough as renames. The -s option takes a percentage between
+    1 (disabled) and 100 (files must be identical) as its parameter.
+    Detecting renamed files this way can be expensive.
+
+    The command normally does not compare files already recorded as a
+    copy source or target. With the --all option, first forgets about
+    all existing recorded copies and starts afresh.
+
+    This is similar to `hg addremove -s`, but operates on files already
+    added/removed, rather than untracked/missing files. It can be used
+    when, for example, you have forgotten -s for addrem.
+    """
+    try:
+        sim = float(opts.get('similarity') or 90)
+    except ValueError:
+        raise util.Abort(_('similarity must be a number'))
+    if sim < 1 or sim > 100:
+        raise util.Abort(_('similarity must be between 1 and 100'))
+    sim = sim / 100.0
+    all = opts.get('all') or False
+    dryrun = opts.get('dry_run') or False
+
+    ds = repo.dirstate
+    srcs, tgts, copysrcs = [], [], []
+    mapping = {}
+    m = cmdutil.match(repo, pats, opts)
+    for abs in repo.walk(m):
+        state = ds[abs]
+        target = m.rel(abs)
+        if state == 'a':
+            copied = ds.copied(abs)
+            if copied:
+                if all:
+                    repo.ui.status(_('forgetting %s -> %s\n') % (copied, abs))
+                    if not dryrun:
+                        ds.forget(abs)
+                        ds.add(abs)
+                    copied = None
+                else:
+                    copysrcs.append(copied)
+            if not copied:
+                tgts.append(abs)
+                mapping[abs] = m.rel(abs), m.exact(abs)
+        elif state == 'r':
+            srcs.append(abs)
+            mapping[abs] = m.rel(abs), m.exact(abs)
+
+    if not all:
+        srcs = [src for src in srcs if not src in copysrcs]
+
+    if not srcs or not tgts:
+        repo.ui.status(_('no added/removed files to match\n'))
+        return
+
+    if repo.ui.verbose:
+        repo.ui.status(_('potential sources (removed files):\n'))
+        for f in srcs:
+            repo.ui.status(_('  %s\n') % f)
+        repo.ui.status(_('potential targets (added files):\n'))
+        for f in tgts:
+            repo.ui.status(_('  %s\n') % f)
+
+    for old, new, score in cmdutil.findrenames(repo, tgts, srcs, sim):
+        oldrel, oldexact = mapping[old]
+        newrel, newexact = mapping[new]
+        repo.ui.status(_('recording %s -> %s (%d%% similar)\n') %
+                       (oldrel, newrel, score * 100))
+        if not dryrun:
+            repo.copy(old, new)
+
+
+cmdtable = {
+    "guessrenames":
+        (guessrenames,
+         [('s', 'similarity', '90',
+           _('similarity needed for assuming rename (1<=s<=100)')),
+          ('a', 'all', None, _('forget all recorded renames/copies and start over')),
+         ] + commands.walkopts + commands.dryrunopts,
+         _('hg guessrenames [OPTION]... [FILE]...')),
+}
diff --git a/tests/test-guessrenames b/tests/test-guessrenames
new file mode 100755
--- /dev/null
+++ b/tests/test-guessrenames
@@ -0,0 +1,52 @@
+#!/bin/sh
+# print the hg command line as a section banner, then run it
+dohg() {
+	echo "#------------ hg $*"
+	hg "$@"
+}
+
+echo "[extensions]" >> $HGRCPATH
+echo "hgext.guessrenames=" >> $HGRCPATH
+
+dohg init repo
+cd repo
+
+python -c 'for x in range(50): print x' > one
+python -c 'for x in range(70): print x' > two
+cp two three
+echo "Something" >four
+dohg ci -Am1
+
+mv two twoA
+echo "And then some" >>twoA
+rm four
+echo "Some totally other thing" >five
+dohg addrem
+dohg mv one oneA
+cp twoA untracked
+dohg stat -C
+
+dohg guessrenames -vn
+dohg stat -C
+
+dohg guessrenames -vna
+dohg stat -C
+
+dohg guessrenames -v
+dohg stat -C
+
+dohg guessrenames -va
+dohg stat -C
+
+mkdir a
+mkdir b
+echo one >a/one
+echo two >b/two
+dohg ci -Amdirs a b
+mv a/one a/oneA
+mv b/two b/twoA
+dohg addrem a b
+dohg guessrenames -vn
+dohg guessrenames -v a
+dohg guessrenames -v b
+
diff --git a/tests/test-guessrenames.out b/tests/test-guessrenames.out
new file mode 100644
--- /dev/null
+++ b/tests/test-guessrenames.out
@@ -0,0 +1,131 @@
+#------------ hg init repo
+#------------ hg ci -Am1
+adding four
+adding one
+adding three
+adding two
+#------------ hg addrem
+adding five
+adding twoA
+removing four
+removing two
+#------------ hg mv one oneA
+#------------ hg stat -C
+A five
+A oneA
+  one
+A twoA
+R four
+R one
+R two
+? untracked
+#------------ hg guessrenames -vn
+potential sources (removed files):
+  four
+  two
+potential targets (added files):
+  five
+  twoA
+recording two -> twoA (96% similar)
+#------------ hg stat -C
+A five
+A oneA
+  one
+A twoA
+R four
+R one
+R two
+? untracked
+#------------ hg guessrenames -vna
+forgetting one -> oneA
+potential sources (removed files):
+  four
+  one
+  two
+potential targets (added files):
+  five
+  oneA
+  twoA
+recording one -> oneA (100% similar)
+recording two -> twoA (96% similar)
+#------------ hg stat -C
+A five
+A oneA
+  one
+A twoA
+R four
+R one
+R two
+? untracked
+#------------ hg guessrenames -v
+potential sources (removed files):
+  four
+  two
+potential targets (added files):
+  five
+  twoA
+recording two -> twoA (96% similar)
+#------------ hg stat -C
+A five
+A oneA
+  one
+A twoA
+  two
+R four
+R one
+R two
+? untracked
+#------------ hg guessrenames -va
+forgetting one -> oneA
+forgetting two -> twoA
+potential sources (removed files):
+  four
+  one
+  two
+potential targets (added files):
+  five
+  oneA
+  twoA
+recording one -> oneA (100% similar)
+recording two -> twoA (96% similar)
+#------------ hg stat -C
+A five
+A oneA
+  one
+A twoA
+  two
+R four
+R one
+R two
+? untracked
+#------------ hg ci -Amdirs a b
+adding a/one
+adding b/two
+#------------ hg addrem a b
+adding a/oneA
+adding b/twoA
+removing a/one
+removing b/two
+#------------ hg guessrenames -vn
+potential sources (removed files):
+  a/one
+  b/two
+  four
+potential targets (added files):
+  a/oneA
+  b/twoA
+  five
+recording a/one -> a/oneA (100% similar)
+recording b/two -> b/twoA (100% similar)
+#------------ hg guessrenames -v a
+potential sources (removed files):
+  a/one
+potential targets (added files):
+  a/oneA
+recording a/one -> a/oneA (100% similar)
+#------------ hg guessrenames -v b
+potential sources (removed files):
+  b/two
+potential targets (added files):
+  b/twoA
+recording b/two -> b/twoA (100% similar)


More information about the Mercurial-devel mailing list