[PATCH 1 of 3] Move 'findrenames' code into its own file

David Greenaway hg-dev at davidgreenaway.com
Fri Apr 2 20:08:22 CDT 2010


# HG changeset patch
# User David Greenaway <hg-dev at davidgreenaway.com>
# Date 1270256296 -39600
# Node ID 20639badc7a829037913ee241ec8892a3932644e
# Parent  cd0c49bdbfd9edab18c3656d8a8a0bd27a9aa82a
Move 'findrenames' code into its own file.

The next few patches will increase the size of the "findrenames"
functionality. This patch simply moves the function into its own
file to avoid clutter building up in 'cmdutil.py'.

diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -10,6 +10,7 @@
 import os, sys, errno, re, glob, tempfile
 import mdiff, bdiff, util, templater, patch, error, encoding, templatekw
 import match as _match
+import similar
 
 revrangesep = ':'
 
@@ -285,52 +286,6 @@
 def matchfiles(repo, files):
     return _match.exact(repo.root, repo.getcwd(), files)
 
-def findrenames(repo, added, removed, threshold):
-    '''find renamed files -- yields (before, after, score) tuples'''
-    copies = {}
-    ctx = repo['.']
-    for i, r in enumerate(removed):
-        repo.ui.progress(_('searching'), i, total=len(removed))
-        if r not in ctx:
-            continue
-        fctx = ctx.filectx(r)
-
-        # lazily load text
-        @util.cachefunc
-        def data():
-            orig = fctx.data()
-            return orig, mdiff.splitnewlines(orig)
-
-        def score(text):
-            if not len(text):
-                return 0.0
-            if not fctx.cmp(text):
-                return 1.0
-            if threshold == 1.0:
-                return 0.0
-            orig, lines = data()
-            # bdiff.blocks() returns blocks of matching lines
-            # count the number of bytes in each
-            equal = 0
-            matches = bdiff.blocks(text, orig)
-            for x1, x2, y1, y2 in matches:
-                for line in lines[y1:y2]:
-                    equal += len(line)
-
-            lengths = len(text) + len(orig)
-            return equal * 2.0 / lengths
-
-        for a in added:
-            bestscore = copies.get(a, (None, threshold))[1]
-            myscore = score(repo.wread(a))
-            if myscore >= bestscore:
-                copies[a] = (r, myscore)
-    repo.ui.progress(_('searching'), None)
-
-    for dest, v in copies.iteritems():
-        source, score = v
-        yield source, dest, score
-
 def addremove(repo, pats=[], opts={}, dry_run=None, similarity=None):
     if dry_run is None:
         dry_run = opts.get('dry_run')
@@ -365,8 +320,8 @@
             added.append(abs)
     copies = {}
     if similarity > 0:
-        for old, new, score in findrenames(repo, added + unknown,
-                                           removed + deleted, similarity):
+        for old, new, score in similar.findrenames(repo,
+                added + unknown, removed + deleted, similarity):
             if repo.ui.verbose or not m.exact(old) or not m.exact(new):
                 repo.ui.status(_('recording removal of %s as rename to %s '
                                  '(%d%% similar)\n') %
diff --git a/mercurial/similar.py b/mercurial/similar.py
new file mode 100644
--- /dev/null
+++ b/mercurial/similar.py
@@ -0,0 +1,59 @@
+# similar.py - mechanisms for finding similar files
+#
+# Copyright 2005-2007 Matt Mackall <mpm at selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from i18n import _
+import util
+import mdiff
+import bdiff
+
+def findrenames(repo, added, removed, threshold):
+    '''find renamed files -- yields (before, after, score) tuples'''
+    copies = {}
+    ctx = repo['.']
+    for i, r in enumerate(removed):
+        repo.ui.progress(_('searching'), i, total=len(removed))
+        if r not in ctx:
+            continue
+        fctx = ctx.filectx(r)
+
+        # lazily load text
+        @util.cachefunc
+        def data():
+            orig = fctx.data()
+            return orig, mdiff.splitnewlines(orig)
+
+        def score(text):
+            if not len(text):
+                return 0.0
+            if not fctx.cmp(text):
+                return 1.0
+            if threshold == 1.0:
+                return 0.0
+            orig, lines = data()
+            # bdiff.blocks() returns blocks of matching lines
+            # count the number of bytes in each
+            equal = 0
+            matches = bdiff.blocks(text, orig)
+            for x1, x2, y1, y2 in matches:
+                for line in lines[y1:y2]:
+                    equal += len(line)
+
+            lengths = len(text) + len(orig)
+            return equal * 2.0 / lengths
+
+        for a in added:
+            bestscore = copies.get(a, (None, threshold))[1]
+            myscore = score(repo.wread(a))
+            if myscore >= bestscore:
+                copies[a] = (r, myscore)
+    repo.ui.progress(_('searching'), None)
+
+    for dest, v in copies.iteritems():
+        source, score = v
+        yield source, dest, score
+
+


More information about the Mercurial-devel mailing list