[PATCH 5 of 8 git-diff] similar: move score function to module level
Sean Farley
sean at farley.io
Mon Jan 9 14:49:38 EST 2017
# HG changeset patch
# User Sean Farley <sean at farley.io>
# Date 1483850877 28800
# Sat Jan 07 20:47:57 2017 -0800
# Node ID 8561e240f25e851476abde452b850fc09a7fdf06
# Parent 9af4ef2f80a8c5cb69ad4ff5d291ce81a35e4f39
similar: move score function to module level
Future patches will use this to report the similarity of a rename / copy
in the patch output.
diff --git a/mercurial/similar.py b/mercurial/similar.py
--- a/mercurial/similar.py
+++ b/mercurial/similar.py
@@ -41,10 +41,31 @@ def _findexactmatches(repo, added, remov
yield (hashes[h], fctx)
# Done
repo.ui.progress(_('searching for exact renames'), None)
+@util.cachefunc
+def _ctxdata(fctx):
+ # lazily load text
+ orig = fctx.data()
+ return orig, mdiff.splitnewlines(orig)
+
+@util.cachefunc
+def score(fctx1, fctx2):
+ text = fctx1.data()
+ orig, lines = _ctxdata(fctx2)
+ # bdiff.blocks() returns blocks of matching lines
+ # count the number of bytes in each
+ equal = 0
+ matches = bdiff.blocks(text, orig)
+ for x1, x2, y1, y2 in matches:
+ for line in lines[y1:y2]:
+ equal += len(line)
+
+ lengths = len(text) + len(orig)
+ return equal * 2.0 / lengths
+
def _findsimilarmatches(repo, added, removed, threshold):
'''find potentially renamed files based on similar file content
Takes a list of new filectxs and a list of removed filectxs, and yields
(before, after, score) tuples of partial matches.
@@ -52,32 +73,13 @@ def _findsimilarmatches(repo, added, rem
copies = {}
for i, r in enumerate(removed):
repo.ui.progress(_('searching for similar files'), i,
total=len(removed), unit=_('files'))
- # lazily load text
- @util.cachefunc
- def data():
- orig = r.data()
- return orig, mdiff.splitnewlines(orig)
-
- def score(text):
- orig, lines = data()
- # bdiff.blocks() returns blocks of matching lines
- # count the number of bytes in each
- equal = 0
- matches = bdiff.blocks(text, orig)
- for x1, x2, y1, y2 in matches:
- for line in lines[y1:y2]:
- equal += len(line)
-
- lengths = len(text) + len(orig)
- return equal * 2.0 / lengths
-
for a in added:
bestscore = copies.get(a, (None, threshold))[1]
- myscore = score(a.data())
+ myscore = score(a, r)
if myscore >= bestscore:
copies[a] = (r, myscore)
repo.ui.progress(_('searching'), None)
for dest, v in copies.iteritems():
More information about the Mercurial-devel
mailing list