[PATCH] similar: remove caching from the module level

Fri Jan 13 17:43:40 EST 2017

On 01/13/2017 08:43 PM, Sean Farley wrote:
> # HG changeset patch
> # User Sean Farley <sean at farley.io>
> # Date 1484336556 28800
> #      Fri Jan 13 11:42:36 2017 -0800
> # Node ID 7c54c9524a0954a3bd2f2fb4451028869f851f6d
> # Parent  8540967cd9e0909a9a73dbff458c4cedd4db26aa
> similar: remove caching from the module level
>
> To prevent Bad Things™ from happening, let's rework the logic to not use
> util.cachefunc.

Looks good to me, but… since this is my code, it should probably have 
my name on it ;-)

https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-January/092290.html

(I checked with Sean on IRC and he is okay with changing the name.) Can 
another reviewer fix this in flight?

> diff --git a/mercurial/similar.py b/mercurial/similar.py
> --- a/mercurial/similar.py
> +++ b/mercurial/similar.py
> @@ -11,11 +11,10 @@ import hashlib
>
>  from .i18n import _
>  from . import (
>      bdiff,
>      mdiff,
> -    util,
>  )
>
>  def _findexactmatches(repo, added, removed):
>      '''find renamed files that have no changes
>
> @@ -41,20 +40,18 @@ def _findexactmatches(repo, added, remov
>              yield (hashes[h], fctx)
>
>      # Done
>      repo.ui.progress(_('searching for exact renames'), None)
>
> - at util.cachefunc
>  def _ctxdata(fctx):
>      # lazily load text
>      orig = fctx.data()
>      return orig, mdiff.splitnewlines(orig)
>
> - at util.cachefunc
> -def score(fctx1, fctx2):
> -    text = fctx1.data()
> -    orig, lines = _ctxdata(fctx2)
> +def _score(fctx, otherdata):
> +    orig, lines = otherdata
> +    text = fctx.data()
>      # bdiff.blocks() returns blocks of matching lines
>      # count the number of bytes in each
>      equal = 0
>      matches = bdiff.blocks(text, orig)
>      for x1, x2, y1, y2 in matches:
> @@ -62,10 +59,13 @@ def score(fctx1, fctx2):
>              equal += len(line)
>
>      lengths = len(text) + len(orig)
>      return equal * 2.0 / lengths
>
> +def score(fctx1, fctx2):
> +    return _score(fctx1, _ctxdata(fctx2))
> +
>  def _findsimilarmatches(repo, added, removed, threshold):
>      '''find potentially renamed files based on similar file content
>
>      Takes a list of new filectxs and a list of removed filectxs, and yields
>      (before, after, score) tuples of partial matches.
> @@ -73,13 +73,16 @@ def _findsimilarmatches(repo, added, rem
>      copies = {}
>      for i, r in enumerate(removed):
>          repo.ui.progress(_('searching for similar files'), i,
>                           total=len(removed), unit=_('files'))
>
> +        data = None
>          for a in added:
>              bestscore = copies.get(a, (None, threshold))[1]
> -            myscore = score(a, r)
> +            if data is None:
> +                data = _ctxdata(r)
> +            myscore = score(a, data)
>              if myscore >= bestscore:
>                  copies[a] = (r, myscore)
>      repo.ui.progress(_('searching'), None)
>
>      for dest, v in copies.iteritems():
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

-- 
Pierre-Yves David