[PATCH 2 of 2 stable] diff: search beyond ancestor when detecting renames

Matt Mackall mpm at selenic.com
Fri Nov 22 15:00:10 CST 2013


On Tue, 2013-11-12 at 04:08 +0100, Mads Kiilerich wrote:
> # HG changeset patch
> # User Mads Kiilerich <madski at unity3d.com>
> # Date 1384225676 -3600
> #      Tue Nov 12 04:07:56 2013 +0100
> # Branch stable
> # Node ID 6bda09b4adc115285149d36fc74824370478a15a
> # Parent  82165da0edded3801c421273d49ebccbf382878a
> diff: search beyond ancestor when detecting renames

The change to the function signature means finding the meat of the
change is lost in pages of noise. If you MUST do something like this
(and you definitely do not in this case because we have ctx._repo), it
should be two patches: sweeping but trivial change, followed by actual
change.

Your description should also mention "now uses _findlimit like
mergecopies" because that's what your reviewer suggested.

> This removes an optimization that was introduced in 91eb4512edd0 but was too
> aggressive - as indicated by how it changed test-mq-merge.t .
> 
> We are walking filelogs to find copy sources and we can thus not be sure to hit
> the base revision and find the renamed file there - it could also be in the
> first ancestor of the base ... in the filelog.
> 
> But again, we are walking the filelog and can thus not easily know when we hit
> the first ancestor of the base revision and which filename to look for there.
> The lower bound for how far we have to go has to be found from the lowest
> changelog revision that is an ancestor of only one of the compared revisions.
> Any filelog ancestor with a revision number lower than that revision will be
> the ancestor of both compared revisions, and there is thus no reason to go
> further back than that.
> 
> The profile of copies._tracefile is changed to match its new mode of
> operation. Repo objects also has to be passed around in new places ... but that
> seems to be necessary.
> 
> diff --git a/contrib/perf.py b/contrib/perf.py
> --- a/contrib/perf.py
> +++ b/contrib/perf.py
> @@ -169,7 +169,7 @@ def perfpathcopies(ui, repo, rev1, rev2)
>      ctx1 = scmutil.revsingle(repo, rev1, rev1)
>      ctx2 = scmutil.revsingle(repo, rev2, rev2)
>      def d():
> -        copies.pathcopies(ctx1, ctx2)
> +        copies.pathcopies(repo, ctx1, ctx2)
>      timer(d)
>  
>  @command('perfmanifest', [], 'REV')
> diff --git a/hgext/histedit.py b/hgext/histedit.py
> --- a/hgext/histedit.py
> +++ b/hgext/histedit.py
> @@ -247,7 +247,7 @@ def collapse(repo, first, last, commitop
>          files.update(ctx.files())
>  
>      # Recompute copies (avoid recording a -> b -> a)
> -    copied = copies.pathcopies(base, last)
> +    copied = copies.pathcopies(repo, base, last)
>  
>      # prune files which were reverted by the updates
>      def samefile(f):
> diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
> --- a/mercurial/cmdutil.py
> +++ b/mercurial/cmdutil.py
> @@ -1636,7 +1636,8 @@ def forget(ui, repo, match, prefix, expl
>  
>  def duplicatecopies(repo, rev, fromrev):
>      '''reproduce copies from fromrev to rev in the dirstate'''
> -    for dst, src in copies.pathcopies(repo[fromrev], repo[rev]).iteritems():
> +    for dst, src in copies.pathcopies(repo, repo[fromrev], repo[rev]
> +                                      ).iteritems():
>          # copies.pathcopies returns backward renames, so dst might not
>          # actually be in the dirstate
>          if repo.dirstate[dst] in "nma":
> @@ -1721,7 +1722,7 @@ def amend(ui, repo, commitfunc, old, ext
>                  user = ctx.user()
>                  date = ctx.date()
>                  # Recompute copies (avoid recording a -> b -> a)
> -                copied = copies.pathcopies(base, ctx)
> +                copied = copies.pathcopies(repo, base, ctx)
>  
>                  # Prune files which were reverted by the updates: if old
>                  # introduced file X and our intermediate commit, node,
> @@ -2102,7 +2103,7 @@ def revert(ui, repo, ctx, parents, *pats
>                  checkout(f)
>                  normal(f)
>  
> -            copied = copies.pathcopies(repo[parent], ctx)
> +            copied = copies.pathcopies(repo, repo[parent], ctx)
>  
>              for f in add[0] + undelete[0] + revert[0]:
>                  if f in copied:
> diff --git a/mercurial/commands.py b/mercurial/commands.py
> --- a/mercurial/commands.py
> +++ b/mercurial/commands.py
> @@ -5375,7 +5375,7 @@ def status(ui, repo, *pats, **opts):
>      changestates = zip(states, 'MAR!?IC', stat)
>  
>      if (opts.get('all') or opts.get('copies')) and not opts.get('no_status'):
> -        copy = copies.pathcopies(repo[node1], repo[node2])
> +        copy = copies.pathcopies(repo, repo[node1], repo[node2])
>  
>      fm = ui.formatter('status', opts)
>      fmt = '%s' + end
> diff --git a/mercurial/copies.py b/mercurial/copies.py
> --- a/mercurial/copies.py
> +++ b/mercurial/copies.py
> @@ -98,15 +98,14 @@ def _chain(src, dst, a, b):
>  
>      return t
>  
> -def _tracefile(fctx, actx):
> -    '''return file context that is the ancestor of fctx present in actx'''
> -    stop = actx.rev()
> -    am = actx.manifest()
> +def _tracefile(fctx, am, limit=-1):
> +    '''return file context that is the ancestor of fctx present in ancestor
> +    manifest am, stopping after the first ancestor lower than limit'''
>  
>      for f in fctx.ancestors():
>          if am.get(f.path(), None) == f.filenode():
>              return f
> -        if f.rev() < stop:
> +        if f.rev() < limit:
>              return None
>  
>  def _dirstatecopies(d):
> @@ -117,7 +116,7 @@ def _dirstatecopies(d):
>              del c[k]
>      return c
>  
> -def _forwardcopies(a, b):
> +def _forwardcopies(repo, a, b):
>      '''find {dst at b: src at a} copy mapping where a is an ancestor of b'''
>  
>      # check for working copy
> @@ -129,6 +128,13 @@ def _forwardcopies(a, b):
>              # short-circuit to avoid issues with merge states
>              return _dirstatecopies(w)
>  
> +    # files might have to be traced back to the fctx parent of the last
> +    # one-side-only changeset, but not further back than that
> +    limit = _findlimit(repo, a.rev(), b.rev())
> +    if limit is None:
> +        limit = -1
> +    am = a.manifest()
> +
>      # find where new files came from
>      # we currently don't try to find where old files went, too expensive
>      # this means we can miss a case like 'hg rm b; hg cp a b'
> @@ -137,7 +143,7 @@ def _forwardcopies(a, b):
>      missing.difference_update(a.manifest().iterkeys())
>  
>      for f in missing:
> -        ofctx = _tracefile(b[f], a)
> +        ofctx = _tracefile(b[f], am, limit)
>          if ofctx:
>              cm[f] = ofctx.path()
>  
> @@ -147,11 +153,11 @@ def _forwardcopies(a, b):
>  
>      return cm
>  
> -def _backwardrenames(a, b):
> +def _backwardrenames(repo, a, b):
>      # Even though we're not taking copies into account, 1:n rename situations
>      # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
>      # arbitrarily pick one of the renames.
> -    f = _forwardcopies(b, a)
> +    f = _forwardcopies(repo, b, a)
>      r = {}
>      for k, v in sorted(f.iteritems()):
>          # remove copies
> @@ -160,16 +166,17 @@ def _backwardrenames(a, b):
>          r[v] = k
>      return r
>  
> -def pathcopies(x, y):
> +def pathcopies(repo, x, y):
>      '''find {dst at y: src at x} copy mapping for directed compare'''
>      if x == y or not x or not y:
>          return {}
>      a = y.ancestor(x)
>      if a == x:
> -        return _forwardcopies(x, y)
> +        return _forwardcopies(repo, x, y)
>      if a == y:
> -        return _backwardrenames(x, y)
> -    return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
> +        return _backwardrenames(repo, x, y)
> +    return _chain(x, y,
> +                  _backwardrenames(repo, x, a), _forwardcopies(repo, a, y))
>  
>  def mergecopies(repo, c1, c2, ca):
>      """
> diff --git a/mercurial/patch.py b/mercurial/patch.py
> --- a/mercurial/patch.py
> +++ b/mercurial/patch.py
> @@ -1579,7 +1579,7 @@ def diff(repo, node1=None, node2=None, m
>  
>      copy = {}
>      if opts.git or opts.upgrade:
> -        copy = copies.pathcopies(ctx1, ctx2)
> +        copy = copies.pathcopies(repo, ctx1, ctx2)
>  
>      def difffn(opts, losedata):
>          return trydiff(repo, revs, ctx1, ctx2, modified, added, removed,
> diff --git a/tests/test-mq-merge.t b/tests/test-mq-merge.t
> --- a/tests/test-mq-merge.t
> +++ b/tests/test-mq-merge.t
> @@ -147,11 +147,13 @@ Check patcha is still a git patch:
>    -b
>    +a
>    +c
> -  diff --git a/aa b/aa
> -  new file mode 100644
> -  --- /dev/null
> +  diff --git a/a b/aa
> +  copy from a
> +  copy to aa
> +  --- a/a
>    +++ b/aa
> -  @@ -0,0 +1,1 @@
> +  @@ -1,1 +1,1 @@
> +  -b
>    +a
>  
>  Check patcha2 is still a regular patch:
> diff --git a/tests/test-mv-cp-st-diff.t b/tests/test-mv-cp-st-diff.t
> --- a/tests/test-mv-cp-st-diff.t
> +++ b/tests/test-mv-cp-st-diff.t
> @@ -1567,3 +1567,41 @@ unrelated branch diff
>    @@ -0,0 +1,1 @@
>    +a
>    $ cd ..
> +
> +
> +test for case where we didn't look sufficiently far back to find rename ancestor
> +
> +  $ hg init diffstop
> +  $ cd diffstop
> +  $ echo > f
> +  $ hg ci -qAmf
> +  $ hg mv f g
> +  $ hg ci -m'f->g'
> +  $ hg up -qr0
> +  $ touch x
> +  $ hg ci -qAmx
> +  $ echo f > f
> +  $ hg ci -qmf=f
> +  $ hg merge -q
> +  $ hg ci -mmerge
> +  $ hg log -G --template '{rev}  {desc}'
> +  @    4  merge
> +  |\
> +  | o  3  f=f
> +  | |
> +  | o  2  x
> +  | |
> +  o |  1  f->g
> +  |/
> +  o  0  f
> +  
> +  $ hg diff --git -r 2
> +  diff --git a/f b/g
> +  rename from f
> +  rename to g
> +  --- a/f
> +  +++ b/g
> +  @@ -1,1 +1,1 @@
> +  -
> +  +f
> +  $ cd ..
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

-- 
Mathematics is the supreme nostalgia of our time.




More information about the Mercurial-devel mailing list