[PATCH v6] cmdutil: add within-line color diff capacity

Yuya Nishihara yuya at tcha.org
Sun Dec 3 03:30:04 EST 2017


On Sat, 02 Dec 2017 21:51:33 +0900, matthieu.laneuville at octobus.net wrote:
> # HG changeset patch
> # User Matthieu Laneuville <matthieu.laneuville at octobus.net>
> # Date 1508944418 -32400
> #      Thu Oct 26 00:13:38 2017 +0900
> # Node ID 2cd53502aa3a206eed18d1c13ee294b418fb43c5
> # Parent  3da4bd50103daab5419fc796d0c62470bab6da03
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2cd53502aa3a
> # EXP-Topic inline-diff
> cmdutil: add within-line color diff capacity

Looks nicer than the previous patch, but found a couple of bugs.

> diff -r 3da4bd50103d -r 2cd53502aa3a mercurial/patch.py
> --- a/mercurial/patch.py	Wed Nov 29 07:57:17 2017 +0530
> +++ b/mercurial/patch.py	Thu Oct 26 00:13:38 2017 +0900
> @@ -10,6 +10,7 @@ from __future__ import absolute_import, 
>  
>  import collections
>  import copy
> +import difflib
>  import email
>  import errno
>  import hashlib
> @@ -2461,6 +2462,12 @@ def diffhunks(repo, node1=None, node2=No
>  
>  def difflabel(func, *args, **kw):
>      '''yields 2-tuples of (output, label) based on the output of func()'''
> +    inlinecolor = False
> +    for arg in args:
> +        if util.safehasattr(arg, 'ui'):
> +            inlinecolor = arg.ui.configbool("experimental", "inline-color-diff")
> +        break

This is ugly. Perhaps the worddiff option can be carried by the opts
argument. See patch.difffeatureopts() for details.

> @@ -2477,6 +2484,7 @@ def difflabel(func, *args, **kw):
>      head = False
>      for chunk in func(*args, **kw):
>          lines = chunk.split('\n')
> +        matches = [-1 for x in range(len(lines) + 1)]
>          for i, line in enumerate(lines):
>              if i != 0:
>                  yield ('\n', '')
> @@ -2504,7 +2512,12 @@ def difflabel(func, *args, **kw):
>                              if '\t' == token[0]:
>                                  yield (token, 'diff.tab')
>                              else:
> -                                yield (token, label)
> +                                if not inlinecolor:
> +                                    yield (token, label)
> +                                else:
> +                                    buff, matches = _worddiff(lines, i, matches)
> +                                    for (color, word) in buff:
> +                                        yield (word, color)
>                      else:
>                          yield (stripline, label)
>                      break
> @@ -2513,6 +2526,63 @@ def difflabel(func, *args, **kw):
>              if line != stripline:
>                  yield (line[len(stripline):], 'diff.trailingwhitespace')
>  
> +def _worddiff(slist, idx, matches):
> +    '''Find match of a given string in current chunk and performs word diff.'''
> +    operation = 'inserted' if slist[idx][0] == '+' else 'deleted'
> +    bound = matches[-1] # last item in matches stores the id of the last match
> +
> +    # inserted lines should only be compared to lines that matched them before
> +    if operation == 'inserted':
> +        if matches[idx] != -1:
> +            return _inlinediff(slist[idx],
> +                                    slist[matches[idx]],
> +                                    operation), matches
> +        else:
> +            return [('diff.' + operation, slist[idx])], matches

a) Needs to return a stripline, not a line.

> +    # deleted lines first need to be matched
> +    for i, line in enumerate(slist[bound + 1:-1]):
> +        if line == '':
> +            continue
> +        if line[0] == '+':
> +            sim = difflib.SequenceMatcher(None, slist[idx], line).ratio()
> +            if sim > 0.7:
> +                matches[i + bound + 1] = idx
> +                matches[-1] = i + bound + 1
> +                return _inlinediff(slist[idx], line, operation), matches
> +    return [('diff.' + operation, slist[idx])], matches

b) We need to limit the search space only to the next inserted lines.

Perhaps (a) and (b) can be simply fixed by 2-pass scanning, in which build
a map of matched lines by 1st pass, and apply _inlinediff() by 2nd pass.

  lines = chunk.split('\n')
  matches = {}
  if inlinecolor:
      # build {deleted/inserted: inserted/deleted} map by looking @, -, + lines
      matches = _findmatches(lines)
  for i, line in enumerate(lines):
      ...
          elif i in matches:
              for w, l in _inlinediff(token, strip(lines[matches[i]])):
                  yield (w, l)
          else:
              yield (stripline, label)

> +def _inlinediff(s1, s2, operation):
> +    '''Perform string diff to highlight specific changes.'''
> +    operation_skip = '+?' if operation == 'deleted' else '-?'
> +    if operation == 'deleted':
> +        s2, s1 = s1, s2
> +
> +    # buffer required to remove last space, there may be smarter ways to do this
> +    buff = []
> +
> +    # we never want to higlight the leading +-
> +    if operation == 'deleted' and s2.startswith('-'):
> +        buff.append(('diff.deleted', '-'))
> +        s2 = s2[1:]
> +        s1 = s1[1:]
> +    elif operation == 'inserted' and s1.startswith('+'):
> +        buff.append(('diff.inserted', '+'))
> +        s2 = s2[1:]
> +        s1 = s1[1:]
> +
> +    s = difflib.ndiff(re.split(r'(\W)', s2), re.split(r'(\W)', s1))
                                  ^^

Nit: please use br'' for Python 3 compatibility.

> +    for line in s:
           ^^^^

Nit: the variable name "line" is confusing.

> +        if line[0] in operation_skip:
> +            continue
> +        l = 'diff.' + operation + '.highlight'
> +        if line[0] in ' ': # unchanged parts
> +            l = 'diff.' + operation
> +        buff.append((l, line[2:]))
> +
> +    buff[-1] = (buff[-1][0], buff[-1][1].strip(' '))
> +    return buff

Maybe we should compact contiguous words of the same label, but that can be
addressed by a follow-up patch.


More information about the Mercurial-devel mailing list