[PATCH v5] cmdutil: add within-line color diff capacity
Yuya Nishihara
yuya at tcha.org
Tue Nov 28 09:09:00 EST 2017
On Mon, 27 Nov 2017 21:38:48 +0900, matthieu.laneuville at octobus.net wrote:
> # HG changeset patch
> # User Matthieu Laneuville <matthieu.laneuville at octobus.net>
> # Date 1508944418 -32400
> # Thu Oct 26 00:13:38 2017 +0900
> # Node ID 9a6865e011743fa354dc1ae27d66600e022cddad
> # Parent 32bb27dd52825236ba1b6c06fe60e140d6b5ea45
> # EXP-Topic inline-diff
> cmdutil: add within-line color diff capacity
Sorry for late review. I played with this patch and I like the feature, but
the output seems not so well, e.g. highlighted parts are too broad and too few.
A couple of comments follow:
> --- a/mercurial/cmdutil.py Tue Nov 21 00:24:09 2017 -0500
> +++ b/mercurial/cmdutil.py Thu Oct 26 00:13:38 2017 +0900
> @@ -7,6 +7,7 @@
>
> from __future__ import absolute_import
>
> +import difflib
> import errno
> import itertools
> import os
> @@ -1513,6 +1514,11 @@ def diffordiffstat(ui, repo, diffopts, n
> ui.warn(_('warning: %s not inside relative root %s\n') % (
> match.uipath(matchroot), uirelroot))
>
> + store = {
> + 'diff.inserted': [],
> + 'diff.deleted': []
> + }
> + status = False
Nit: 'status = None' is preferred as it isn't a boolean type.
> if stat:
> diffopts = diffopts.copy(context=0)
> width = 80
> @@ -1529,7 +1535,31 @@ def diffordiffstat(ui, repo, diffopts, n
> changes, diffopts, prefix=prefix,
> relroot=relroot,
> hunksfilterfn=hunksfilterfn):
> - write(chunk, label=label)
> +
> + if not ui.configbool("experimental", "inline-color-diff"):
> + write(chunk, label=label)
> + continue
> +
> + # Each deleted/inserted chunk is followed by an EOL chunk with ''
> + # label. The 'status' flag helps us grab that second line.
> + if label in ['diff.deleted', 'diff.inserted'] or status:
> + if status:
> + store[status].append(chunk)
> + status = False
> + else:
> + store[label].append(chunk)
> + status = label
> + continue
> +
> + if store['diff.inserted'] or store['diff.deleted']:
> + for line, l in _chunkdiff(store):
> + write(line, label=l)
> +
> + store['diff.inserted'] = []
> + store['diff.deleted'] = []
> +
> + if chunk:
> + write(chunk, label=label)
Can't we add a word-diff feature by hooking/replacing patch.difflabel()
function? I think this "store" hack is needed because we're post-processing
a labeled output instead of doing that at the right layer.
> +def _chunkdiff(store):
> + '''Returns a (line, label) iterator over a corresponding deletion and
> + insertion set. The set has to be considered as a whole in order to match
> + lines and perform inline coloring.
> + '''
> + def chunkiterator(list1, list2, direction):
> + '''For each string in list1, finds matching string in list2 and returns
> + an iterator over their differences.
> + '''
> + used = []
> + for a in list1:
> + done = False
> + for i, b in enumerate(list2):
> + if done or i in used:
> + continue
> + if difflib.SequenceMatcher(None, a, b).ratio() > 0.7:
> + buff = _inlinediff(a, b, direction=direction)
> + for line in buff:
> + yield (line[1], line[0])
> + done = True
> + used.append(i) # insure lines in b can be matched only once
It seems wrong to allow crossed matches between list1 and list2. For example,
if list1[0] matched list2[5], list1[1] shouldn't match list2[4].
> + if not done:
> + yield (a, 'diff.' + direction)
> +
> + insert = store['diff.inserted']
> + delete = store['diff.deleted']
> + return itertools.chain(chunkiterator(delete, insert, 'deleted'),
> + chunkiterator(insert, delete, 'inserted'))
> +
> +def _inlinediff(from_string, to_string, direction):
Nit: name_with_underscore is banned.
https://www.mercurial-scm.org/wiki/CodingStyle#Naming_conventions
> + '''Perform string diff to highlight specific changes.'''
> + direction_skip = '+?' if direction == 'deleted' else '-?'
> + if direction == 'deleted':
> + to_string, from_string = from_string, to_string
> +
> + # buffer required to remove last space, there may be smarter ways to do this
> + buff = []
> +
> + # we never want to higlight the leading +-
> + if direction == 'deleted' and to_string.startswith('-'):
> + buff.append(('diff.deleted', '-'))
> + to_string = to_string[1:]
> + from_string = from_string[1:]
> + elif direction == 'inserted' and from_string.startswith('+'):
> + buff.append(('diff.inserted', '+'))
> + to_string = to_string[1:]
> + from_string = from_string[1:]
> +
> + s = difflib.ndiff(to_string.split(' '), from_string.split(' '))
re.split(r'(\W)', s) will produce a better result.
https://stackoverflow.com/a/2136580
FWIW, I tried the following change on top of this patch. The output was too
verbose, but seemed more correct.
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -1587,6 +1587,11 @@ def _chunkdiff(store):
'''For each string in list1, finds matching string in list2 and returns
an iterator over their differences.
'''
+ buff = _inlinediff(''.join(list1), ''.join(list2), direction=direction)
+ for line in buff:
+ yield (line[1], line[0])
+ return
+
used = []
for a in list1:
done = False
@@ -1626,14 +1631,14 @@ def _inlinediff(from_string, to_string,
to_string = to_string[1:]
from_string = from_string[1:]
- s = difflib.ndiff(to_string.split(' '), from_string.split(' '))
+ s = difflib.ndiff(re.split(r'(\W)', to_string), re.split(r'(\W)', from_string))
for line in s:
if line[0] in direction_skip:
continue
l = 'diff.' + direction + '.highlight'
if line[0] in ' ': # unchanged parts
l = 'diff.' + direction
- buff.append((l, line[2:] + ' '))
+ buff.append((l, line[2:]))
buff[-1] = (buff[-1][0], buff[-1][1].strip(' '))
return buff
More information about the Mercurial-devel
mailing list