[PATCH] Highlight word differences in diffs
Martin Geisler
mg at lazybytes.net
Wed May 27 15:09:41 CDT 2009
# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1243454861 -7200
# Node ID 2504b69072eea7415bda241b43dd99dd7db7b3bb
# Parent c6483eec6092414bac04f916706f572ed2305ec0
Highlight word differences in diffs
I wondered if it would be difficult to add word diffs to the color
extension, and it turned out to be easier than I thought. This
patch shows a rough prototype...
I decided that a word diff is essentially a diff of a diff hunk. If we
start with the text (wrapped at 25 characters to better illustrate
word wrapping):
This is a bit of text. It
is only there to test the
new word diff feature.
and change it to
This is a small paragraph
of text. It is only there
to test the new word diff
feature.
then the diff is
diff --git a/a.txt b/a.txt
--- a/a.txt
+++ b/a.txt
@@ -1,3 +1,4 @@
-This is a bit of text. It
-is only there to test the
-new word diff feature.
+This is a small paragraph
+of text. It is only there
+to test the new word diff
+feature.
And the diff of the "-"-lines and "+"-lines, split into one word per line, is:
--- a
+++ b
@@ -1,7 +1,8 @@
This
is
a
-bit
+small
+paragraph
of
text
It
I use this information to underline the changes in the real diff:
diff --git a/a.txt b/a.txt
--- a/a.txt
+++ b/a.txt
@@ -1,3 +1,4 @@
-This is a _bit_ of text. It
-is only there to test the
-new word diff feature.
+This is a _small_ _paragraph_
+of text. It is only there
+to test the new word diff
+feature.
Things that are missing:
* adjacent changes should probably be merged (like "_small_" and
"_paragraph_" above)
* it is not clear if splitting on \W+ is the best choice
* underlining can look funny, especially when a large amount of
inserted text is underlined
* changes in the stuff between words are not highlighted. So if you
insert a comma, it won't be highlighted.
* this feature would look much better in hgweb...
* probably some more :-)
diff --git a/hgext/color.py b/hgext/color.py
--- a/hgext/color.py
+++ b/hgext/color.py
@@ -62,9 +62,10 @@
diff.trailingwhitespace = bold red_background
'''
-import os, sys
+import os, sys, re
+from pprint import pprint
-from mercurial import cmdutil, commands, extensions
+from mercurial import cmdutil, commands, extensions, bdiff
from mercurial.i18n import _
# start and stop parameters for effects
@@ -80,6 +81,7 @@
'bold': 1,
'italic': 3,
'underline': 4,
+ 'nounderline': 24,
'inverse': 7,
'black_background': 40,
'red_background': 41,
@@ -169,9 +171,76 @@
'missing': ['red', 'bold'],
'unapplied': ['black', 'bold'], }
+def worddiff(a, b):
+ # Split a and b into words and non-words. The even elements will
+ # be words, the odd elements will be what was between the words.
+ apieces = re.split(r'(\W+)', a)
+ bpieces = re.split(r'(\W+)', b)
+
+ # Put the words on separate lines.
+ atext = '\n'.join(apieces[::2])
+ btext = '\n'.join(bpieces[::2])
+
+ #print "a:"
+ #pprint(apieces)
+ #print "b:"
+ #pprint(bpieces)
+ #
+ #print "atext:"
+ #print atext
+ #print "btext:"
+ #print btext
+
+ blocks = bdiff.blocks(atext, btext)
+ #print "blocks:"
+ #pprint(blocks)
+
+ def highlight(s):
+ return "\033[%dm%s\033[%dm" % (_effect_params['underline'], s,
+ _effect_params['nounderline'])
+
+ s = (0, 0, 0, 0)
+ for t in blocks:
+ for i in range(s[1], t[0]):
+ apieces[2*i] = highlight(apieces[2*i])
+ for i in range(s[3], t[2]):
+ bpieces[2*i] = highlight(bpieces[2*i])
+ s = t
+
+ a = ''.join(apieces)
+ b = ''.join(bpieces)
+ return a, b
+
+
def colorwrap(orig, s):
'''wrap ui.write for colored diff output'''
lines = s.split('\n')
+
+ alines, blines = [], []
+ astart, bstart = 0, 0
+ for i, line in enumerate(lines):
+ if line and line[0] == '-' and not line.startswith('---'):
+ if not astart:
+ astart = i
+ alines.append(line[1:])
+ if line and line[0] == '+' and not line.startswith('+++'):
+ if not bstart:
+ bstart = i
+ blines.append(line[1:])
+ if line and line[0] == ' ' or i == len(lines) - 1:
+ if alines and blines:
+ atext, btext = worddiff('\n'.join(alines), '\n'.join(blines))
+ alines = atext.split('\n')
+ blines = btext.split('\n')
+
+ assert bstart - astart == len(alines)
+ assert i - bstart == len(blines)
+
+ lines[astart:bstart] = ['-' + a for a in alines]
+ lines[bstart:i] = ['+' + b for b in blines]
+ alines, blines = [], []
+ astart, bstart = 0, 0
+
for i, line in enumerate(lines):
stripline = line
if line and line[0] in '+-':
More information about the Mercurial-devel
mailing list