[PATCH] Highlight word differences in diffs

Martin Geisler mg at lazybytes.net
Wed May 27 15:09:41 CDT 2009


# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1243454861 -7200
# Node ID 2504b69072eea7415bda241b43dd99dd7db7b3bb
# Parent  c6483eec6092414bac04f916706f572ed2305ec0
Highlight word differences in diffs

I wondered if it would be difficult to add word diffs to the color
extension, and it turned out to be easier than I expected. This
patch shows a rough prototype...

I decided that a word diff is essentially a diff of a diff hunk. If we
start with the text (wrapped at 25 characters to better illustrate
word wrapping):

  This is a bit of text. It
  is only there to test the
  new word diff feature.

and change it to

  This is a small paragraph
  of text. It is only there
  to test the new word diff
  feature.

then the diff is

  diff --git a/a.txt b/a.txt
  --- a/a.txt
  +++ b/a.txt
  @@ -1,3 +1,4 @@
  -This is a bit of text. It
  -is only there to test the
  -new word diff feature.
  +This is a small paragraph
  +of text. It is only there
  +to test the new word diff
  +feature.

And the diff of the "-"-lines and "+"-lines, with each word put on its
own line, is:

  --- a
  +++ b
  @@ -1,7 +1,8 @@
   This
   is
   a
  -bit
  +small
  +paragraph
   of
   text
   It

I use this information to underline the changes in the real diff:

  diff --git a/a.txt b/a.txt
  --- a/a.txt
  +++ b/a.txt
  @@ -1,3 +1,4 @@
  -This is a _bit_ of text. It
  -is only there to test the
  -new word diff feature.
  +This is a _small_ _paragraph_
  +of text. It is only there
  +to test the new word diff
  +feature.

Things that are missing:

* adjacent changes should probably be merged (like "_small_" and
  "_paragraph_" above)

* it is not clear if splitting on \W+ is the best choice

* underlining can look funny, especially when a large amount of
  inserted text is underlined

* changes in the text between words are not highlighted. So if you
  insert a comma, it won't be highlighted.

* this feature would look much better in hgweb...

* probably some more :-)

diff --git a/hgext/color.py b/hgext/color.py
--- a/hgext/color.py
+++ b/hgext/color.py
@@ -62,9 +62,10 @@
 diff.trailingwhitespace = bold red_background
 '''
 
-import os, sys
+import os, sys, re
+from pprint import pprint
 
-from mercurial import cmdutil, commands, extensions
+from mercurial import cmdutil, commands, extensions, bdiff
 from mercurial.i18n import _
 
 # start and stop parameters for effects
@@ -80,6 +81,7 @@
                   'bold': 1,
                   'italic': 3,
                   'underline': 4,
+                  'nounderline': 24,
                   'inverse': 7,
                   'black_background': 40,
                   'red_background': 41,
@@ -169,9 +171,76 @@
                    'missing': ['red', 'bold'],
                    'unapplied': ['black', 'bold'], }
 
+def worddiff(a, b):
+    # Split a and b into words and non-words. The even elements will
+    # be words, the odd elements will be what was between the words.
+    apieces = re.split(r'(\W+)', a)
+    bpieces = re.split(r'(\W+)', b)
+
+    # Put the words on separate lines.
+    atext = '\n'.join(apieces[::2])
+    btext = '\n'.join(bpieces[::2])
+
+    #print "a:"
+    #pprint(apieces)
+    #print "b:"
+    #pprint(bpieces)
+    #
+    #print "atext:"
+    #print atext
+    #print "btext:"
+    #print btext
+
+    blocks = bdiff.blocks(atext, btext)
+    #print "blocks:"
+    #pprint(blocks)
+
+    def highlight(s):
+        return "\033[%dm%s\033[%dm" % (_effect_params['underline'], s,
+                                       _effect_params['nounderline'])
+
+    s = (0, 0, 0, 0)
+    for t in blocks:
+        for i in range(s[1], t[0]):
+            apieces[2*i] = highlight(apieces[2*i])
+        for i in range(s[3], t[2]):
+            bpieces[2*i] = highlight(bpieces[2*i])
+        s = t
+
+    a = ''.join(apieces)
+    b = ''.join(bpieces)
+    return a, b
+
+
 def colorwrap(orig, s):
     '''wrap ui.write for colored diff output'''
     lines = s.split('\n')
+
+    alines, blines = [], []
+    astart, bstart = 0, 0
+    for i, line in enumerate(lines):
+        if line and line[0] == '-' and not line.startswith('---'):
+            if not astart:
+                astart = i
+            alines.append(line[1:])
+        if line and line[0] == '+' and not line.startswith('+++'):
+            if not bstart:
+                bstart = i
+            blines.append(line[1:])
+        if line and line[0] == ' ' or i == len(lines) - 1:
+            if alines and blines:
+                atext, btext = worddiff('\n'.join(alines), '\n'.join(blines))
+                alines = atext.split('\n')
+                blines = btext.split('\n')
+
+                assert bstart - astart == len(alines)
+                assert i - bstart == len(blines)
+
+                lines[astart:bstart] = ['-' + a for a in alines]
+                lines[bstart:i] = ['+' + b for b in blines]
+            alines, blines = [], []
+            astart, bstart = 0, 0
+
     for i, line in enumerate(lines):
         stripline = line
         if line and line[0] in '+-':


More information about the Mercurial-devel mailing list