[PATCH] Highlight word differences in diffs

Martin Geisler mg at lazybytes.net
Sat May 30 13:29:34 CDT 2009


# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1243707986 -7200
# Node ID 964cf1c47c0df37da99540b83a7aefed9b34a305
# Parent  27cc4fa6722d97fa0d23bc990906d4d47ff43cff
Highlight word differences in diffs

I wondered if it would be difficult to add word diffs to the color
extension, and it turned out to be easier than I thought. This
patch shows a rough prototype...

I decided that a word diff is essentially a diff of a diff hunk. If we
start with the text (wrapped at 25 characters to better illustrate
word wrapping):

  This is a bit of text. It
  is only there to test the
  new word diff feature.

and change it to

  This is a small paragraph
  of text. It is only there
  to test the new word diff
  feature.

then the diff is

  diff --git a/a.txt b/a.txt
  --- a/a.txt
  +++ b/a.txt
  @@ -1,3 +1,4 @@
  -This is a bit of text. It
  -is only there to test the
  -new word diff feature.
  +This is a small paragraph
  +of text. It is only there
  +to test the new word diff
  +feature.

And the diff of the "-"-lines and "+"-lines, with each word put on
its own line, is:

  --- a
  +++ b
  @@ -1,7 +1,8 @@
   This
   is
   a
  -bit
  +small
  +paragraph
   of
   text
   It

I use this information to underline the changes in the real diff:

  diff --git a/a.txt b/a.txt
  --- a/a.txt
  +++ b/a.txt
  @@ -1,3 +1,4 @@
  -This is a _bit_ of text. It
  -is only there to test the
  -new word diff feature.
  +This is a _small_ _paragraph_
  +of text. It is only there
  +to test the new word diff
  +feature.

Things that are missing:

* adjacent changes should probably be merged (like "_small_" and
  "_paragraph_" above)

* it is not clear if splitting on \W+ is the best choice

* underlining can look funny, especially when a large amount of
  inserted text is underlined

* changes in the text between words are not highlighted. So if you
  insert a comma, it won't be highlighted.

* this feature would look much better in hgweb...

* probably some more :-)

diff --git a/hgext/wdiff.py b/hgext/wdiff.py
new file mode 100644
--- /dev/null
+++ b/hgext/wdiff.py
@@ -0,0 +1,115 @@
+# wdiff - highlight changed words in diffs
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+"""highlight changed words in diffs"""
+
+import re
+
+from mercurial import cmdutil, patch, bdiff, extensions
+from mercurial.hgweb import webcommands
+from mercurial.i18n import _
+
+sgr = {'negative': 7,
+       'positive': 27,
+       'underline': 4,
+       'nounderline': 24}
+
+def gnuwdiff(s, added):
+    if added:
+        return "{+%s+}" % s
+    else:
+        return "[-%s-]" % s
+
+def termtoggle(s, start, stop):
+    return "\033[%dm%s\033[%dm" % (start, s, stop)
+
+def invert(s, added):
+    return termtoggle(s, sgr['negative'], sgr['positive'])
+
+def underline(s, added):
+    return termtoggle(s, sgr['underline'], sgr['nounderline'])
+
+def htmldiff(s, added):
+    return (added and "<ins>%s</ins>" or "<del>%s</del>") % s
+
+# Global highlighter used by wdiff.
+highlighter = invert
+
+def wdiff(a, b):
+    # Split a and b into words and non-words. The even elements will
+    # be words, the odd elements will be whitespace.
+    apieces = re.split(r'(\s+)', a)
+    bpieces = re.split(r'(\s+)', b)
+
+    # Put the words on separate lines.
+    atext = '\n'.join(apieces[::2])
+    btext = '\n'.join(bpieces[::2])
+
+    def highlight(pieces, low, high, added):
+        """Highlight the segment from pieces[low:high].
+
+        This is done in one go and pieces[low+1:high] are set to empty
+        strings since these words are now part of pieces[low]. Each
+        line in the segment is highlighted seperately to avoid
+        coloring line numbers in hgweb."""
+        if low < high:
+            lines = ''.join(pieces[low:high]).split('\n')
+            pieces[low] = '\n'.join([highlighter(l, added) for l in lines])
+            for i in range(low + 1, high):
+                pieces[i] = ''
+
+    s = (0, 0, 0, 0)
+    blocks = bdiff.blocks(atext, btext)
+    for t in blocks:
+        highlight(apieces, 2 * s[1], 2 * t[0] - 1, added=False)
+        highlight(bpieces, 2 * s[3], 2 * t[2] - 1, added=True)
+        s = t
+
+    a = ''.join(apieces)
+    b = ''.join(bpieces)
+    return a, b
+
+def highlightdiff(diff):
+    lines = diff.split('\n')
+    alines, blines = [], []
+    astart, bstart = 0, 0
+    for i, line in enumerate(lines):
+        if line and line[0] == '-' and not line.startswith('---'):
+            if not astart:
+                astart = i
+            alines.append(line[1:])
+        if line and line[0] == '+' and not line.startswith('+++'):
+            if not bstart:
+                bstart = i
+            blines.append(line[1:])
+        if line and line[0] == ' ' or i == len(lines) - 1:
+            if alines and blines:
+                atext, btext = wdiff('\n'.join(alines), '\n'.join(blines))
+                alines = atext.split('\n')
+                blines = btext.split('\n')
+                lines[astart:bstart] = ['-' + a for a in alines]
+                lines[bstart:i] = ['+' + b for b in blines]
+            alines, blines = [], []
+            astart, bstart = 0, 0
+    return '\n'.join(lines)
+
+def wrapdiff(orig, *args, **kwargs):
+    chunks = orig(*args, **kwargs)
+    diff = ''.join(list(chunks))
+    yield highlightdiff(diff)
+
+def wraphgweb(orig, web, req, tmpl):
+    global highlighter
+    highlighter = htmldiff
+    for field in 'difflineplus', 'difflineminus':
+        tmpl.cache[field] = tmpl.cache[field].replace('line|escape', 'line')
+    return orig(web, req, tmpl)
+
+def extsetup():
+    extensions.wrapfunction(patch, 'diff', wrapdiff)
+    extensions.wrapfunction(webcommands, 'diff', wraphgweb)
+    extensions.wrapfunction(webcommands, 'rev', wraphgweb)
diff --git a/templates/static/style-paper.css b/templates/static/style-paper.css
--- a/templates/static/style-paper.css
+++ b/templates/static/style-paper.css
@@ -142,6 +142,9 @@
   font-size: 100%;
 }
 
+ins { background-color: #CFC; }
+del { background-color: #FCC; }
+
 /* log and tags tables */
 .bigtable {
   border-bottom: 1px solid #999;


More information about the Mercurial-devel mailing list