[PATCH] Highlight word differences in diffs
Martin Geisler
mg at lazybytes.net
Sat May 30 13:29:34 CDT 2009
# HG changeset patch
# User Martin Geisler <mg at lazybytes.net>
# Date 1243707986 -7200
# Node ID 964cf1c47c0df37da99540b83a7aefed9b34a305
# Parent 27cc4fa6722d97fa0d23bc990906d4d47ff43cff
Highlight word differences in diffs
I wondered if it would be difficult to add word diffs to the color
extension, and it turned out to be easier than I thought. This
patch shows a rough prototype...
I decided that a word diff is essentially a diff of a diff hunk. If we
start with the text (wrapped at 25 characters to better illustrate
word wrapping):
This is a bit of text. It
is only there to test the
new word diff feature.
and change it to
This is a small paragraph
of text. It is only there
to test the new word diff
feature.
then the diff is
diff --git a/a.txt b/a.txt
--- a/a.txt
+++ b/a.txt
@@ -1,3 +1,4 @@
-This is a bit of text. It
-is only there to test the
-new word diff feature.
+This is a small paragraph
+of text. It is only there
+to test the new word diff
+feature.
And the diff of the "-"-lines and "+"-lines, with each word put on its own line, is:
--- a
+++ b
@@ -1,7 +1,8 @@
This
is
a
-bit
+small
+paragraph
of
text
It
I use this information to underline the changes in the real diff:
diff --git a/a.txt b/a.txt
--- a/a.txt
+++ b/a.txt
@@ -1,3 +1,4 @@
-This is a _bit_ of text. It
-is only there to test the
-new word diff feature.
+This is a _small_ _paragraph_
+of text. It is only there
+to test the new word diff
+feature.
Things that are missing:
* adjacent changes should probably be merged (like "_small_" and
"_paragraph_" above)
* it is not clear if splitting on \W+ is the best choice
* underlining can look funny, especially when a large amount of
inserted text is underlined
* changes in the text between words are not highlighted. So if you
insert a comma, it won't be highlighted.
* this feature would look much better in hgweb...
* probably some more :-)
diff --git a/hgext/wdiff.py b/hgext/wdiff.py
new file mode 100644
--- /dev/null
+++ b/hgext/wdiff.py
@@ -0,0 +1,115 @@
+# wdiff - highlight changed words in diffs
+#
+# Copyright 2009 Matt Mackall <mpm at selenic.com> and others
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2, incorporated herein by reference.
+
+"""highlight changed words in diffs"""
+
+import re
+
+from mercurial import cmdutil, patch, bdiff, extensions
+from mercurial.hgweb import webcommands
+from mercurial.i18n import _
+
+sgr = {'negative': 7,
+ 'positive': 27,
+ 'underline': 4,
+ 'nounderline': 24}
+
+def gnuwdiff(s, added):
+ if added:
+ return "{+%s+}" % s
+ else:
+ return "[-%s-]" % s
+
+def termtoggle(s, start, stop):
+ return "\033[%dm%s\033[%dm" % (start, s, stop)
+
+def invert(s, added):
+ return termtoggle(s, sgr['negative'], sgr['positive'])
+
+def underline(s, added):
+ return termtoggle(s, sgr['underline'], sgr['nounderline'])
+
+def htmldiff(s, added):
+ return (added and "<ins>%s</ins>" or "<del>%s</del>") % s
+
+# Global highlighter used by wdiff.
+highlighter = invert
+
+def wdiff(a, b):
+ # Split a and b into words and non-words. The even elements will
+ # be words, the odd elements will be whitespace.
+ apieces = re.split(r'(\s+)', a)
+ bpieces = re.split(r'(\s+)', b)
+
+ # Put the words on separate lines.
+ atext = '\n'.join(apieces[::2])
+ btext = '\n'.join(bpieces[::2])
+
+ def highlight(pieces, low, high, added):
+ """Highlight the segment from pieces[low:high].
+
+ This is done in one go and pieces[low+1:high] are set to empty
+ strings since these words are now part of pieces[low]. Each
+ line in the segment is highlighted seperately to avoid
+ coloring line numbers in hgweb."""
+ if low < high:
+ lines = ''.join(pieces[low:high]).split('\n')
+ pieces[low] = '\n'.join([highlighter(l, added) for l in lines])
+ for i in range(low + 1, high):
+ pieces[i] = ''
+
+ s = (0, 0, 0, 0)
+ blocks = bdiff.blocks(atext, btext)
+ for t in blocks:
+ highlight(apieces, 2 * s[1], 2 * t[0] - 1, added=False)
+ highlight(bpieces, 2 * s[3], 2 * t[2] - 1, added=True)
+ s = t
+
+ a = ''.join(apieces)
+ b = ''.join(bpieces)
+ return a, b
+
+def highlightdiff(diff):
+ lines = diff.split('\n')
+ alines, blines = [], []
+ astart, bstart = 0, 0
+ for i, line in enumerate(lines):
+ if line and line[0] == '-' and not line.startswith('---'):
+ if not astart:
+ astart = i
+ alines.append(line[1:])
+ if line and line[0] == '+' and not line.startswith('+++'):
+ if not bstart:
+ bstart = i
+ blines.append(line[1:])
+ if line and line[0] == ' ' or i == len(lines) - 1:
+ if alines and blines:
+ atext, btext = wdiff('\n'.join(alines), '\n'.join(blines))
+ alines = atext.split('\n')
+ blines = btext.split('\n')
+ lines[astart:bstart] = ['-' + a for a in alines]
+ lines[bstart:i] = ['+' + b for b in blines]
+ alines, blines = [], []
+ astart, bstart = 0, 0
+ return '\n'.join(lines)
+
+def wrapdiff(orig, *args, **kwargs):
+ chunks = orig(*args, **kwargs)
+ diff = ''.join(list(chunks))
+ yield highlightdiff(diff)
+
+def wraphgweb(orig, web, req, tmpl):
+ global highlighter
+ highlighter = htmldiff
+ for field in 'difflineplus', 'difflineminus':
+ tmpl.cache[field] = tmpl.cache[field].replace('line|escape', 'line')
+ return orig(web, req, tmpl)
+
+def extsetup():
+ extensions.wrapfunction(patch, 'diff', wrapdiff)
+ extensions.wrapfunction(webcommands, 'diff', wraphgweb)
+ extensions.wrapfunction(webcommands, 'rev', wraphgweb)
diff --git a/templates/static/style-paper.css b/templates/static/style-paper.css
--- a/templates/static/style-paper.css
+++ b/templates/static/style-paper.css
@@ -142,6 +142,9 @@
font-size: 100%;
}
+ins { background-color: #CFC; }
+del { background-color: #FCC; }
+
/* log and tags tables */
.bigtable {
border-bottom: 1px solid #999;
More information about the Mercurial-devel
mailing list