D2577: mdiff: prefer xdiff for diff calculation
ryanmce (Ryan McElroy)
phabricator at mercurial-scm.org
Sat Mar 3 00:25:56 UTC 2018
ryanmce created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
Let's switch to xdiff for its better diff quality and faster performance!
bdiff is still used as a fallback when xdiff isn't built, or when the pure
Python implementation is in use.
TEST PLAN
Added the "patience" test case mentioned in the previous commit. It fails
with a huge diff output before this change.
I expected some annotate/diff output changes due to diff blocks being
shifted around. Let's see what sandcastle says. I'll make adjustments
accordingly.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D2577
AFFECTED FILES
mercurial/mdiff.py
tests/hghave.py
tests/test-diff-antipatience.t
CHANGE DETAILS
diff --git a/tests/test-diff-antipatience.t b/tests/test-diff-antipatience.t
new file mode 100644
--- /dev/null
+++ b/tests/test-diff-antipatience.t
@@ -0,0 +1,41 @@
+#require xdiff
+
+Test case that makes use of the weakness of patience diff algorithm
+
+ $ hg init
+ >>> open('a', 'w').write('\n'.join(list('a' + 'x' * 300 + 'u' + 'x' * 700 + 'a\n')))
+ $ hg commit -m 1 -A a
+ >>> open('a', 'w').write('\n'.join(list('b' + 'x' * 700 + 'u' + 'x' * 300 + 'b\n')))
+ $ hg diff
+ diff -r 6c45a8fe8cb6 a
+ --- a/a Thu Jan 01 00:00:00 1970 +0000
+ +++ b/a Thu Jan 01 00:00:00 1970 +0000
+ @@ -1,4 +1,4 @@
+ -a
+ +b
+ x
+ x
+ x
+ @@ -299,7 +299,6 @@
+ x
+ x
+ x
+ -u
+ x
+ x
+ x
+ @@ -700,6 +699,7 @@
+ x
+ x
+ x
+ +u
+ x
+ x
+ x
+ @@ -1000,5 +1000,5 @@
+ x
+ x
+ x
+ -a
+ +b
+
diff --git a/tests/hghave.py b/tests/hghave.py
--- a/tests/hghave.py
+++ b/tests/hghave.py
@@ -708,3 +708,11 @@
# libfuzzer is new in clang 6
return int(mat.group(1)) > 5
return False
+
+@check("xdiff", "xdiff algorithm")
+def has_xdiff():
+ try:
+ from mercurial.cext import xdiff
+ return xdiff.blocks('', '') == [(0, 0, 0, 0)]
+ except ImportError:
+ return False
diff --git a/mercurial/mdiff.py b/mercurial/mdiff.py
--- a/mercurial/mdiff.py
+++ b/mercurial/mdiff.py
@@ -25,13 +25,18 @@
bdiff = policy.importmod(r'bdiff')
mpatch = policy.importmod(r'mpatch')
-blocks = bdiff.blocks
fixws = bdiff.fixws
patches = mpatch.patches
patchedsize = mpatch.patchedsize
textdiff = bdiff.bdiff
splitnewlines = bdiff.splitnewlines
+try:
+ from .cext import xdiff
+ blocks = xdiff.blocks
+except ImportError:
+ blocks = bdiff.blocks
+
class diffopts(object):
'''context is the number of context lines
text treats all files as text
@@ -200,7 +205,7 @@
if opts.ignorews or opts.ignorewsamount or opts.ignorewseol:
text1 = wsclean(opts, text1, False)
text2 = wsclean(opts, text2, False)
- diff = bdiff.blocks(text1, text2)
+ diff = blocks(text1, text2)
for i, s1 in enumerate(diff):
# The first match is special.
# we've either found a match starting at line 0 or a match later
@@ -508,7 +513,7 @@
# similar to difflib.SequenceMatcher.get_matching_blocks
def get_matching_blocks(a, b):
- return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)]
+ return [(d[0], d[2], d[1] - d[0]) for d in blocks(a, b)]
def trivialdiffheader(length):
return struct.pack(">lll", 0, 0, length) if length else ''
To: ryanmce, #hg-reviewers
Cc: mercurial-devel
More information about the Mercurial-devel
mailing list