D3211: patch: buffer lines for a same hunk

Mon Apr 16 19:12:12 EDT 2018

This revision was automatically updated to reflect the committed changes.
Closed by commit rHG5471348921c1: patch: buffer lines for a same hunk (authored by quark).

CHANGED PRIOR TO COMMIT
  https://phab.mercurial-scm.org/D3211?vs=7923&id=8334#toc

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3211?vs=7923&id=8334

REVISION DETAIL
  https://phab.mercurial-scm.org/D3211

AFFECTED FILES
  mercurial/patch.py
  tests/test-diff-color.t

CHANGE DETAILS

diff --git a/tests/test-diff-color.t b/tests/test-diff-color.t
--- a/tests/test-diff-color.t
+++ b/tests/test-diff-color.t
@@ -337,6 +337,7 @@
   [diff.deleted|-(to see if it works)]
   [diff.inserted|+three of those lines have]
   [diff.inserted|+collapsed onto one]
+#if false
   $ hg diff --config experimental.worddiff=True --color=debug
   [diff.diffline|diff --git a/file1 b/file1]
   [diff.file_a|--- a/file1]
@@ -370,6 +371,7 @@
   [diff.deleted|-(to see if it works)]
   [diff.inserted|+three of those lines ][diff.inserted.highlight|have]
   [diff.inserted|+][diff.inserted.highlight|collapsed][diff.inserted| onto one]
+#endif
 
 multibyte character shouldn't be broken up in word diff:
 
@@ -383,10 +385,13 @@
   >     f.write(b"blah \xe3\x82\xa4 blah\n")
   > EOF
   $ hg ci -m 'slightly change utf8 char' utf8
+
+#if false
   $ hg diff --config experimental.worddiff=True --color=debug -c.
   [diff.diffline|diff --git a/utf8 b/utf8]
   [diff.file_a|--- a/utf8]
   [diff.file_b|+++ b/utf8]
   [diff.hunk|@@ -1,1 +1,1 @@]
   [diff.deleted|-blah ][diff.deleted.highlight|\xe3\x82\xa2][diff.deleted| blah] (esc)
   [diff.inserted|+blah ][diff.inserted.highlight|\xe3\x82\xa4][diff.inserted| blah] (esc)
+#endif
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -11,7 +11,6 @@
 import collections
 import contextlib
 import copy
-import difflib
 import email
 import errno
 import hashlib
@@ -2481,11 +2480,32 @@
     else:
         return difffn(opts, None)
 
+def diffsinglehunk(hunklines):
+    """yield tokens for a list of lines in a single hunk"""
+    for line in hunklines:
+        # chomp
+        chompline = line.rstrip('\n')
+        # highlight tabs and trailing whitespace
+        stripline = chompline.rstrip()
+        if line[0] == '-':
+            label = 'diff.deleted'
+        elif line[0] == '+':
+            label = 'diff.inserted'
+        else:
+            raise error.ProgrammingError('unexpected hunk line: %s' % line)
+        for token in tabsplitter.findall(stripline):
+            if '\t' == token[0]:
+                yield (token, 'diff.tab')
+            else:
+                yield (token, label)
+
+        if chompline != stripline:
+            yield (chompline[len(stripline):], 'diff.trailingwhitespace')
+        if chompline != line:
+            yield (line[len(chompline):], '')
+
 def difflabel(func, *args, **kw):
     '''yields 2-tuples of (output, label) based on the output of func()'''
-    inlinecolor = False
-    if kw.get(r'opts'):
-        inlinecolor = kw[r'opts'].worddiff
     headprefixes = [('diff', 'diff.diffline'),
                     ('copy', 'diff.extended'),
                     ('rename', 'diff.extended'),
@@ -2497,125 +2517,59 @@
                     ('---', 'diff.file_a'),
                     ('+++', 'diff.file_b')]
     textprefixes = [('@', 'diff.hunk'),
-                    ('-', 'diff.deleted'),
-                    ('+', 'diff.inserted')]
+                    # - and + are handled by diffsinglehunk
+                   ]
     head = False
+
+    # buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
+    hunkbuffer = []
+    def consumehunkbuffer():
+        if hunkbuffer:
+            for token in diffsinglehunk(hunkbuffer):
+                yield token
+            hunkbuffer[:] = []
+
     for chunk in func(*args, **kw):
         lines = chunk.split('\n')
-        matches = {}
-        if inlinecolor:
-            matches = _findmatches(lines)
         linecount = len(lines)
         for i, line in enumerate(lines):
             if head:
                 if line.startswith('@'):
                     head = False
             else:
                 if line and not line.startswith((' ', '+', '-', '@', '\\')):
                     head = True
-            stripline = line
             diffline = False
             if not head and line and line.startswith(('+', '-')):
-                # highlight tabs and trailing whitespace, but only in
-                # changed lines
-                stripline = line.rstrip()
                 diffline = True
 
             prefixes = textprefixes
             if head:
                 prefixes = headprefixes
-            for prefix, label in prefixes:
-                if stripline.startswith(prefix):
-                    if diffline:
-                        if i in matches:
-                            for t, l in _inlinediff(lines[i].rstrip(),
-                                                    lines[matches[i]].rstrip(),
-                                                    label):
-                                yield (t, l)
-                        else:
-                            for token in tabsplitter.findall(stripline):
-                                if token.startswith('\t'):
-                                    yield (token, 'diff.tab')
-                                else:
-                                    yield (token, label)
-                    else:
-                        yield (stripline, label)
-                    break
+            if diffline:
+                # buffered
+                bufferedline = line
+                if i + 1 < linecount:
+                    bufferedline += "\n"
+                hunkbuffer.append(bufferedline)
             else:
-                yield (line, '')
-            if line != stripline:
-                yield (line[len(stripline):], 'diff.trailingwhitespace')
-            if i + 1 < linecount:
-                yield ('\n', '')
-
-def _findmatches(slist):
-    '''Look for insertion matches to deletion and returns a dict of
-    correspondences.
-    '''
-    lastmatch = 0
-    matches = {}
-    for i, line in enumerate(slist):
-        if line == '':
-            continue
-        if line.startswith('-'):
-            lastmatch = max(lastmatch, i)
-            newgroup = False
-            for j, newline in enumerate(slist[lastmatch + 1:]):
-                if newline == '':
-                    continue
-                if newline.startswith('-') and newgroup: # too far, no match
-                    break
-                if newline.startswith('+'): # potential match
-                    newgroup = True
-                    sim = difflib.SequenceMatcher(None, line, newline).ratio()
-                    if sim > 0.7:
-                        lastmatch = lastmatch + 1 + j
-                        matches[i] = lastmatch
-                        matches[lastmatch] = i
+                # unbuffered
+                for token in consumehunkbuffer():
+                    yield token
+                stripline = line.rstrip()
+                for prefix, label in prefixes:
+                    if stripline.startswith(prefix):
+                        yield (stripline, label)
+                        if line != stripline:
+                            yield (line[len(stripline):],
+                                   'diff.trailingwhitespace')
                         break
-    return matches
-
-def _inlinediff(s1, s2, operation):
-    '''Perform string diff to highlight specific changes.'''
-    operation_skip = ('+', '?') if operation == 'diff.deleted' else ('-', '?')
-    if operation == 'diff.deleted':
-        s2, s1 = s1, s2
-
-    buff = []
-    # we never want to higlight the leading +-
-    if operation == 'diff.deleted' and s2.startswith('-'):
-        label = operation
-        token = '-'
-        s2 = s2[1:]
-        s1 = s1[1:]
-    elif operation == 'diff.inserted' and s1.startswith('+'):
-        label = operation
-        token = '+'
-        s2 = s2[1:]
-        s1 = s1[1:]
-    else:
-        raise error.ProgrammingError("Case not expected, operation = %s" %
-                                     operation)
-
-    s = difflib.ndiff(_nonwordre.split(s2), _nonwordre.split(s1))
-    for part in s:
-        if part.startswith(operation_skip) or len(part) == 2:
-            continue
-        l = operation + '.highlight'
-        if part.startswith(' '):
-            l = operation
-        if part[2:] == '\t':
-            l = 'diff.tab'
-        if l == label: # contiguous token with same label
-            token += part[2:]
-            continue
-        else:
-            buff.append((token, label))
-            label = l
-            token = part[2:]
-    buff.append((token, label))
-
-    return buff
+                else:
+                    yield (line, '')
+                if i + 1 < linecount:
+                    yield ('\n', '')
+        for token in consumehunkbuffer():
+            yield token
 
 def diffui(*args, **kw):
     '''like diff(), but yields 2-tuples of (output, label) for ui.write()'''



To: quark, #hg-reviewers, yuja, durin42
Cc: yuja, mercurial-devel