[PATCH] run-tests: explicitly handle unicode when writing xunit file

Gregory Szorc gregory.szorc at gmail.com
Sun Mar 29 17:41:34 UTC 2015


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1427650883 25200
#      Sun Mar 29 10:41:23 2015 -0700
# Node ID 6172aafdb1a9310db266c8ee26e7dc3fd2d6188b
# Parent  03d1333662fb83144a7dac9de798c82902ceb30b
run-tests: explicitly handle unicode when writing xunit file

The xunit writer was passing a byte str to a minidom API. An implicit
.decode('ascii') was performed somewhere, causing UnicodeDecodeError
if test output contained non-ASCII sequences.

This patch decodes test output from utf-8 into unicode before passing it
to minidom. We use the "replace" error handler to ensure invalid utf-8
sequences get munged into � (U+FFFD REPLACEMENT CHARACTER).

diff --git a/tests/run-tests.py b/tests/run-tests.py
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -1515,9 +1515,13 @@ class TextTestRunner(unittest.TextTestRu
                 for tc, err in sorted(result.faildata.iteritems()):
                     t = doc.createElement('testcase')
                     t.setAttribute('name', tc)
                     t.setAttribute('time', '%.3f' % timesd[tc])
-                    cd = doc.createCDATASection(cdatasafe(err))
+                    # createCDATASection expects a unicode or it will convert
+                    # using default conversion rules, which will fail if
+                    # string isn't ASCII.
+                    err = cdatasafe(err).decode('utf-8', 'replace')
+                    cd = doc.createCDATASection(err)
                     t.appendChild(cd)
                     s.appendChild(t)
                 xuf.write(doc.toprettyxml(indent='  ', encoding='utf-8'))
             finally:
diff --git a/tests/test-run-tests.t b/tests/test-run-tests.t
--- a/tests/test-run-tests.t
+++ b/tests/test-run-tests.t
@@ -30,8 +30,12 @@ failing test
   > This is a noop statement so that
   > this test is still more bytes than success.
   > EOF
 
+  >>> fh = open('test-failure-unicode.t', 'wb')
+  >>> fh.write(u'  $ echo babar\u03b1\n'.encode('utf-8'))
+  >>> fh.write(u'  l\u03b5\u03b5t\n'.encode('utf-8'))
+
   $ $TESTDIR/run-tests.py --with-hg=`which hg`
   
   --- $TESTTMP/test-failure.t
   +++ $TESTTMP/test-failure.t.err
@@ -43,12 +47,23 @@ failing test
    this test is still more bytes than success.
   
   ERROR: test-failure.t output changed
   !.
+  --- $TESTTMP/test-failure-unicode.t
+  +++ $TESTTMP/test-failure-unicode.t.err
+  @@ -1,2 +1,2 @@
+     $ echo babar\xce\xb1 (esc)
+  -  l\xce\xb5\xce\xb5t (esc)
+  +  babar\xce\xb1 (esc)
+  
+  ERROR: test-failure-unicode.t output changed
+  !
   Failed test-failure.t: output changed
-  # Ran 2 tests, 0 skipped, 0 warned, 1 failed.
+  Failed test-failure-unicode.t: output changed
+  # Ran 3 tests, 0 skipped, 0 warned, 2 failed.
   python hash seed: * (glob)
   [1]
+
 test --xunit support
   $ $TESTDIR/run-tests.py --with-hg=`which hg` --xunit=xunit.xml
   
   --- $TESTTMP/test-failure.t
@@ -61,16 +76,34 @@ test --xunit support
    this test is still more bytes than success.
   
   ERROR: test-failure.t output changed
   !.
+  --- $TESTTMP/test-failure-unicode.t
+  +++ $TESTTMP/test-failure-unicode.t.err
+  @@ -1,2 +1,2 @@
+     $ echo babar\xce\xb1 (esc)
+  -  l\xce\xb5\xce\xb5t (esc)
+  +  babar\xce\xb1 (esc)
+  
+  ERROR: test-failure-unicode.t output changed
+  !
   Failed test-failure.t: output changed
-  # Ran 2 tests, 0 skipped, 0 warned, 1 failed.
+  Failed test-failure-unicode.t: output changed
+  # Ran 3 tests, 0 skipped, 0 warned, 2 failed.
   python hash seed: * (glob)
   [1]
   $ cat xunit.xml
   <?xml version="1.0" encoding="utf-8"?>
-  <testsuite errors="0" failures="1" name="run-tests" skipped="0" tests="2">
+  <testsuite errors="0" failures="2" name="run-tests" skipped="0" tests="3">
     <testcase name="test-success.t" time="*"/> (glob)
+    <testcase name="test-failure-unicode.t" time="*"> (glob)
+  <![CDATA[--- $TESTTMP/test-failure-unicode.t
+  +++ $TESTTMP/test-failure-unicode.t.err
+  @@ -1,2 +1,2 @@
+     $ echo babar\xce\xb1 (esc)
+  -  l\xce\xb5\xce\xb5t (esc)
+  +  babar\xce\xb1 (esc)
+  ]]>  </testcase>
     <testcase name="test-failure.t" time="*"> (glob)
   <![CDATA[--- $TESTTMP/test-failure.t
   +++ $TESTTMP/test-failure.t.err
   @@ -1,4 +1,4 @@
@@ -81,8 +114,10 @@ test --xunit support
    this test is still more bytes than success.
   ]]>  </testcase>
   </testsuite>
 
+  $ rm test-failure-unicode.t
+
 test for --retest
 ====================
 
   $ $TESTDIR/run-tests.py --with-hg=`which hg` --retest


More information about the Mercurial-devel mailing list