[PATCH] testing: [sprint] [RFC] add hypothesis fuzz testing

Sat Oct 24 11:51:50 UTC 2015

# HG changeset patch
# User David R. MacIver <david at drmaciver.com>
# Date 1445687163 -3600
#      Sat Oct 24 12:46:03 2015 +0100
# Node ID 9f4fcdad9054deda9a08a08146d309882ddeca94
# Parent  a9ed5a8fc5e0554d5cb81b7206d2203cc49a2d23
testing: [sprint] [RFC] add hypothesis fuzz testing

Hypothesis is a library for adding fuzzing over a range of structured
data to your test suite: http://hypothesis.readthedocs.org/en/latest/

This adds two tests using Hypothesis to the Mercurial test suite,
including a feature flag for detecting whether Hypothesis is installed
and a helper module for more natural integration with the Mercurial
test suite.

These two tests both currently fail. This should be because they are
demonstrating genuine bugs in Mercurial. In particular:

* JSON encoding from mercurial.templatefilters assumes that any character
  with a codepoint < 0x80 may be validly passed through as a JSON string.
  This is not correct, as characters below 0x20 are non-printable control
  characters, which must be escaped inside a JSON string.
* utf8b encoding will sometimes pass through characters that cannot be
  validly decoded as utf8b.

diff --git a/hypothesishelpers.py b/hypothesishelpers.py
new file mode 100644
--- /dev/null
+++ b/hypothesishelpers.py
@@ -0,0 +1,50 @@
+import os
+import sys
+
+from hypothesis.settings import set_hypothesis_home_dir
+import hypothesis.strategies as st
+from hypothesis import given, Settings
+
+set_hypothesis_home_dir(os.path.join(
+    os.getenv('TESTTMP'), ".hypothesis"
+))
+
+
+def check(*args, **kwargs):
+    def accept(f):
+        print(f.__name__)
+        # Workaround for https://github.com/DRMacIver/hypothesis/issues/206
+        f.__module__ = '__anon__'
+        import traceback
+        try:
+            given(*args, settings=Settings(max_examples=2000), **kwargs)(f)()
+        except Exception:
+            traceback.print_exc(file=sys.stdout)
+            sys.exit(1)
+    return accept
+
+
+def roundtrips(data, decode, encode):
+    @given(data)
+    def testroundtrips(value):
+        encoded = encode(value)
+        decoded = decode(encoded)
+        if decoded != value:
+            raise ValueError(
+                "Round trip failed: %s(%r) -> %s(%r) -> %r" % (
+                    encode.__name__, value, decode.__name__, encoded,
+                    decoded
+                ))
+    import traceback
+    try:
+        testroundtrips()
+    except Exception:
+        traceback.print_exc(file=sys.stdout)
+        sys.exit(1)
+    print("Round trip OK")
+
+
+bytestrings = (
+    st.builds(lambda s, e: s.encode(e), st.text(), st.sampled_from([
+        'utf-8', 'utf-16',
+    ]))) | st.binary()
diff --git a/tests/hghave.py b/tests/hghave.py
--- a/tests/hghave.py
+++ b/tests/hghave.py
@@ -463,3 +463,12 @@
 @check("slow", "allow slow tests")
 def has_slow():
     return os.environ.get('HGTEST_SLOW') == 'slow'
+
+@check("hypothesis", "Is Hypothesis installed")
+def has_hypothesis():
+    try:
+        import hypothesis
+        hypothesis.given
+        return True
+    except ImportError:
+        return False
diff --git a/tests/test-fuzzing-json.t b/tests/test-fuzzing-json.t
new file mode 100644
--- /dev/null
+++ b/tests/test-fuzzing-json.t
@@ -0,0 +1,9 @@
+#require hypothesis
+
+  >>> from hypothesishelpers import *
+  >>> import mercurial.templatefilters as tf
+  >>> import json
+  >>> @check(st.text().map(lambda s: s.encode('utf-8')))
+  ... def testtfescapeproducesvalidjson(obj):
+  ...     json.loads('"' + tf.jsonescape(obj) + '"')
+  testtfescapeproducesvalidjson
diff --git a/tests/test-fuzzing-round-tripping.t b/tests/test-fuzzing-round-tripping.t
new file mode 100644
--- /dev/null
+++ b/tests/test-fuzzing-round-tripping.t
@@ -0,0 +1,8 @@
+#require hypothesis
+
+utf8b round trips
+
+  >>> from hypothesishelpers import *
+  >>> from mercurial.encoding import fromutf8b, toutf8b
+  >>> roundtrips(st.binary(), fromutf8b, toutf8b)
+  Round trip OK