D4879: fuzz: new fuzzer for cext/manifest.c

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Thu Oct 4 15:23:54 UTC 2018


durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This is a bit messy, because lazymanifest is tightly coupled to the
  CPython API for performance reasons. As a result, we have to build a
  whole Python without pymalloc (so ASAN can help us out) and link
  against that. Then we have to use an embedded Python interpreter. We
  could manually drive the lazymanifest in C from that point, but
  experimentally just using PyEval_EvalCode isn't really any slower so
  we may as well do that and write the innermost guts of the fuzzer in
  Python.
  
  Leak detection is currently disabled for this fuzzer because a few
  global-lifetime objects in our extensions are more or less
  intentionally leaked, and I didn't want to take the detour to work
  around that for now.
  
  This should not be pushed to our repo until
  https://github.com/google/oss-fuzz/pull/1853 is merged, as this
  depends on having the Python tarball around.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4879

AFFECTED FILES
  contrib/fuzz/Makefile
  contrib/fuzz/manifest.cc
  contrib/fuzz/manifest_corpus.py
  contrib/fuzz/manifest_fuzzer.options

CHANGE DETAILS

diff --git a/contrib/fuzz/manifest_fuzzer.options b/contrib/fuzz/manifest_fuzzer.options
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+detect_leaks = 0
diff --git a/contrib/fuzz/manifest_corpus.py b/contrib/fuzz/manifest_corpus.py
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_corpus.py
@@ -0,0 +1,30 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import zipfile
+
+ap = argparse.ArgumentParser()
+ap.add_argument("out", metavar="some.zip", type=str, nargs=1)
+args = ap.parse_args()
+
+with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf:
+    zf.writestr("manifest_zero",
+'''PKG-INFO\09b3ed8f2b81095a13064402e930565f083346e9a
+README\080b6e76643dcb44d4bc729e932fc464b3e36dbe3
+hg\0b6444347c629cc058d478023905cfb83b7f5bb9d
+mercurial/__init__.py\0b80de5d138758541c5f05265ad144ab9fa86d1db
+mercurial/byterange.py\017f5a9fbd99622f31a392c33ac1e903925dc80ed
+mercurial/fancyopts.py\0b6f52e23e356748c5039313d8b639cda16bf67ba
+mercurial/hg.py\023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a
+mercurial/mdiff.py\0a05f65c44bfbeec6a42336cd2ff0b30217899ca3
+mercurial/revlog.py\0217bc3fde6d82c0210cf56aeae11d05a03f35b2b
+mercurial/transaction.py\09d180df101dc14ce3dd582fd998b36c98b3e39aa
+notes.txt\0703afcec5edb749cf5cec67831f554d6da13f2fb
+setup.py\0ccf3f6daf0f13101ca73631f7a1769e328b472c9
+tkmerge\03c922edb43a9c143682f7bc7b00f98b3c756ebe7
+''')
+    zf.writestr("badmanifest_shorthashes",
+                "narf\0aa\nnarf2\0aaa\n")
+    zf.writestr("badmanifest_nonull",
+                "narf\0cccccccccccccccccccccccccccccccccccccccc\n"
+                "narf2aaaaaaaaaaaaaaaaaaaa\n")
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest.cc
@@ -0,0 +1,112 @@
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+extern "C" {
+
+/* TODO: use Python 3 for this fuzzing? */
+PyMODINIT_FUNC initparsers(void);
+
+/* Holds the path passed to Py_SetPythonHome(). The Python/C API asks
+   for that string to live in static storage, hence a file-scope
+   buffer rather than a local std::string. */
+static char cpypath[8192] = "\0";
+
+/* libFuzzer start-up hook. Derives <dir of argv[0]>/sanpy/lib/python2.7
+   (or <cwd>/sanpy/lib/python2.7 when argv[0] contains no slash) and
+   installs it as both PYTHONPATH and the Python home. Returns 0. */
+int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+        const std::string subdir = "/sanpy/lib/python2.7";
+        /* HACK ALERT: we need a full Python installation built without
+           pymalloc and with ASAN, so we dump one in
+           $OUT/sanpy/lib/python2.7. This helps us wire that up. */
+        std::string selfpath(*argv[0]);
+        std::string pypath;
+        auto pos = selfpath.rfind("/");
+        if (pos == std::string::npos) {
+                char wd[8192];
+                /* wd is unspecified when getcwd() fails, so fall back
+                   to a relative path instead of reading it. */
+                if (getcwd(wd, sizeof(wd)) == NULL) {
+                        strcpy(wd, ".");
+                }
+                pypath = std::string(wd) + subdir;
+        } else {
+                pypath = selfpath.substr(0, pos) + subdir;
+        }
+        /* Refuse paths that would not fit, terminator included,
+           instead of writing past the end of cpypath. */
+        if (pypath.size() >= sizeof(cpypath)) {
+                fprintf(stderr, "manifest fuzzer: python path too long\n");
+                abort();
+        }
+        memcpy(cpypath, pypath.c_str(), pypath.size() + 1);
+        setenv("PYTHONPATH", cpypath, 1);
+        Py_SetPythonHome(cpypath);
+        return 0;
+}
+
+/* libFuzzer entry point. Starts an interpreter, exposes the input to
+   the embedded script as the bytes object `mdata`, runs the script
+   against parsers.lazymanifest and finalizes the interpreter again.
+   Always returns 0. */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+        Py_InitializeEx(0);
+        initparsers();
+        PyObject *mtext =
+            PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
+        PyObject *mainmod = PyImport_AddModule("__main__");
+        PyObject *globals = PyModule_GetDict(mainmod);
+        PyObject *locals = PyDict_New();
+        PyDict_SetItemString(locals, "mdata", mtext);
+        PyCodeObject *code =
+            (PyCodeObject *)Py_CompileString(R"py(
+from parsers import lazymanifest
+lm = lazymanifest(mdata)
+try:
+  # iterate the whole thing, which causes the code to fully parse
+  # every line in the manifest
+  list(lm.iterentries())
+  lm[b'xyzzy'] = (b'\0' * 20, 'x')
+  # do an insert, text should change
+  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
+  del lm[b'xyzzy']
+  # should be back to the same
+  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
+except Exception as e:
+  pass
+  # uncomment this print if you're editing this Python code
+  # to debug failures.
+  # print e
+)py",
+                                             "fuzzer", Py_file_input);
+        if (code == NULL) {
+                /* The script is a constant, so this is a harness bug;
+                   report it rather than dereferencing NULL below. */
+                PyErr_Print();
+                abort();
+        }
+        /* PyEval_EvalCode() returns a new reference, or NULL with an
+           exception pending (the import and the lazymanifest()
+           call sit outside the script's try block). Drop the result
+           and clear the error so neither outlives this run. */
+        PyObject *res = PyEval_EvalCode(code, globals, locals);
+        if (res == NULL) {
+                PyErr_Clear();
+        }
+        Py_XDECREF(res);
+        Py_DECREF(code);
+        Py_DECREF(locals);
+        Py_DECREF(mtext);
+        Py_Finalize();
+        return 0; // Non-zero return values are reserved for future use.
+}
+}
diff --git a/contrib/fuzz/Makefile b/contrib/fuzz/Makefile
--- a/contrib/fuzz/Makefile
+++ b/contrib/fuzz/Makefile
@@ -70,12 +70,67 @@
 	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
+# Builds and installs Python 2.7.15 under $OUT/sanpy, configured
+# --without-pymalloc and linked with -lasan.
+# TODO use the $OUT env var instead of hardcoding /out
+/out/sanpy/bin/python:
+	cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan  && ASAN_OPTIONS=detect_leaks=0 make && make install
+
+sanpy: /out/sanpy/bin/python
+
+# sanpy is an order-only prerequisite (after the |) in the rules below:
+# it is a phony target, and as a normal prerequisite it would force
+# every one of these objects to be rebuilt on each run.
+manifest.o: ../../mercurial/cext/manifest.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o manifest.o ../../mercurial/cext/manifest.c
+
+charencode.o: ../../mercurial/cext/charencode.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o charencode.o ../../mercurial/cext/charencode.c
+
+parsers.o: ../../mercurial/cext/parsers.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o parsers.o ../../mercurial/cext/parsers.c
+
+dirs.o: ../../mercurial/cext/dirs.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o dirs.o ../../mercurial/cext/dirs.c
+
+pathencode.o: ../../mercurial/cext/pathencode.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o pathencode.o ../../mercurial/cext/pathencode.c
+
+revlog.o: ../../mercurial/cext/revlog.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o revlog.o ../../mercurial/cext/revlog.c
+
+manifest_fuzzer: manifest.cc manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o | sanpy
+	$(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -Wno-register -Wno-macro-redefined \
+	  -I../../mercurial manifest.cc \
+	  manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o \
+	  -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \
+	  -o $$OUT/manifest_fuzzer
+
+manifest_corpus.zip:
+	python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip
+
+copy_options:
+	cp *.options $$OUT
+
 clean:
 	$(RM) *.o *_fuzzer \
 	  bdiff \
 	  mpatch \
 	  xdiff
 
-oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer
+oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options
 
-.PHONY: all clean oss-fuzz
+.PHONY: all clean oss-fuzz sanpy copy_options



To: durin42, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list