D4879: fuzz: new fuzzer for cext/manifest.c

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Thu Oct 4 16:56:08 EDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rHGa72bf685f2dd: fuzz: new fuzzer for cext/manifest.c (authored by durin42, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4879?vs=11676&id=11686

REVISION DETAIL
  https://phab.mercurial-scm.org/D4879

AFFECTED FILES
  contrib/fuzz/Makefile
  contrib/fuzz/manifest.cc
  contrib/fuzz/manifest_corpus.py
  contrib/fuzz/manifest_fuzzer.options

CHANGE DETAILS

diff --git a/contrib/fuzz/manifest_fuzzer.options b/contrib/fuzz/manifest_fuzzer.options
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+detect_leaks = 0
diff --git a/contrib/fuzz/manifest_corpus.py b/contrib/fuzz/manifest_corpus.py
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest_corpus.py
@@ -0,0 +1,30 @@
+from __future__ import absolute_import, print_function
+
+import argparse
+import zipfile
+
+ap = argparse.ArgumentParser()
+ap.add_argument("out", metavar="some.zip", type=str, nargs=1)
+args = ap.parse_args()
+
+with zipfile.ZipFile(args.out[0], "w", zipfile.ZIP_STORED) as zf:
+    zf.writestr("manifest_zero",
+'''PKG-INFO\09b3ed8f2b81095a13064402e930565f083346e9a
+README\080b6e76643dcb44d4bc729e932fc464b3e36dbe3
+hg\0b6444347c629cc058d478023905cfb83b7f5bb9d
+mercurial/__init__.py\0b80de5d138758541c5f05265ad144ab9fa86d1db
+mercurial/byterange.py\017f5a9fbd99622f31a392c33ac1e903925dc80ed
+mercurial/fancyopts.py\0b6f52e23e356748c5039313d8b639cda16bf67ba
+mercurial/hg.py\023cc12f225f1b42f32dc0d897a4f95a38ddc8f4a
+mercurial/mdiff.py\0a05f65c44bfbeec6a42336cd2ff0b30217899ca3
+mercurial/revlog.py\0217bc3fde6d82c0210cf56aeae11d05a03f35b2b
+mercurial/transaction.py\09d180df101dc14ce3dd582fd998b36c98b3e39aa
+notes.txt\0703afcec5edb749cf5cec67831f554d6da13f2fb
+setup.py\0ccf3f6daf0f13101ca73631f7a1769e328b472c9
+tkmerge\03c922edb43a9c143682f7bc7b00f98b3c756ebe7
+''')
+    zf.writestr("badmanifest_shorthashes",
+                "narf\0aa\nnarf2\0aaa\n")
+    zf.writestr("badmanifest_nonull",
+                "narf\0cccccccccccccccccccccccccccccccccccccccc\n"
+                "narf2aaaaaaaaaaaaaaaaaaaa\n")
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
new file mode 100644
--- /dev/null
+++ b/contrib/fuzz/manifest.cc
@@ -0,0 +1,117 @@
+#include <Python.h>
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <string>
+
+extern "C" {
+
+/* TODO: use Python 3 for this fuzzing? */
+PyMODINIT_FUNC initparsers(void);
+
+/* Backing storage for the path handed to setenv() and
+   Py_SetPythonHome(). It is static because Py_SetPythonHome() keeps
+   the pointer it is given rather than copying the string. */
+static char cpypath[8192] = "\0";
+
+/* One-time libFuzzer setup: build <dir of argv[0]>/sanpy/lib/python2.7
+   (using the current directory when argv[0] contains no slash) and
+   install it as PYTHONPATH and as the Python home. Returns 0; aborts
+   if the path cannot be determined or does not fit in cpypath. */
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
+{
+        const std::string subdir = "/sanpy/lib/python2.7";
+        /* HACK ALERT: we need a full Python installation built without
+           pymalloc and with ASAN, so we dump one in
+           $OUT/sanpy/lib/python2.7. This helps us wire that up. */
+        std::string selfpath(*argv[0]);
+        std::string pypath;
+        auto pos = selfpath.rfind("/");
+        if (pos == std::string::npos) {
+                char wd[8192];
+                if (getcwd(wd, sizeof(wd)) == NULL) {
+                        perror("getcwd");
+                        abort();
+                }
+                pypath = std::string(wd) + subdir;
+        } else {
+                pypath = selfpath.substr(0, pos) + subdir;
+        }
+        /* Refuse a path that does not fit (terminating NUL included)
+           rather than writing past the end of cpypath. */
+        if (pypath.size() >= sizeof(cpypath)) {
+                fprintf(stderr, "python path too long: %s\n",
+                        pypath.c_str());
+                abort();
+        }
+        /* size() + 1 so the terminating NUL is copied as well */
+        memcpy(cpypath, pypath.c_str(), pypath.size() + 1);
+        setenv("PYTHONPATH", cpypath, 1);
+        Py_SetPythonHome(cpypath);
+        return 0;
+}
+
+/* libFuzzer entry point: start an interpreter, expose the input bytes
+   to an embedded script as `mdata`, run the script, and shut the
+   interpreter down again. Always returns 0. */
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size)
+{
+        Py_InitializeEx(0);
+        initparsers();
+        PyObject *mtext =
+            PyBytes_FromStringAndSize((const char *)Data, (Py_ssize_t)Size);
+        PyObject *mainmod = PyImport_AddModule("__main__");
+        PyObject *globals = PyModule_GetDict(mainmod);
+        PyObject *locals = PyDict_New();
+        if (!mtext || !locals) {
+                /* allocation failure: nothing meaningful can be fuzzed */
+                PyErr_Print();
+                abort();
+        }
+        PyDict_SetItemString(locals, "mdata", mtext);
+        PyCodeObject *code =
+            (PyCodeObject *)Py_CompileString(R"py(
+from parsers import lazymanifest
+lm = lazymanifest(mdata)
+try:
+  # iterate the whole thing, which causes the code to fully parse
+  # every line in the manifest
+  list(lm.iterentries())
+  lm[b'xyzzy'] = (b'\0' * 20, 'x')
+  # do an insert, text should change
+  assert lm.text() != mdata, "insert should change text and didn't: %r %r" % (lm.text(), mdata)
+  del lm[b'xyzzy']
+  # should be back to the same
+  assert lm.text() == mdata, "delete should have restored text but didn't: %r %r" % (lm.text(), mdata)
+except Exception as e:
+  pass
+  # uncomment this print if you're editing this Python code
+  # to debug failures.
+  # print e
+)py",
+                                             "fuzzer", Py_file_input);
+        if (!code) {
+                /* the script is a constant, so this is a harness bug */
+                PyErr_Print();
+                abort();
+        }
+        PyObject *res = PyEval_EvalCode(code, globals, locals);
+        if (!res) {
+                /* Statements outside the script's try block can still
+                   raise; drop the exception so it is not left pending.
+                   Use PyErr_Print() here instead when debugging. */
+                PyErr_Clear();
+        }
+        /* PyEval_EvalCode() returns a new reference */
+        Py_XDECREF(res);
+        Py_DECREF(code);
+        Py_DECREF(locals);
+        Py_DECREF(mtext);
+        Py_Finalize();
+        return 0; // Non-zero return values are reserved for future use.
+}
+}
diff --git a/contrib/fuzz/Makefile b/contrib/fuzz/Makefile
--- a/contrib/fuzz/Makefile
+++ b/contrib/fuzz/Makefile
@@ -70,12 +70,46 @@
 	  fuzz-xdiffi.o fuzz-xprepare.o fuzz-xutils.o fuzzutil-oss-fuzz.o \
 	  -lFuzzingEngine -o $$OUT/xdiff_fuzzer
 
+# Python 2.7 built without pymalloc and linked against ASAN, installed
+# under $OUT/sanpy. manifest.cc points its embedded interpreter at it.
+# TODO use the $OUT env var instead of hardcoding /out
+/out/sanpy/bin/python:
+	cd /Python-2.7.15/ && ./configure --without-pymalloc --prefix=$$OUT/sanpy CFLAGS='-O1 -fno-omit-frame-pointer -g -fwrapv -fstack-protector-strong' LDFLAGS=-lasan
+	ASAN_OPTIONS=detect_leaks=0 $(MAKE) -C /Python-2.7.15/
+	$(MAKE) -C /Python-2.7.15/ install
+
+sanpy: /out/sanpy/bin/python
+
+# C extension objects that get linked into the manifest fuzzer.
+manifest_objs := manifest.o charencode.o parsers.o dirs.o pathencode.o revlog.o
+
+# sanpy is order-only here: it is phony, so as a normal prerequisite it
+# would force every object to be recompiled on each run.
+$(manifest_objs): %.o: ../../mercurial/cext/%.c | sanpy
+	$(CC) $(CFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -I../../mercurial \
+	  -c -o $@ $<
+
+manifest_fuzzer: manifest.cc $(manifest_objs) | sanpy
+	$(CXX) $(CXXFLAGS) `$$OUT/sanpy/bin/python-config --cflags` \
+	  -Wno-register -Wno-macro-redefined \
+	  -I../../mercurial manifest.cc \
+	  $(manifest_objs) \
+	  -lFuzzingEngine `$$OUT/sanpy/bin/python-config --ldflags` \
+	  -o $$OUT/manifest_fuzzer
+
+manifest_corpus.zip:
+	python manifest_corpus.py $$OUT/manifest_fuzzer_seed_corpus.zip
+
+copy_options:
+	cp *.options $$OUT
+
 clean:
 	$(RM) *.o *_fuzzer \
 	  bdiff \
 	  mpatch \
 	  xdiff
 
-oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer
+oss-fuzz: bdiff_fuzzer mpatch_fuzzer mpatch_corpus.zip xdiff_fuzzer manifest_fuzzer manifest_corpus.zip copy_options
 
-.PHONY: all clean oss-fuzz
+.PHONY: all clean oss-fuzz sanpy copy_options



To: durin42, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel


More information about the Mercurial-devel mailing list