D7592: fuzz: add support for fuzzing under either Python 2 or 3

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Tue Dec 10 03:21:53 UTC 2019


durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This was more of a hairball than I hoped, but it appears to work. The hg-py3
  branch of my oss-fuzz fork on GitHub has the remaining changes to switch us to
  Python 3, but we may as well retain Python 2 fuzzing support for at least a
  little while.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7592

AFFECTED FILES
  contrib/fuzz/dirs.cc
  contrib/fuzz/dirstate.cc
  contrib/fuzz/fm1readmarkers.cc
  contrib/fuzz/fncache.cc
  contrib/fuzz/jsonescapeu8fast.cc
  contrib/fuzz/manifest.cc
  contrib/fuzz/pyutil.cc
  contrib/fuzz/pyutil.h
  contrib/fuzz/revlog.cc

CHANGE DETAILS

diff --git a/contrib/fuzz/revlog.cc b/contrib/fuzz/revlog.cc
--- a/contrib/fuzz/revlog.cc
+++ b/contrib/fuzz/revlog.cc
@@ -9,16 +9,15 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_index2
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 for inline in (True, False):
     try:
-        index, cache = parse_index2(data, inline)
+        index, cache = parsers.parse_index2(data, inline)
         index.slicechunktodensity(list(range(len(index))), 0.5, 262144)
         index.stats()
         index.findsnapshots({}, 0)
@@ -35,7 +34,7 @@
         # to debug failures.
         # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/pyutil.h b/contrib/fuzz/pyutil.h
--- a/contrib/fuzz/pyutil.h
+++ b/contrib/fuzz/pyutil.h
@@ -1,5 +1,11 @@
 #include <Python.h>
 
+#if PY_MAJOR_VERSION >= 3
+#define PYCODETYPE PyObject
+#else
+#define PYCODETYPE PyCodeObject
+#endif
+
 namespace contrib
 {
 
diff --git a/contrib/fuzz/pyutil.cc b/contrib/fuzz/pyutil.cc
--- a/contrib/fuzz/pyutil.cc
+++ b/contrib/fuzz/pyutil.cc
@@ -6,17 +6,26 @@
 namespace contrib
 {
 
+#if PY_MAJOR_VERSION >= 3
+#define HG_FUZZER_PY3 1
+PyMODINIT_FUNC PyInit_parsers(void);
+#else
+PyMODINIT_FUNC initparsers(void);
+#endif
+
 static char cpypath[8192] = "\0";
 
 static PyObject *mainmod;
 static PyObject *globals;
 
-/* TODO: use Python 3 for this fuzzing? */
-PyMODINIT_FUNC initparsers(void);
-
 void initpy(const char *cselfpath)
 {
+#ifdef HG_FUZZER_PY3
+	const std::string subdir = "/sanpy/lib/python3.7";
+#else
 	const std::string subdir = "/sanpy/lib/python2.7";
+#endif
+
 	/* HACK ALERT: we need a full Python installation built without
 	   pymalloc and with ASAN, so we dump one in
 	   $OUT/sanpy/lib/python2.7. This helps us wire that up. */
@@ -39,11 +48,24 @@
 	setenv("PYTHONNOUSERSITE", "1", 1);
 	/* prevent Python from looking up users in the fuzz environment */
 	setenv("PYTHONUSERBASE", cpypath, 1);
+#ifdef HG_FUZZER_PY3
+	std::wstring wcpypath(pypath.begin(), pypath.end());
+	Py_SetPythonHome(wcpypath.c_str());
+#else
 	Py_SetPythonHome(cpypath);
+#endif
 	Py_InitializeEx(0);
 	mainmod = PyImport_AddModule("__main__");
 	globals = PyModule_GetDict(mainmod);
+
+#ifdef HG_FUZZER_PY3
+	PyObject *mod = PyInit_parsers();
+#else
 	initparsers();
+	PyObject *mod = PyImport_ImportModule("parsers");
+#endif
+
+	PyDict_SetItemString(globals, "parsers", mod);
 }
 
 PyObject *pyglobals()
diff --git a/contrib/fuzz/manifest.cc b/contrib/fuzz/manifest.cc
--- a/contrib/fuzz/manifest.cc
+++ b/contrib/fuzz/manifest.cc
@@ -9,15 +9,14 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import lazymanifest
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
-  lm = lazymanifest(mdata)
+  lm = parsers.lazymanifest(mdata)
   # iterate the whole thing, which causes the code to fully parse
   # every line in the manifest
   for e, _, _ in lm.iterentries():
@@ -41,7 +40,7 @@
   # to debug failures.
   # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/jsonescapeu8fast.cc b/contrib/fuzz/jsonescapeu8fast.cc
--- a/contrib/fuzz/jsonescapeu8fast.cc
+++ b/contrib/fuzz/jsonescapeu8fast.cc
@@ -11,23 +11,21 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import jsonescapeu8fast
-
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
-    jsonescapeu8fast(data, paranoid)
+    parsers.jsonescapeu8fast(data, paranoid)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print(e)
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	if (!code) {
 		std::cerr << "failed to compile Python code!" << std::endl;
 	}
diff --git a/contrib/fuzz/fncache.cc b/contrib/fuzz/fncache.cc
--- a/contrib/fuzz/fncache.cc
+++ b/contrib/fuzz/fncache.cc
@@ -10,29 +10,20 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import (
-    isasciistr,
-    asciilower,
-    asciiupper,
-    encodedir,
-    pathencode,
-    lowerencode,
-)
-
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
     for fn in (
-        isasciistr,
-        asciilower,
-        asciiupper,
-        encodedir,
-        pathencode,
-        lowerencode,
+        parsers.isasciistr,
+        parsers.asciilower,
+        parsers.asciiupper,
+        parsers.encodedir,
+        parsers.pathencode,
+        parsers.lowerencode,
     ):
         try:
             fn(data)
@@ -53,7 +44,7 @@
     # to debug failures.
     # print(e)
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	if (!code) {
 		std::cerr << "failed to compile Python code!" << std::endl;
 	}
diff --git a/contrib/fuzz/fm1readmarkers.cc b/contrib/fuzz/fm1readmarkers.cc
--- a/contrib/fuzz/fm1readmarkers.cc
+++ b/contrib/fuzz/fm1readmarkers.cc
@@ -9,13 +9,12 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import fm1readmarkers
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 def maybeint(s, default):
     try:
         return int(s)
@@ -31,14 +30,14 @@
     else:
         offset = stop = 0
     offset, stop = maybeint(offset, 0), maybeint(stop, len(data))
-    fm1readmarkers(data, offset, stop)
+    parsers.fm1readmarkers(data, offset, stop)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/dirstate.cc b/contrib/fuzz/dirstate.cc
--- a/contrib/fuzz/dirstate.cc
+++ b/contrib/fuzz/dirstate.cc
@@ -9,24 +9,23 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import parse_dirstate
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
     dmap = {}
     copymap = {}
-    p = parse_dirstate(dmap, copymap, data)
+    p = parsers.parse_dirstate(dmap, copymap, data)
 except Exception as e:
     pass
     # uncomment this print if you're editing this Python code
     # to debug failures.
     # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 
diff --git a/contrib/fuzz/dirs.cc b/contrib/fuzz/dirs.cc
--- a/contrib/fuzz/dirs.cc
+++ b/contrib/fuzz/dirs.cc
@@ -9,16 +9,15 @@
 
 extern "C" {
 
-static PyCodeObject *code;
+static PYCODETYPE *code;
 
 extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv)
 {
 	contrib::initpy(*argv[0]);
-	code = (PyCodeObject *)Py_CompileString(R"py(
-from parsers import dirs
+	code = (PYCODETYPE *)Py_CompileString(R"py(
 try:
   files = mdata.split('\n')
-  d = dirs(files)
+  d = parsers.dirs(files)
   list(d)
   'a' in d
   if files:
@@ -29,7 +28,7 @@
   # to debug failures.
   # print e
 )py",
-	                                        "fuzzer", Py_file_input);
+	                                      "fuzzer", Py_file_input);
 	return 0;
 }
 



To: durin42, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list