D7449: rust: vendor pyembed crate

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Sat Nov 16 21:22:56 UTC 2019


indygreg created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We want to use PyOxidizer to produce machine native executables
  for running Mercurial. This commit starts the process of doing
  that.
  
  Under the hood, PyOxidizer uses a "pyembed" crate to manage an
  embedded Python interpreter. This crate needs to be vendored
  because it relies on a patched version of the rust-cpython
  crate.
  
  This commit vendors the pyembed crate associated with version
  0.3 of PyOxidizer.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7449

AFFECTED FILES
  rust/Cargo.toml
  rust/pyembed/Cargo.toml
  rust/pyembed/build.rs
  rust/pyembed/src/config.rs
  rust/pyembed/src/data.rs
  rust/pyembed/src/importer.rs
  rust/pyembed/src/lib.rs
  rust/pyembed/src/osutils.rs
  rust/pyembed/src/pyalloc.rs
  rust/pyembed/src/pyinterp.rs
  rust/pyembed/src/pystr.rs

CHANGE DETAILS

diff --git a/rust/pyembed/src/pystr.rs b/rust/pyembed/src/pystr.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pystr.rs
@@ -0,0 +1,98 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Bridge Rust and Python string types.
+
+use libc::{c_void, size_t, wchar_t};
+use python3_sys as pyffi;
+use std::ffi::{CString, OsString};
+use std::ptr::null_mut;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::OsStrExt;
+#[cfg(target_family = "windows")]
+use std::os::windows::prelude::OsStrExt;
+
+use cpython::{PyObject, Python};
+
+/// Owns a wide-character string buffer that is handed to the Python C API.
+///
+/// The buffer is obtained from `Py_DecodeLocale()` and is released with
+/// `PyMem_RawFree()` when the value is dropped.
+#[derive(Debug)]
+pub struct OwnedPyStr {
+    // Pointer returned by `Py_DecodeLocale()`; freed when this value is dropped.
+    data: *const wchar_t,
+}
+
+impl OwnedPyStr {
+    /// Returns the raw pointer to the wide-character buffer.
+    ///
+    /// The pointer is only valid for as long as this value is alive.
+    pub fn as_wchar_ptr(&self) -> *const wchar_t {
+        self.data
+    }
+
+    /// Decodes `s` into a wide-character buffer using `Py_DecodeLocale()`.
+    ///
+    /// Returns an error if `s` contains a NUL byte or if the decode call
+    /// yields a null pointer.
+    pub fn from_str(s: &str) -> Result<Self, &'static str> {
+        // Py_DecodeLocale() reads a NUL-terminated C string, so route the
+        // input through CString to guarantee the terminator is present.
+        let c_source = CString::new(s).map_err(|_| "source string has NULL bytes")?;
+
+        // A null size pointer is passed; the decoded length is not requested.
+        let no_size: *mut size_t = null_mut();
+        let decoded = unsafe { pyffi::Py_DecodeLocale(c_source.as_ptr(), no_size) };
+
+        if decoded.is_null() {
+            return Err("could not convert str to Python string");
+        }
+
+        Ok(OwnedPyStr { data: decoded })
+    }
+}
+
+impl Drop for OwnedPyStr {
+    /// Releases the wide-character buffer with `PyMem_RawFree()`.
+    fn drop(&mut self) {
+        // The buffer came from Py_DecodeLocale(), so it is returned through
+        // the matching raw-domain free function.
+        unsafe { pyffi::PyMem_RawFree(self.data as *mut c_void) }
+    }
+}
+
+#[cfg(target_family = "unix")]
+const SURROGATEESCAPE: &[u8] = b"surrogateescape\0";
+
+#[cfg(target_family = "unix")]
+pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
+    // PyUnicode_DecodeLocaleAndSize says the input must have a trailing NULL.
+    // So use a CString for that.
+    let b = CString::new(s.as_bytes()).or_else(|_| Err("not a valid C string"))?;
+    unsafe {
+        let o = pyffi::PyUnicode_DecodeLocaleAndSize(
+            b.as_ptr() as *const i8,
+            b.to_bytes().len() as isize,
+            SURROGATEESCAPE.as_ptr() as *const i8,
+        );
+
+        Ok(PyObject::from_owned_ptr(py, o))
+    }
+}
+
+/// Converts an `OsString` to a Python `str` object (Windows flavor).
+///
+/// The string is re-encoded as UTF-16 code units and handed to
+/// `PyUnicode_FromWideChar()`.
+///
+/// Returns an error if Python fails to create the object. The pending Python
+/// exception is cleared in that case.
+#[cfg(target_family = "windows")]
+pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
+    // Windows OsString should be valid UTF-16.
+    let w: Vec<u16> = s.encode_wide().collect();
+    unsafe {
+        let o = pyffi::PyUnicode_FromWideChar(w.as_ptr(), w.len() as isize);
+
+        // A NULL return signals failure. Wrapping it in a PyObject would be
+        // unsound, so report it through the Result instead.
+        if o.is_null() {
+            pyffi::PyErr_Clear();
+            return Err("unable to convert OS string to Python str");
+        }
+
+        Ok(PyObject::from_owned_ptr(py, o))
+    }
+}
+
+/// Converts an `OsString` to a Python `bytes` object (POSIX flavor).
+///
+/// The raw bytes of the string are copied verbatim into the new object.
+#[cfg(target_family = "unix")]
+pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
+    let b = s.as_bytes();
+    unsafe {
+        // Cast with `_` rather than `i8`: `c_char` is unsigned on some unix
+        // targets, where a hard-coded `*const i8` would not type-check.
+        let o = pyffi::PyBytes_FromStringAndSize(b.as_ptr() as *const _, b.len() as isize);
+        PyObject::from_owned_ptr(py, o)
+    }
+}
+
+/// Converts an `OsString` to a Python `bytes` object (Windows flavor).
+///
+/// The string is encoded as UTF-16 code units and the in-memory bytes of
+/// those units (two per unit) are copied into the new object.
+#[cfg(target_family = "windows")]
+pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
+    let wide: Vec<u16> = s.encode_wide().collect();
+    // Each UTF-16 code unit occupies two bytes.
+    let byte_len = wide.len() as isize * 2;
+    let raw = wide.as_ptr() as *const i8;
+
+    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(raw, byte_len)) }
+}
diff --git a/rust/pyembed/src/pyinterp.rs b/rust/pyembed/src/pyinterp.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pyinterp.rs
@@ -0,0 +1,853 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Manage an embedded Python interpreter.
+
+use libc::c_char;
+use python3_sys as pyffi;
+use std::collections::BTreeSet;
+use std::env;
+use std::ffi::CString;
+use std::fs;
+use std::io::Write;
+use std::path::PathBuf;
+use std::ptr::null;
+
+use cpython::exc::ValueError;
+use cpython::{
+    GILGuard, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
+    PyString, Python, PythonObject, ToPyObject,
+};
+
+use super::config::{PythonConfig, PythonRawAllocator, PythonRunMode, TerminfoResolution};
+use super::importer::PyInit__pyoxidizer_importer;
+use super::osutils::resolve_terminfo_dirs;
+#[cfg(feature = "jemalloc-sys")]
+use super::pyalloc::make_raw_jemalloc_allocator;
+use super::pyalloc::{make_raw_rust_memory_allocator, RawAllocator};
+use super::pystr::{osstring_to_bytes, osstring_to_str, OwnedPyStr};
+
+/// NUL-terminated name under which the importer extension module is
+/// registered with `PyImport_AppendInittab()`.
+pub const PYOXIDIZER_IMPORTER_NAME: &[u8] = b"_pyoxidizer_importer\0";
+
+// NUL-terminated names of the two frozen importlib modules that are replaced
+// with custom data when building the frozen modules table.
+const FROZEN_IMPORTLIB_NAME: &[u8] = b"_frozen_importlib\0";
+const FROZEN_IMPORTLIB_EXTERNAL_NAME: &[u8] = b"_frozen_importlib_external\0";
+
+/// Represents the results of executing Python code with exception handling.
+///
+/// Produced by `run_and_handle_error()` and mapped to a process exit code by
+/// `run_as_main()` (`Ok` -> 0, `Err` -> 1, `Exit` -> the carried code).
+#[derive(Debug)]
+pub enum PythonRunResult {
+    /// Code executed without raising an exception.
+    Ok {},
+    /// Code executed and raised an exception.
+    Err {},
+    /// Code executed and raised SystemExit with the specified exit code.
+    Exit { code: i32 },
+}
+
+/// Builds the frozen modules table installed as `PyImport_FrozenModules`.
+///
+/// The table holds the custom `_frozen_importlib` and
+/// `_frozen_importlib_external` bytecode from `config`, followed by the
+/// all-null sentinel entry that terminates the array.
+///
+/// The entries hold raw pointers into `config`'s data and into the static
+/// name constants; nothing is copied.
+fn make_custom_frozen_modules(config: &PythonConfig) -> [pyffi::_frozen; 3] {
+    [
+        pyffi::_frozen {
+            // Cast to `c_char` rather than `i8` so this also builds on
+            // targets where `c_char` is unsigned.
+            name: FROZEN_IMPORTLIB_NAME.as_ptr() as *const c_char,
+            code: config.frozen_importlib_data.as_ptr(),
+            size: config.frozen_importlib_data.len() as i32,
+        },
+        pyffi::_frozen {
+            name: FROZEN_IMPORTLIB_EXTERNAL_NAME.as_ptr() as *const c_char,
+            code: config.frozen_importlib_external_data.as_ptr(),
+            size: config.frozen_importlib_external_data.len() as i32,
+        },
+        // Sentinel entry marking the end of the table.
+        pyffi::_frozen {
+            name: null(),
+            code: null(),
+            size: 0,
+        },
+    ]
+}
+
+// Internal Microsoft C runtime function used below to obtain the standard
+// stream `FILE*` handles; index 0 is used for stdin and index 2 for stderr.
+#[cfg(windows)]
+extern "C" {
+    pub fn __acrt_iob_func(x: u32) -> *mut libc::FILE;
+}
+
+/// Returns the C runtime `FILE*` for standard input (Windows flavor).
+#[cfg(windows)]
+fn stdin_to_file() -> *mut libc::FILE {
+    // The stdin symbol is made available by importing <stdio.h>. On Windows,
+    // stdin is defined in corecrt_wstdio.h as a `#define` that calls this
+    // internal CRT function. There's no exported symbol to use. So we
+    // emulate the behavior of the C code.
+    //
+    // Relying on an internal CRT symbol is probably wrong. But Microsoft
+    // typically keeps backwards compatibility for undocumented functions
+    // like this because people use them in the wild.
+    //
+    // An attempt was made to use fdopen(0) like we do on POSIX. However,
+    // this causes a crash. The Microsoft C Runtime is already bending over
+    // backwards to coerce its native HANDLEs into POSIX file descriptors.
+    // Even if there are other ways to coerce a FILE* from a HANDLE
+    // (_open_osfhandle() + _fdopen() might work), using the same function
+    // that <stdio.h> uses to obtain a FILE* seems like the least risky thing
+    // to do.
+    unsafe { __acrt_iob_func(0) }
+}
+
+/// Returns a C `FILE*` opened for reading on the standard input descriptor
+/// (POSIX flavor).
+///
+/// The result is whatever `fdopen()` returns, which may be null on failure.
+#[cfg(unix)]
+fn stdin_to_file() -> *mut libc::FILE {
+    // fdopen() expects a NUL-terminated mode *string*. A pointer to a lone
+    // character has no terminator and makes libc read past it.
+    unsafe { libc::fdopen(libc::STDIN_FILENO, b"r\0".as_ptr() as *const c_char) }
+}
+
+/// Returns the C runtime `FILE*` for standard error (Windows flavor).
+///
+/// Uses the same internal CRT accessor as `stdin_to_file()`, with index 2.
+#[cfg(windows)]
+fn stderr_to_file() -> *mut libc::FILE {
+    unsafe { __acrt_iob_func(2) }
+}
+
+#[cfg(unix)]
+fn stderr_to_file() -> *mut libc::FILE {
+    unsafe { libc::fdopen(libc::STDERR_FILENO, &('w' as libc::c_char)) }
+}
+
+/// Returns the raw jemalloc-backed allocator description.
+///
+/// Only available when the crate is built with the `jemalloc-sys` feature.
+#[cfg(feature = "jemalloc-sys")]
+fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
+    make_raw_jemalloc_allocator()
+}
+
+/// Fallback used when the `jemalloc-sys` feature is disabled.
+///
+/// # Panics
+///
+/// Always panics: requesting jemalloc in such a build is a configuration
+/// error.
+#[cfg(not(feature = "jemalloc-sys"))]
+fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
+    panic!("jemalloc is not available in this build configuration");
+}
+
+/// No-op on POSIX: the Windows-specific interpreter flags do not apply.
+#[cfg(unix)]
+fn set_windows_flags(_config: &PythonConfig) {}
+
+/// Copies the Windows-specific settings from `config` into the
+/// corresponding global interpreter flags.
+#[cfg(windows)]
+fn set_windows_flags(config: &PythonConfig) {
+    // Booleans map onto the C flags as 1 (set) / 0 (unset).
+    let legacy_fs_encoding = i32::from(config.legacy_windows_fs_encoding);
+    let legacy_stdio = i32::from(config.legacy_windows_stdio);
+
+    unsafe {
+        pyffi::Py_LegacyWindowsFSEncodingFlag = legacy_fs_encoding;
+        pyffi::Py_LegacyWindowsStdioFlag = legacy_stdio;
+    }
+}
+
+/// Manages an embedded Python interpreter.
+///
+/// **Warning: Python interpreters have global state. There should only be a
+/// single instance of this type per process.**
+///
+/// Instances must only be constructed through [`MainPythonInterpreter::new()`](#method.new).
+///
+/// This type and its various functionality is a glorified wrapper around the
+/// Python C API. But there's a lot of added functionality on top of what the C
+/// API provides.
+///
+/// Both the low-level `python3-sys` and higher-level `cpython` crates are used.
+pub struct MainPythonInterpreter<'a> {
+    /// Configuration the interpreter was created from.
+    pub config: PythonConfig,
+    // Frozen modules table; a pointer to it is installed as
+    // `PyImport_FrozenModules` during initialization.
+    frozen_modules: [pyffi::_frozen; 3],
+    // Set once initialization has completed.
+    init_run: bool,
+    // Allocator description passed to `PyMem_SetAllocator()` when jemalloc
+    // is the configured raw allocator.
+    raw_allocator: Option<pyffi::PyMemAllocatorEx>,
+    // Allocator state used when the Rust allocator is the configured raw
+    // allocator.
+    raw_rust_allocator: Option<RawAllocator>,
+    // Guard held when the GIL was (re-)acquired through `acquire_gil()`.
+    gil: Option<GILGuard>,
+    // Cached interpreter token; `Some` while this instance considers the
+    // GIL to be held.
+    py: Option<Python<'a>>,
+    // Keeps the buffer passed to `Py_SetProgramName()` alive.
+    program_name: Option<OwnedPyStr>,
+}
+
+impl<'a> MainPythonInterpreter<'a> {
+    /// Build an interpreter from `config` and initialize it.
+    ///
+    /// `TERMINFO_DIRS` is exported according to the configured terminfo
+    /// resolution, the raw memory allocator is selected, and the Python
+    /// interpreter is then initialized as a side-effect. The GIL is held on
+    /// return.
+    ///
+    /// Returns an error if interpreter initialization fails.
+    pub fn new(config: PythonConfig) -> Result<MainPythonInterpreter<'a>, &'static str> {
+        // Export TERMINFO_DIRS as requested by the configuration.
+        match config.terminfo_resolution {
+            TerminfoResolution::None => {}
+            TerminfoResolution::Static(ref dirs) => {
+                env::set_var("TERMINFO_DIRS", dirs);
+            }
+            TerminfoResolution::Dynamic => {
+                if let Some(dirs) = resolve_terminfo_dirs() {
+                    env::set_var("TERMINFO_DIRS", &dirs);
+                }
+            }
+        }
+
+        // At most one of the two custom allocator slots is populated.
+        let (raw_allocator, raw_rust_allocator) = match config.raw_allocator {
+            PythonRawAllocator::System => (None, None),
+            PythonRawAllocator::Rust => (None, Some(make_raw_rust_memory_allocator())),
+            PythonRawAllocator::Jemalloc => (Some(raw_jemallocator()), None),
+        };
+
+        // Must be computed before `config` is moved into the struct.
+        let frozen_modules = make_custom_frozen_modules(&config);
+
+        let mut interp = MainPythonInterpreter {
+            config,
+            frozen_modules,
+            init_run: false,
+            raw_allocator,
+            raw_rust_allocator,
+            gil: None,
+            py: None,
+            program_name: None,
+        };
+
+        interp.init()?;
+
+        Ok(interp)
+    }
+
+    /// Initialize the interpreter.
+    ///
+    /// This mutates global state in the Python interpreter according to the
+    /// bound config and initializes the Python interpreter.
+    ///
+    /// After this is called, the embedded Python interpreter is ready to
+    /// execute custom code: `sys.argv`, `sys.oxidized` and, depending on the
+    /// config, `sys.argvb`, `sys.frozen` and `sys._MEIPASS` have been set.
+    ///
+    /// If called more than once, the function is a no-op from the perspective
+    /// of interpreter initialization.
+    ///
+    /// Returns a Python instance which has the GIL acquired, or a static
+    /// error message describing the step that failed.
+    fn init(&mut self) -> Result<Python, &'static str> {
+        if self.init_run {
+            return Ok(self.acquire_gil());
+        }
+
+        let config = &self.config;
+
+        let exe = env::current_exe().map_err(|_| "could not obtain current exe")?;
+        let origin = exe
+            .parent()
+            .ok_or("unable to get exe parent")?
+            .display()
+            .to_string();
+
+        // Expand the $ORIGIN placeholder to the directory holding the executable.
+        let sys_paths: Vec<String> = config
+            .sys_paths
+            .iter()
+            .map(|path| path.replace("$ORIGIN", &origin))
+            .collect();
+
+        // TODO should we call PyMem::SetupDebugHooks() if enabled?
+        if let Some(raw_allocator) = &self.raw_allocator {
+            unsafe {
+                let ptr = raw_allocator as *const _;
+                pyffi::PyMem_SetAllocator(
+                    pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
+                    ptr as *mut _,
+                );
+            }
+        } else if let Some(raw_rust_allocator) = &self.raw_rust_allocator {
+            unsafe {
+                let ptr = &raw_rust_allocator.allocator as *const _;
+                pyffi::PyMem_SetAllocator(
+                    pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
+                    ptr as *mut _,
+                );
+            }
+        }
+
+        // Module state is a bit wonky.
+        //
+        // Our in-memory importer relies on a special module which holds references
+        // to Python objects exposing module/resource data. This module is imported as
+        // part of initializing the Python interpreter.
+        //
+        // This Python module object needs to hold references to the raw Python module
+        // and resource data. Those references are defined by the InitModuleState struct.
+        //
+        // Unfortunately, we can't easily associate state with the interpreter before
+        // calling Py_Initialize(). And the module initialization function receives no
+        // arguments. Our solution is to update a global pointer to point at "our" state
+        // then call Py_Initialize(). The module will be initialized as part of calling
+        // Py_Initialize(). It will copy the contents at the pointer into the local
+        // module state and the global pointer will be unused after that. The end result
+        // is that we have no reliance on global variables outside of a short window
+        // around the call to Py_Initialize().
+        //
+        // We could potentially do away with this global variable by using a closure for
+        // the initialization function. But this rabbit hole may involve gross hackery
+        // like dynamic module names. It probably isn't worth it.
+
+        // It is important for references in this struct to have a lifetime of at least
+        // that of the interpreter.
+        // TODO specify lifetimes so the compiler validates this for us.
+        let module_state = super::importer::InitModuleState {
+            register_filesystem_importer: self.config.filesystem_importer,
+            sys_paths,
+            py_modules_data: config.py_modules_data,
+            py_resources_data: config.py_resources_data,
+        };
+
+        if config.use_custom_importlib {
+            // Replace the frozen modules in the interpreter with our custom set
+            // that knows how to import from memory.
+            unsafe {
+                pyffi::PyImport_FrozenModules = self.frozen_modules.as_ptr();
+            }
+
+            // Register our _pyoxidizer_importer extension which provides importing functionality.
+            unsafe {
+                // name char* needs to live as long as the interpreter is active.
+                pyffi::PyImport_AppendInittab(
+                    PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const c_char,
+                    Some(PyInit__pyoxidizer_importer),
+                );
+            }
+        }
+
+        // TODO call PyImport_ExtendInitTab to avoid O(n) overhead.
+        for e in &config.extra_extension_modules {
+            let res = unsafe {
+                pyffi::PyImport_AppendInittab(e.name.as_ptr() as *const c_char, Some(e.init_func))
+            };
+
+            if res != 0 {
+                return Err("unable to register extension module");
+            }
+        }
+
+        let exe_str = exe.to_str().ok_or("unable to convert exe to str")?;
+
+        let home = OwnedPyStr::from_str(exe_str)?;
+
+        unsafe {
+            pyffi::Py_SetPythonHome(home.as_wchar_ptr());
+        }
+
+        // Py_SetPythonHome() requires storage that stays valid for the rest of
+        // the program. Deliberately leak the buffer instead of letting it be
+        // freed when this function returns, which would leave the interpreter
+        // with a dangling pointer.
+        std::mem::forget(home);
+
+        let program_name = OwnedPyStr::from_str(exe_str)?;
+
+        unsafe {
+            pyffi::Py_SetProgramName(program_name.as_wchar_ptr());
+        }
+
+        // Value needs to live for lifetime of interpreter.
+        self.program_name = Some(program_name);
+
+        // If we don't call Py_SetPath(), Python has its own logic for initializing it.
+        // We set it to an empty string because we don't want any paths by default. If
+        // we do have defined paths, they will be set after Py_Initialize().
+        unsafe {
+            // Value is copied internally. So short lifetime is OK.
+            let value = OwnedPyStr::from_str("")?;
+            pyffi::Py_SetPath(value.as_wchar_ptr());
+        }
+
+        if let (Some(ref encoding), Some(ref errors)) =
+            (&config.standard_io_encoding, &config.standard_io_errors)
+        {
+            let cencoding = CString::new(encoding.clone())
+                .map_err(|_| "unable to convert encoding to C string")?;
+            let cerrors = CString::new(errors.clone())
+                .map_err(|_| "unable to convert encoding error mode to C string")?;
+
+            let res = unsafe {
+                pyffi::Py_SetStandardStreamEncoding(cencoding.as_ptr(), cerrors.as_ptr())
+            };
+
+            if res != 0 {
+                return Err("unable to set standard stream encoding");
+            }
+        }
+
+        unsafe {
+            pyffi::Py_BytesWarningFlag = config.bytes_warning;
+            pyffi::Py_DebugFlag = if config.parser_debug { 1 } else { 0 };
+            pyffi::Py_DontWriteBytecodeFlag = if config.dont_write_bytecode { 1 } else { 0 };
+            // TODO we could potentially have the config be an Option<i32> so we can control
+            // the hash seed explicitly. But the APIs in Python 3.7 aren't great here, as we'd
+            // need to set an environment variable. Once we support the new initialization
+            // API in Python 3.8, things will be easier to implement.
+            pyffi::Py_HashRandomizationFlag = if config.use_hash_seed { 1 } else { 0 };
+            pyffi::Py_IgnoreEnvironmentFlag = if config.ignore_python_env { 1 } else { 0 };
+            pyffi::Py_InteractiveFlag = if config.interactive { 1 } else { 0 };
+            pyffi::Py_InspectFlag = if config.inspect { 1 } else { 0 };
+            pyffi::Py_IsolatedFlag = if config.isolated { 1 } else { 0 };
+            pyffi::Py_NoSiteFlag = if config.import_site { 0 } else { 1 };
+            pyffi::Py_NoUserSiteDirectory = if config.import_user_site { 0 } else { 1 };
+            pyffi::Py_OptimizeFlag = config.opt_level;
+            pyffi::Py_QuietFlag = if config.quiet { 1 } else { 0 };
+            pyffi::Py_UnbufferedStdioFlag = if config.unbuffered_stdio { 1 } else { 0 };
+            pyffi::Py_VerboseFlag = config.verbose;
+        }
+
+        set_windows_flags(config);
+
+        /* Pre-initialization functions we could support:
+         *
+         * PyObject_SetArenaAllocator()
+         * PySys_AddWarnOption()
+         * PySys_AddXOption()
+         * PySys_ResetWarnOptions()
+         */
+
+        unsafe {
+            // Point the global at our stack allocated module state only now,
+            // immediately before Py_Initialize(), so that none of the fallible
+            // steps above can return early and leave a dangling stack pointer
+            // behind. The pointer is read when the importer module object is
+            // created, which should happen as part of low-level interpreter
+            // initialization.
+            if config.use_custom_importlib {
+                super::importer::NEXT_MODULE_STATE = &module_state;
+            }
+
+            pyffi::Py_Initialize();
+        }
+
+        // We shouldn't be accessing this pointer after Py_Initialize(). And the
+        // memory is stack allocated and doesn't outlive this frame. We don't want
+        // to leave a stack pointer sitting around!
+        unsafe {
+            super::importer::NEXT_MODULE_STATE = std::ptr::null();
+        }
+
+        let py = unsafe { Python::assume_gil_acquired() };
+        self.py = Some(py);
+        self.init_run = true;
+
+        // env::args() panics if arguments aren't valid Unicode. But invalid
+        // Unicode arguments are possible and some applications may want to
+        // support them.
+        //
+        // env::args_os() provides access to the raw OsString instances, which
+        // will be derived from wchar_t on Windows and char* on POSIX. We can
+        // convert these to Python str instances using a platform-specific
+        // mechanism.
+        let args_objs = env::args_os()
+            .map(|os_arg| osstring_to_str(py, os_arg))
+            .collect::<Result<Vec<PyObject>, &'static str>>()?;
+
+        // This will steal the pointer to the elements and mem::forget them.
+        let args = PyList::new(py, &args_objs);
+        let argv = b"argv\0";
+
+        let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
+            pyffi::PySys_SetObject(argv.as_ptr() as *const c_char, args_ptr)
+        });
+
+        match res {
+            0 => (),
+            _ => return Err("unable to set sys.argv"),
+        }
+
+        if config.argvb {
+            let args_objs: Vec<PyObject> = env::args_os()
+                .map(|os_arg| osstring_to_bytes(py, os_arg))
+                .collect();
+
+            let args = PyList::new(py, &args_objs);
+            let argvb = b"argvb\0";
+
+            let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
+                pyffi::PySys_SetObject(argvb.as_ptr() as *const c_char, args_ptr)
+            });
+
+            match res {
+                0 => (),
+                _ => return Err("unable to set sys.argvb"),
+            }
+        }
+
+        // As a convention, sys.oxidized is set to indicate we are running from
+        // a self-contained application.
+        let oxidized = b"oxidized\0";
+
+        let res = py.True().with_borrowed_ptr(py, |py_true| unsafe {
+            pyffi::PySys_SetObject(oxidized.as_ptr() as *const c_char, py_true)
+        });
+
+        match res {
+            0 => (),
+            _ => return Err("unable to set sys.oxidized"),
+        }
+
+        if config.sys_frozen {
+            let frozen = b"frozen\0";
+
+            match py.True().with_borrowed_ptr(py, |py_true| unsafe {
+                pyffi::PySys_SetObject(frozen.as_ptr() as *const c_char, py_true)
+            }) {
+                0 => (),
+                _ => return Err("unable to set sys.frozen"),
+            }
+        }
+
+        if config.sys_meipass {
+            let meipass = b"_MEIPASS\0";
+            let value = PyString::new(py, &origin);
+
+            match value.with_borrowed_ptr(py, |py_value| unsafe {
+                pyffi::PySys_SetObject(meipass.as_ptr() as *const c_char, py_value)
+            }) {
+                0 => (),
+                _ => return Err("unable to set sys._MEIPASS"),
+            }
+        }
+
+        Ok(py)
+    }
+
+    /// Ensure the Python GIL is released.
+    ///
+    /// Clears the cached `Python` token and drops the stored `GILGuard`, if
+    /// any. Does nothing when no token is cached.
+    pub fn release_gil(&mut self) {
+        // NOTE(review): init() caches a `Python` token but never stores a
+        // `GILGuard`, so right after construction `self.gil` is already `None`
+        // and this only forgets the token. It does not appear to release the
+        // GIL obtained by Py_Initialize() in that case -- confirm.
+        if self.py.is_some() {
+            self.py = None;
+            self.gil = None;
+        }
+    }
+
+    /// Ensure the Python GIL is acquired, returning a handle on the interpreter.
+    ///
+    /// If a handle is already cached it is returned as-is. Otherwise the GIL
+    /// is acquired and both the guard and the resulting handle are stored on
+    /// `self` for later calls.
+    pub fn acquire_gil(&mut self) -> Python<'a> {
+        // Fast path: a cached handle means the GIL is already considered held.
+        if let Some(cached) = self.py {
+            return cached;
+        }
+
+        let guard = GILGuard::acquire();
+        let handle = unsafe { Python::assume_gil_acquired() };
+
+        self.gil = Some(guard);
+        self.py = Some(handle);
+
+        handle
+    }
+
+    /// Runs the interpreter with the default code execution settings.
+    ///
+    /// The run mode stored in the config decides what happens: nothing
+    /// (returning `None`), a REPL, a module run as `__main__`, or evaluation
+    /// of a code string.
+    pub fn run(&mut self) -> PyResult<PyObject> {
+        // Work on a copy of the run mode so that `self` can be borrowed
+        // mutably by the methods dispatched to below.
+        let mode = self.config.run.clone();
+
+        let py = self.acquire_gil();
+
+        match mode {
+            PythonRunMode::Eval { code } => self.run_code(&code),
+            PythonRunMode::Module { module } => self.run_module_as_main(&module),
+            PythonRunMode::Repl => self.run_repl(),
+            PythonRunMode::None => Ok(py.None()),
+        }
+    }
+
+    /// Handle a raised SystemExit exception.
+    ///
+    /// This emulates the behavior in pythonrun.c:handle_system_exit() and
+    /// _Py_HandleSystemExit() but without the call to exit(), which we don't want.
+    ///
+    /// Returns the exit code to use: 0 when the exception carries no value
+    /// (or `None`), the integer value when it carries an int, and 1 after
+    /// writing any other value to stderr.
+    ///
+    /// Returns an error message if flushing stdout/stderr fails or the `sys`
+    /// module cannot be imported.
+    fn handle_system_exit(&mut self, py: Python, err: PyErr) -> Result<i32, &'static str> {
+        std::io::stdout()
+            .flush()
+            .map_err(|_| "failed to flush stdout")?;
+
+        let mut value = match err.pvalue {
+            Some(ref instance) => {
+                if instance.as_ptr() == py.None().as_ptr() {
+                    return Ok(0);
+                }
+
+                instance.clone_ref(py)
+            }
+            None => {
+                return Ok(0);
+            }
+        };
+
+        if unsafe { pyffi::PyExceptionInstance_Check(value.as_ptr()) } != 0 {
+            // The error code should be in the "code" attribute.
+            if let Ok(code) = value.getattr(py, "code") {
+                if code == py.None() {
+                    return Ok(0);
+                }
+
+                // Else pretend exc_value.code is the new exception value to use
+                // and fall through to below.
+                value = code;
+            }
+        }
+
+        if unsafe { pyffi::PyLong_Check(value.as_ptr()) } != 0 {
+            return Ok(unsafe { pyffi::PyLong_AsLong(value.as_ptr()) as i32 });
+        }
+
+        let sys_module = py
+            .import("sys")
+            .map_err(|_| "unable to obtain sys module")?;
+        let stderr = sys_module.get(py, "stderr");
+
+        // This is a cargo cult from the canonical implementation.
+        unsafe { pyffi::PyErr_Clear() }
+
+        match stderr {
+            Ok(o) => unsafe {
+                pyffi::PyFile_WriteObject(value.as_ptr(), o.as_ptr(), pyffi::Py_PRINT_RAW);
+            },
+            Err(_) => {
+                unsafe {
+                    let fp = stderr_to_file();
+
+                    // The stream lookup can fail; never hand a null FILE* to
+                    // the C API.
+                    if !fp.is_null() {
+                        pyffi::PyObject_Print(value.as_ptr(), fp, pyffi::Py_PRINT_RAW);
+                        // The C stream has its own buffer which Rust's stderr
+                        // handle knows nothing about, so flush it explicitly or
+                        // the message may never be written.
+                        libc::fflush(fp);
+                    }
+                }
+                std::io::stderr()
+                    .flush()
+                    .map_err(|_| "failure to flush stderr")?;
+            }
+        }
+
+        unsafe {
+            pyffi::PySys_WriteStderr(b"\n\0".as_ptr() as *const c_char);
+        }
+
+        // This frees references to this exception, which may be necessary to avoid
+        // badness.
+        err.restore(py);
+        unsafe {
+            pyffi::PyErr_Clear();
+        }
+
+        Ok(1)
+    }
+
+    /// Runs the interpreter and handles any exception that was raised.
+    ///
+    /// Returns `PythonRunResult::Ok` on success, `PythonRunResult::Exit` with
+    /// the resolved exit code when `SystemExit` was raised, and
+    /// `PythonRunResult::Err` for any other exception (after passing it to
+    /// `print_err()`).
+    pub fn run_and_handle_error(&mut self) -> PythonRunResult {
+        // There are underdefined lifetime bugs at play here. There is no
+        // explicit lifetime for the PyObjects returned. If we don't have
+        // the local variable in scope, we can get into a situation where
+        // drop() on self is called before the PyObject's drop(). This is
+        // problematic because PyObject's drop() attempts to acquire the GIL.
+        // If the interpreter is shut down, there is no GIL to acquire, and
+        // we may segfault.
+        // TODO look into setting lifetimes properly so the compiler can
+        // prevent some issues.
+        let res = self.run();
+        let py = self.acquire_gil();
+
+        match res {
+            Ok(_) => PythonRunResult::Ok {},
+            Err(err) => {
+                // SystemExit is special in that PyErr_PrintEx() will call
+                // exit() if it is seen. So, we handle it manually so we can
+                // return an exit code instead of exiting.
+
+                // The error is put back into the interpreter's error indicator
+                // so the C API can test its type, then fetched out again.
+                // TODO surely the cpython crate offers a better way to do this...
+                err.restore(py);
+                let matches =
+                    unsafe { pyffi::PyErr_ExceptionMatches(pyffi::PyExc_SystemExit) } != 0;
+                let err = cpython::PyErr::fetch(py);
+
+                if matches {
+                    return PythonRunResult::Exit {
+                        code: match self.handle_system_exit(py, err) {
+                            Ok(code) => code,
+                            // Failure while handling the exit: report it and
+                            // fall back to exit code 1.
+                            Err(msg) => {
+                                eprintln!("{}", msg);
+                                1
+                            }
+                        },
+                    };
+                }
+
+                self.print_err(err);
+
+                PythonRunResult::Err {}
+            }
+        }
+    }
+
+    /// Calls run() and resolves a suitable exit code.
+    ///
+    /// Success maps to 0, an unhandled exception to 1, and `SystemExit` to
+    /// the exit code it carried.
+    pub fn run_as_main(&mut self) -> i32 {
+        let outcome = self.run_and_handle_error();
+
+        match outcome {
+            PythonRunResult::Exit { code } => code,
+            PythonRunResult::Err {} => 1,
+            PythonRunResult::Ok {} => 0,
+        }
+    }
+
+    /// Runs a Python module as the __main__ module.
+    ///
+    /// The module's spec is resolved through `importlib.util.find_spec()`,
+    /// its code object is obtained from the spec's loader, and the code is
+    /// evaluated with the `__main__` module's dict as globals and locals.
+    ///
+    /// Returns the execution result of the module code. If the code raises,
+    /// the raised exception is returned as the error; it is not printed here,
+    /// so the caller decides how to report it (and `SystemExit` can be
+    /// handled without the process exiting).
+    ///
+    /// The GIL is acquired if it is not already held.
+    pub fn run_module_as_main(&mut self, name: &str) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        // This is modeled after runpy.py:_run_module_as_main().
+        let main: PyModule = unsafe {
+            PyObject::from_borrowed_ptr(
+                py,
+                pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const c_char),
+            )
+            .cast_into(py)?
+        };
+
+        let main_dict = main.dict(py);
+
+        let importlib_util = py.import("importlib.util")?;
+        let spec = importlib_util.call(py, "find_spec", (name,), None)?;
+        let loader = spec.getattr(py, "loader")?;
+        let code = loader.call_method(py, "get_code", (name,), None)?;
+
+        let origin = spec.getattr(py, "origin")?;
+        let cached = spec.getattr(py, "cached")?;
+
+        // TODO handle __package__.
+        main_dict.set_item(py, "__name__", "__main__")?;
+        main_dict.set_item(py, "__file__", origin)?;
+        main_dict.set_item(py, "__cached__", cached)?;
+        main_dict.set_item(py, "__doc__", py.None())?;
+        main_dict.set_item(py, "__loader__", loader)?;
+        main_dict.set_item(py, "__spec__", spec)?;
+
+        unsafe {
+            let globals = main_dict.as_object().as_ptr();
+            let res = pyffi::PyEval_EvalCode(code.as_ptr(), globals, globals);
+
+            if res.is_null() {
+                // Hand the raised exception to the caller untouched. Printing
+                // it here would clear the error indicator (so a second fetch
+                // would no longer see the real exception) and would exit the
+                // process on SystemExit.
+                Err(PyErr::fetch(py))
+            } else {
+                Ok(PyObject::from_owned_ptr(py, res))
+            }
+        }
+    }
+
+    /// Start and run a Python REPL.
+    ///
+    /// This emulates what CPython's main.c does: the inspect flag is reset,
+    /// `readline` is imported on a best-effort basis, `sys.__interactivehook__`
+    /// is invoked when present, and standard input is then run as an
+    /// interactive file named `<stdin>`.
+    ///
+    /// Returns `None` once the REPL finishes, or the error raised by
+    /// importing `sys` or by the interactive hook.
+    pub fn run_repl(&mut self) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        unsafe {
+            pyffi::Py_InspectFlag = 0;
+        }
+
+        // readline is optional. We don't care if it fails.
+        let _ = py.import("readline");
+
+        let sys_module = py.import("sys")?;
+
+        if let Ok(interactive_hook) = sys_module.get(py, "__interactivehook__") {
+            interactive_hook.call(py, NoArgs, None)?;
+        }
+
+        let c_filename = CString::new("<stdin>")
+            .map_err(|_| PyErr::new::<ValueError, _>(py, "could not create CString"))?;
+        let mut compiler_flags = pyffi::PyCompilerFlags { cf_flags: 0 };
+
+        // TODO use return value.
+        unsafe {
+            let input = stdin_to_file();
+            pyffi::PyRun_AnyFileExFlags(
+                input,
+                c_filename.as_ptr() as *const c_char,
+                0,
+                &mut compiler_flags,
+            )
+        };
+
+        Ok(py.None())
+    }
+
+    /// Execute a string of Python source code in the ``__main__`` module.
+    ///
+    /// This is similar to what ``python -c <code>`` would do.
+    ///
+    /// The interpreter is automatically initialized if needed.
+    pub fn run_code(&mut self, code: &str) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        // The C API takes a NUL-terminated string, so interior NULs are rejected.
+        let source = CString::new(code).map_err(|_| {
+            PyErr::new::<ValueError, _>(py, "source code is not a valid C string")
+        })?;
+
+        // The code runs with the `__main__` module dict as globals and locals.
+        let main_module = unsafe { pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const _) };
+        if main_module.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+
+        let result = unsafe {
+            let namespace = pyffi::PyModule_GetDict(main_module);
+
+            pyffi::PyRun_StringFlags(
+                source.as_ptr() as *const _,
+                pyffi::Py_file_input,
+                namespace,
+                namespace,
+                std::ptr::null_mut(),
+            )
+        };
+
+        // A null result means the interpreter raised; surface that exception.
+        if result.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+
+        // A non-null result is handed to `PyObject` as an owned pointer.
+        Ok(unsafe { PyObject::from_owned_ptr(py, result) })
+    }
+
+    /// Print a Python error.
+    ///
+    /// The GIL is acquired first. Under the hood this calls ``PyErr_PrintEx()``,
+    /// which may call ``Py_Exit()`` and may write to stderr.
+    pub fn print_err(&mut self, err: PyErr) {
+        let gil_token = self.acquire_gil();
+        err.print(gil_token);
+    }
+}
+
+/// Write loaded Python modules to a directory.
+///
+/// Given a Python interpreter and a path to a directory, this will create a
+/// file in that directory named ``modules-<UUID>`` and write a ``\n`` delimited
+/// list of loaded names from ``sys.modules`` into that file.
+fn write_modules_to_directory(py: Python, path: &PathBuf) -> Result<(), &'static str> {
+    // TODO this needs better error handling all over.
+
+    fs::create_dir_all(path).map_err(|_| "could not create directory for modules")?;
+
+    let path = path.join(format!("modules-{}", uuid::Uuid::new_v4()));
+
+    let sys = py
+        .import("sys")
+        .map_err(|_| "could not obtain sys module")?;
+    let modules = sys
+        .get(py, "modules")
+        .map_err(|_| "could not obtain sys.modules")?;
+
+    let modules = modules
+        .cast_as::<PyDict>(py)
+        .map_err(|_| "sys.modules is not a dict")?;
+
+    // A BTreeSet keeps the names sorted so the output is deterministic.
+    let mut names = BTreeSet::new();
+    for (key, _value) in modules.items(py) {
+        let name = key.extract::<String>(py);
+        names.insert(name.map_err(|_| "module name is not a str")?);
+    }
+
+    // Assemble the listing in memory so it is written with one call rather
+    // than one unbuffered write per module name.
+    let mut listing = String::new();
+    for name in &names {
+        listing.push_str(name);
+        listing.push('\n');
+    }
+
+    let mut f = fs::File::create(path).map_err(|_| "could not open file for writing")?;
+    f.write_all(listing.as_bytes())
+        .map_err(|_| "could not write")
+}
+
+impl<'a> Drop for MainPythonInterpreter<'a> {
+    /// Optionally records the loaded module names, then finalizes Python.
+    fn drop(&mut self) {
+        let modules_dir = match &self.config.write_modules_directory_env {
+            Some(key) => env::var(key).ok().map(PathBuf::from),
+            None => None,
+        };
+        if let Some(dir) = modules_dir {
+            if let Err(msg) = write_modules_to_directory(self.acquire_gil(), &dir) {
+                eprintln!("error writing modules file: {}", msg);
+            }
+        }
+
+        let _ = unsafe { pyffi::Py_FinalizeEx() };
+    }
+}
diff --git a/rust/pyembed/src/pyalloc.rs b/rust/pyembed/src/pyalloc.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pyalloc.rs
@@ -0,0 +1,221 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Custom Python memory allocators.
+
+#[cfg(feature = "jemalloc-sys")]
+use jemalloc_sys as jemallocffi;
+use libc::{c_void, size_t};
+use python3_sys as pyffi;
+use std::alloc;
+use std::collections::HashMap;
+#[cfg(feature = "jemalloc-sys")]
+use std::ptr::null_mut;
+
+const MIN_ALIGN: usize = 16;
+
+type RawAllocatorState = HashMap<*mut u8, alloc::Layout>;
+
+/// Holds state for the raw memory allocator.
+///
+/// Ideally we wouldn't need to track state. But Rust's dealloc() API
+/// requires passing in a Layout that matches the allocation. This means
+/// we need to track the Layout for each allocation. This data structure
+/// facilitates that.
+///
+/// TODO HashMap isn't thread safe and the Python raw allocator doesn't
+/// hold the GIL. So we need a thread safe map or a mutex guarding access.
+pub struct RawAllocator {
+    pub allocator: pyffi::PyMemAllocatorEx, // callback table; its `ctx` points at `_state`
+    _state: Box<RawAllocatorState>, // owns the pointer -> Layout map that `ctx` refers to
+}
+
+extern "C" fn raw_rust_malloc(ctx: *mut c_void, size: size_t) -> *mut c_void {
+    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
+    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
+    // instead.
+    let size = size.max(1);
+    // An unrepresentable size is an allocation failure (NULL), not UB.
+    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
+        Ok(layout) => layout,
+        Err(_) => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let res = alloc::alloc(layout);
+        // Only successful allocations are tracked; NULL is never freed.
+        if !res.is_null() {
+            (*(ctx as *mut RawAllocatorState)).insert(res, layout);
+        }
+        res as *mut c_void
+    }
+}
+
+extern "C" fn raw_rust_calloc(ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
+    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
+    // size zero bytes returns a distinct non-NULL pointer if possible, as if
+    // PyMem_RawCalloc(1, 1) had been called instead.
+    // An overflowing or unrepresentable size is an allocation failure (NULL).
+    let layout = match nelem
+        .checked_mul(elsize)
+        .and_then(|size| alloc::Layout::from_size_align(size.max(1), MIN_ALIGN).ok())
+    {
+        Some(layout) => layout,
+        None => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let res = alloc::alloc_zeroed(layout);
+        if !res.is_null() {
+            (*(ctx as *mut RawAllocatorState)).insert(res, layout);
+        }
+        res as *mut c_void
+    }
+}
+
+extern "C" fn raw_rust_realloc(
+    ctx: *mut c_void,
+    ptr: *mut c_void,
+    new_size: size_t,
+) -> *mut c_void {
+    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
+    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
+    // resized but is not freed, and the returned pointer is non-NULL.
+    if ptr.is_null() {
+        return raw_rust_malloc(ctx, new_size);
+    }
+
+    let new_size = new_size.max(1);
+    // An unrepresentable size is an allocation failure; the block is untouched.
+    let layout = match alloc::Layout::from_size_align(new_size, MIN_ALIGN) {
+        Ok(layout) => layout,
+        Err(_) => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+        let key = ptr as *mut u8;
+        let old_layout = *(*state)
+            .get(&key)
+            .expect("original memory address not tracked");
+        let res = alloc::realloc(key, old_layout, new_size);
+        // On failure the original block stays valid, so its record must be
+        // kept; the bookkeeping only moves once the reallocation succeeded.
+        if !res.is_null() {
+            (*state).remove(&key);
+            (*state).insert(res, layout);
+        }
+        res as *mut c_void
+    }
+}
+
+extern "C" fn raw_rust_free(ctx: *mut c_void, ptr: *mut c_void) {
+    if ptr.is_null() {
+        return;
+    }
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+        let key = ptr as *mut u8;
+
+        // Removing the record yields the Layout in a single lookup, and the
+        // panic message is only formatted if the pointer is untracked.
+        let layout = (*state)
+            .remove(&key)
+            .unwrap_or_else(|| panic!("could not find allocated memory record: {:?}", key));
+
+        alloc::dealloc(key, layout);
+    }
+}
+
+/// Build a `RawAllocator` whose callbacks allocate through Rust's `std::alloc`.
+///
+/// The callbacks receive the heap-allocated tracking map through `ctx`.
+pub fn make_raw_rust_memory_allocator() -> RawAllocator {
+    // The tracking map lives on the heap so that `ctx` never refers to the
+    // stack. The pointer is re-boxed into the returned struct so that the
+    // map's lifetime is managed.
+    let state_ptr = Box::into_raw(Box::new(RawAllocatorState::new()));
+
+    RawAllocator {
+        allocator: pyffi::PyMemAllocatorEx {
+            ctx: state_ptr as *mut c_void,
+            malloc: Some(raw_rust_malloc),
+            calloc: Some(raw_rust_calloc),
+            realloc: Some(raw_rust_realloc),
+            free: Some(raw_rust_free),
+        },
+        _state: unsafe { Box::from_raw(state_ptr) },
+    }
+}
+
+// Now let's define a raw memory allocator that interfaces directly with jemalloc.
+// This avoids the overhead of going through Rust's allocation layer.
+
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_malloc(_ctx: *mut c_void, size: size_t) -> *mut c_void {
+    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
+    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
+    // instead.
+    //
+    // Clamping the request to at least one byte implements that rule, as a
+    // zero-sized request is never forwarded to jemalloc.
+    let byte_count = size.max(1);
+
+    unsafe { jemallocffi::mallocx(byte_count, 0) }
+}
+
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_calloc(_ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
+    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
+    // size zero bytes returns a distinct non-NULL pointer if possible, as if
+    // PyMem_RawCalloc(1, 1) had been called instead.
+    let size = match nelem.checked_mul(elsize) {
+        Some(total) => total.max(1),
+        None => return null_mut(), // `nelem * elsize` overflowed: report failure
+    };
+
+    unsafe { jemallocffi::mallocx(size, jemallocffi::MALLOCX_ZERO) }
+}
+
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_realloc(
+    ctx: *mut c_void,
+    ptr: *mut c_void,
+    new_size: size_t,
+) -> *mut c_void {
+    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
+    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
+    // resized but is not freed, and the returned pointer is non-NULL.
+    if ptr.is_null() {
+        // No existing block: behave exactly like a fresh allocation.
+        raw_jemalloc_malloc(ctx, new_size)
+    } else {
+        // A zero-byte request is clamped to one byte, so the block is
+        // resized rather than freed.
+        let clamped_size = new_size.max(1);
+
+        unsafe { jemallocffi::rallocx(ptr, clamped_size, 0) }
+    }
+}
+
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_free(_ctx: *mut c_void, ptr: *mut c_void) {
+    // Freeing NULL is a no-op; any other pointer is handed back to
+    // jemalloc.
+    if !ptr.is_null() {
+        unsafe { jemallocffi::dallocx(ptr, 0) }
+    }
+}
+
+#[cfg(feature = "jemalloc-sys")]
+pub fn make_raw_jemalloc_allocator() -> pyffi::PyMemAllocatorEx {
+    pyffi::PyMemAllocatorEx {
+        ctx: null_mut(), // no shared state: the jemalloc callbacks never read their ctx
+        malloc: Some(raw_jemalloc_malloc),
+        calloc: Some(raw_jemalloc_calloc),
+        realloc: Some(raw_jemalloc_realloc),
+        free: Some(raw_jemalloc_free),
+    }
+}
diff --git a/rust/pyembed/src/osutils.rs b/rust/pyembed/src/osutils.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/osutils.rs
@@ -0,0 +1,147 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use lazy_static::lazy_static;
+use std::path::{Path, PathBuf};
+
+/// terminfo directories for Debian based distributions, as a `:` delimited list.
+///
+/// Search for `--with-terminfo-dirs` at
+/// https://salsa.debian.org/debian/ncurses/blob/master/debian/rules to find
+/// the source of truth for this.
+const TERMINFO_DIRS_DEBIAN: &str = "/etc/terminfo:/lib/terminfo:/usr/share/terminfo";
+
+/// terminfo directories for RedHat based distributions, as a `:` delimited list.
+///
+/// CentOS is compiled with
+/// `--with-terminfo-dirs=%{_sysconfdir}/terminfo:%{_datadir}/terminfo`.
+const TERMINFO_DIRS_REDHAT: &str = "/etc/terminfo:/usr/share/terminfo";
+
+/// terminfo directory for macOS.
+const TERMINFO_DIRS_MACOS: &str = "/usr/share/terminfo";
+
+lazy_static! {
+    static ref TERMINFO_DIRS_COMMON: Vec<PathBuf> = { // candidates probed on unknown Linux distros
+        vec![
+            PathBuf::from("/usr/local/etc/terminfo"),
+            PathBuf::from("/usr/local/lib/terminfo"),
+            PathBuf::from("/usr/local/share/terminfo"),
+            PathBuf::from("/etc/terminfo"),
+            PathBuf::from("/usr/lib/terminfo"),
+            PathBuf::from("/lib/terminfo"),
+            PathBuf::from("/usr/share/terminfo"),
+        ]
+    };
+}
+
+#[derive(Clone)]
+enum OsVariant { // operating system family the binary was compiled for
+    Linux,
+    MacOs,
+    Windows,
+    Other, // any target that is not Linux, macOS or Windows
+}
+
+enum LinuxDistroVariant { // distro family, as far as /etc/os-release reveals it
+    Debian,
+    RedHat,
+    Unknown, // os-release unreadable or no recognised ID/ID_LIKE value
+}
+
+lazy_static! {
+    /// Operating system the binary was compiled for.
+    ///
+    /// `std::env::consts::OS` names the same target as `cfg!(target_os = ..)`.
+    static ref TARGET_OS: OsVariant = {
+        match std::env::consts::OS {
+            "linux" => OsVariant::Linux,
+            "macos" => OsVariant::MacOs,
+            "windows" => OsVariant::Windows,
+            _ => OsVariant::Other,
+        }
+    };
+}
+
+struct OsInfo {
+    os: OsVariant, // compile-time target OS
+    linux_distro: Option<LinuxDistroVariant>, // Some(..) only when `os` is Linux
+}
+
+/// Attempt to resolve the Linux distro family by parsing `/etc/os-release`.
+///
+/// Both `ID_LIKE=` and `ID=` are consulted, since Debian itself declares
+/// only `ID=debian` and ships no `ID_LIKE=` line.
+fn resolve_linux_distro() -> LinuxDistroVariant {
+    let data = match std::fs::read_to_string(Path::new("/etc/os-release")) {
+        Ok(data) => data,
+        Err(_) => return LinuxDistroVariant::Unknown, // no readable os-release
+    };
+
+    for line in data.lines() {
+        if line.starts_with("ID_LIKE=") || line.starts_with("ID=") {
+            if line.contains("debian") {
+                return LinuxDistroVariant::Debian;
+            } else if line.contains("rhel") || line.contains("fedora") {
+                return LinuxDistroVariant::RedHat;
+            }
+        }
+    }
+
+    LinuxDistroVariant::Unknown
+}
+
+/// Snapshot the target OS and, on Linux only, the detected distro family.
+fn resolve_os_info() -> OsInfo {
+    let target = TARGET_OS.clone();
+    let distro = if let OsVariant::Linux = target { Some(resolve_linux_distro()) } else { None };
+    OsInfo {
+        os: target,
+        linux_distro: distro,
+    }
+}
+
+/// Attempt to resolve the value for the `TERMINFO_DIRS` environment variable.
+///
+/// Returns Some() value that `TERMINFO_DIRS` should be set to or None if
+/// no environment variable should be set: it is already set, the OS does
+/// not use terminfo here, or no known terminfo directory exists.
+pub fn resolve_terminfo_dirs() -> Option<String> {
+    // Always respect an environment variable, if present. `var_os()` is used
+    // because `var()` also fails for a value that is set but not valid Unicode.
+    if std::env::var_os("TERMINFO_DIRS").is_some() {
+        return None;
+    }
+
+    let os_info = resolve_os_info();
+
+    match os_info.os {
+        OsVariant::Linux => match os_info.linux_distro.unwrap() {
+            // TODO we could stat() the well-known paths ourselves and omit
+            // paths that don't exist. This /might/ save some syscalls, since
+            // ncurses doesn't appear to be the most frugal w.r.t. filesystem
+            // requests.
+            LinuxDistroVariant::Debian => Some(TERMINFO_DIRS_DEBIAN.to_string()),
+            LinuxDistroVariant::RedHat => Some(TERMINFO_DIRS_REDHAT.to_string()),
+            LinuxDistroVariant::Unknown => {
+                // We don't know this Linux variant. Look for common terminfo
+                // database directories and use paths that are found.
+                let paths = TERMINFO_DIRS_COMMON
+                    .iter()
+                    .filter(|p| p.exists())
+                    .map(|p| p.display().to_string())
+                    .collect::<Vec<String>>()
+                    .join(":");
+                // Nothing found: leave the variable unset rather than empty.
+                if paths.is_empty() {
+                    None
+                } else {
+                    Some(paths)
+                }
+            }
+        },
+        OsVariant::MacOs => Some(TERMINFO_DIRS_MACOS.to_string()),
+        // Windows doesn't use the terminfo database.
+        OsVariant::Windows | OsVariant::Other => None,
+    }
+}
diff --git a/rust/pyembed/src/lib.rs b/rust/pyembed/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/lib.rs
@@ -0,0 +1,35 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+/*!
+Manage an embedded Python interpreter.
+
+The `pyembed` crate contains functionality for managing a Python interpreter
+embedded in the current binary. This crate is typically used along with
+[PyOxidizer](https://github.com/indygreg/PyOxidizer) for producing
+self-contained binaries containing Python.
+
+The most important types are [`PythonConfig`](struct.PythonConfig.html) and
+[`MainPythonInterpreter`](struct.MainPythonInterpreter.html). A `PythonConfig`
+defines how a Python interpreter is to behave. A `MainPythonInterpreter`
+creates and manages that interpreter and serves as a high-level interface for
+running code in the interpreter.
+*/
+
+mod config;
+mod data;
+mod importer;
+mod osutils;
+mod pyalloc;
+mod pyinterp;
+mod pystr;
+
+#[allow(unused_imports)]
+pub use crate::config::PythonConfig;
+
+#[allow(unused_imports)]
+pub use crate::data::default_python_config;
+
+#[allow(unused_imports)]
+pub use crate::pyinterp::MainPythonInterpreter;
diff --git a/rust/pyembed/src/importer.rs b/rust/pyembed/src/importer.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/importer.rs
@@ -0,0 +1,911 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+/*!
+Functionality for a Python importer.
+
+This module defines a Python meta path importer and associated functionality
+for importing Python modules from memory.
+*/
+
+use std::cell::RefCell;
+use std::collections::{HashMap, HashSet};
+use std::ffi::CStr;
+use std::io::Cursor;
+use std::sync::Arc;
+
+use byteorder::{LittleEndian, ReadBytesExt};
+use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError};
+use cpython::{
+    py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr,
+    PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+};
+use python3_sys as pyffi;
+use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory};
+
+use super::pyinterp::PYOXIDIZER_IMPORTER_NAME;
+
+/// Obtain a Python memoryview over a memory slice, letting Python access
+/// the underlying memory without copying it.
+#[inline]
+fn get_memory_view(py: Python, data: &'static [u8]) -> Option<PyObject> {
+    unsafe {
+        let view = PyMemoryView_FromMemory(data.as_ptr() as _, data.len() as _, PyBUF_READ);
+        PyObject::from_owned_ptr_opt(py, view)
+    }
+}
+
+/// Holds pointers to Python module data in memory.
+#[derive(Debug)]
+struct PythonModuleData {
+    source: Option<&'static [u8]>, // raw source bytes; None when no source is stored
+    bytecode: Option<&'static [u8]>, // bytes fed to marshal.loads(); None when absent
+}
+
+impl PythonModuleData {
+    /// Obtain a PyMemoryView instance for source data.
+    ///
+    /// Yields `None` when this module carries no source.
+    fn get_source_memory_view(&self, py: Python) -> Option<PyObject> {
+        let source = self.source?;
+        get_memory_view(py, source)
+    }
+
+    /// Obtain a PyMemoryView instance for bytecode data.
+    ///
+    /// Yields `None` when this module carries no bytecode.
+    fn get_bytecode_memory_view(&self, py: Python) -> Option<PyObject> {
+        let bytecode = self.bytecode?;
+        get_memory_view(py, bytecode)
+    }
+}
+
+/// Represents Python modules data in memory.
+///
+/// This is essentially an index over a raw backing blob.
+struct PythonModulesData {
+    /// Names of the modules flagged as packages.
+    packages: HashSet<&'static str>,
+
+    /// Maps module name to source/bytecode; modules with neither are omitted.
+    data: HashMap<&'static str, PythonModuleData>,
+}
+
+impl PythonModulesData {
+    /// Construct a new instance from a memory slice.
+    fn from(data: &'static [u8]) -> Result<PythonModulesData, &'static str> {
+        let mut reader = Cursor::new(data);
+        // Header: a little-endian u32 holding the number of index entries.
+        let count = reader
+            .read_u32::<LittleEndian>()
+            .or_else(|_| Err("failed reading count"))?;
+
+        let mut index = Vec::with_capacity(count as usize);
+        let mut total_names_length = 0;
+        let mut total_sources_length = 0;
+        let mut package_count = 0;
+        // Index entry: four little-endian u32s (name, source and bytecode lengths, flags).
+        for _ in 0..count {
+            let name_length = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading name length"))?
+                as usize;
+            let source_length = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading source length"))?
+                as usize;
+            let bytecode_length = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading bytecode length"))?
+                as usize;
+            let flags = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading module flags"))?;
+
+            let is_package = flags & 0x01 != 0; // bit 0 of the flags marks a package
+
+            if is_package {
+                package_count += 1;
+            }
+
+            index.push((name_length, source_length, bytecode_length, is_package));
+            total_names_length += name_length;
+            total_sources_length += source_length;
+        }
+        // Names follow the index, then all sources, then all bytecodes.
+        let mut res = HashMap::with_capacity(count as usize);
+        let mut packages = HashSet::with_capacity(package_count);
+        let sources_start_offset = reader.position() as usize + total_names_length;
+        let bytecodes_start_offset = sources_start_offset + total_sources_length;
+
+        let mut sources_current_offset: usize = 0;
+        let mut bytecodes_current_offset: usize = 0;
+
+        for (name_length, source_length, bytecode_length, is_package) in index {
+            let offset = reader.position() as usize;
+            // NOTE(review): name bytes are trusted to be UTF-8 — confirm the blob writer guarantees it.
+            let name =
+                unsafe { std::str::from_utf8_unchecked(&data[offset..offset + name_length]) };
+
+            let source_offset = sources_start_offset + sources_current_offset;
+            let source = if source_length > 0 {
+                Some(&data[source_offset..source_offset + source_length])
+            } else {
+                None
+            };
+
+            let bytecode_offset = bytecodes_start_offset + bytecodes_current_offset;
+            let bytecode = if bytecode_length > 0 {
+                Some(&data[bytecode_offset..bytecode_offset + bytecode_length])
+            } else {
+                None
+            };
+
+            reader.set_position(offset as u64 + name_length as u64); // step over this name
+
+            sources_current_offset += source_length;
+            bytecodes_current_offset += bytecode_length;
+
+            if is_package {
+                packages.insert(name);
+            }
+
+            // Extension modules will have their names present to populate the
+            // packages set. So only populate module data if we have data for it.
+            if source.is_some() || bytecode.is_some() {
+                res.insert(name, PythonModuleData { source, bytecode });
+            }
+        }
+
+        Ok(PythonModulesData {
+            packages,
+            data: res,
+        })
+    }
+}
+
+/// Represents Python resources data in memory.
+///
+/// This is essentially an index over a raw backing blob.
+struct PythonResourcesData {
+    packages: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>, // package name -> (resource name -> bytes)
+}
+
+impl PythonResourcesData {
+    fn from(data: &'static [u8]) -> Result<PythonResourcesData, &'static str> {
+        let mut reader = Cursor::new(data);
+        // Header: a little-endian u32 holding the number of packages.
+        let package_count = reader
+            .read_u32::<LittleEndian>()
+            .or_else(|_| Err("failed reading package count"))? as usize;
+
+        let mut index = Vec::with_capacity(package_count);
+        let mut total_names_length = 0;
+        // Per package: little-endian u32 name length and resource count.
+        for _ in 0..package_count {
+            let package_name_length = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading package name length"))?
+                as usize;
+            let resource_count = reader
+                .read_u32::<LittleEndian>()
+                .or_else(|_| Err("failed reading resource count"))?
+                as usize;
+
+            total_names_length += package_name_length;
+
+            let mut package_index = Vec::with_capacity(resource_count);
+            // Per resource: little-endian u32 name length and data length.
+            for _ in 0..resource_count {
+                let resource_name_length = reader
+                    .read_u32::<LittleEndian>()
+                    .or_else(|_| Err("failed reading resource name length"))?
+                    as usize;
+                let resource_data_length = reader
+                    .read_u32::<LittleEndian>()
+                    .or_else(|_| Err("failed reading resource data length"))?
+                    as usize;
+
+                total_names_length += resource_name_length;
+
+                package_index.push((resource_name_length, resource_data_length));
+            }
+
+            index.push((package_name_length, package_index));
+        }
+        // After the index: every package and resource name, then all resource data.
+        let mut name_offset = reader.position() as usize;
+        let mut data_offset = name_offset + total_names_length;
+        let mut res = HashMap::new();
+
+        for (package_name_length, package_index) in index {
+            let package_name = unsafe { // NOTE(review): bytes trusted to be UTF-8 — confirm
+                std::str::from_utf8_unchecked(&data[name_offset..name_offset + package_name_length])
+            };
+
+            name_offset += package_name_length;
+
+            let mut package_data = Box::new(HashMap::new());
+
+            for (resource_name_length, resource_data_length) in package_index {
+                let resource_name = unsafe { // NOTE(review): bytes trusted to be UTF-8 — confirm
+                    std::str::from_utf8_unchecked(
+                        &data[name_offset..name_offset + resource_name_length],
+                    )
+                };
+
+                name_offset += resource_name_length;
+
+                let resource_data = &data[data_offset..data_offset + resource_data_length];
+
+                data_offset += resource_data_length;
+
+                package_data.insert(resource_name, resource_data);
+            }
+
+            res.insert(package_name, Arc::new(package_data));
+        }
+
+        Ok(PythonResourcesData { packages: res })
+    }
+}
+
+#[allow(unused_doc_comments)]
+/// Python type to import modules.
+///
+/// This type implements the importlib.abc.MetaPathFinder interface for
+/// finding/loading modules. It supports loading various flavors of modules,
+/// allowing it to be the only registered sys.meta_path importer.
+py_class!(class PyOxidizerFinder |py| {
+    data imp_module: PyModule;
+    data marshal_loads: PyObject;
+    data builtin_importer: PyObject;
+    data frozen_importer: PyObject;
+    data call_with_frames_removed: PyObject;
+    data module_spec_type: PyObject;
+    data decode_source: PyObject;
+    data exec_fn: PyObject;
+    data packages: HashSet<&'static str>;
+    data known_modules: KnownModules;
+    data resources: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>;
+    data resource_readers: RefCell<Box<HashMap<String, PyObject>>>;
+
+    // Start of importlib.abc.MetaPathFinder interface.
+
+    def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option<PyObject> = None) -> PyResult<PyObject> {
+        let key = fullname.to_string(py)?;
+
+        if let Some(flavor) = self.known_modules(py).get(&*key) {
+            match flavor {
+                KnownModuleFlavor::Builtin => {
+                    // BuiltinImporter.find_spec() always returns None if `path` is defined.
+                    // And it doesn't use `target`. So don't proxy these values.
+                    self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None)
+                }
+                KnownModuleFlavor::Frozen => {
+                    self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None)
+                }
+                KnownModuleFlavor::InMemory { .. } => {
+                    let is_package = self.packages(py).contains(&*key);
+
+                    // TODO consider setting origin and has_location so __file__ will be
+                    // populated.
+
+                    let kwargs = PyDict::new(py);
+                    kwargs.set_item(py, "is_package", is_package)?;
+
+                    self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs))
+                }
+            }
+        } else {
+            Ok(py.None())
+        }
+    }
+
+    def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult<PyObject> {
+        // Method is deprecated. Always returns None.
+        // We /could/ call find_spec(). Meh.
+        Ok(py.None())
+    }
+
+    def invalidate_caches(&self) -> PyResult<PyObject> {
+        Ok(py.None())
+    }
+
+    // End of importlib.abc.MetaPathFinder interface.
+
+    // Start of importlib.abc.Loader interface.
+
+    def create_module(&self, _spec: &PyObject) -> PyResult<PyObject> {
+        Ok(py.None())
+    }
+
+    def exec_module(&self, module: &PyObject) -> PyResult<PyObject> {
+        let name = module.getattr(py, "__name__")?;
+        let key = name.extract::<String>(py)?;
+
+        if let Some(flavor) = self.known_modules(py).get(&*key) {
+            match flavor {
+                KnownModuleFlavor::Builtin => {
+                    self.builtin_importer(py).call_method(py, "exec_module", (module,), None)
+                },
+                KnownModuleFlavor::Frozen => {
+                    self.frozen_importer(py).call_method(py, "exec_module", (module,), None)
+                },
+                KnownModuleFlavor::InMemory { module_data } => {
+                    match module_data.get_bytecode_memory_view(py) {
+                        Some(value) => {
+                            let code = self.marshal_loads(py).call(py, (value,), None)?;
+                            let exec_fn = self.exec_fn(py);
+                            let dict = module.getattr(py, "__dict__")?;
+
+                            self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None)
+                        },
+                        None => {
+                            Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", name)))
+                        }
+                    }
+                },
+            }
+        } else {
+            // Raising here might make more sense, as exec_module() shouldn't
+            // be called on the Loader that didn't create the module.
+            Ok(py.None())
+        }
+    }
+
+    // End of importlib.abc.Loader interface.
+
+    // Start of importlib.abc.InspectLoader interface.
+
+    // Return the code object for a module known to this importer.
+    //
+    // Frozen modules are resolved through `get_frozen_object` on the importer's
+    // `imp_module`; in-memory modules by unmarshalling their embedded bytecode.
+    // Built-in modules and names this importer does not know both yield `None`.
+    def get_code(&self, fullname: &PyString) -> PyResult<PyObject> {
+        let module_name = fullname.to_string(py)?;
+
+        match self.known_modules(py).get(&*module_name) {
+            // Frozen: ask the interpreter for the frozen code object.
+            Some(KnownModuleFlavor::Frozen) => {
+                let imp_module = self.imp_module(py);
+                imp_module.call(py, "get_frozen_object", (fullname,), None)
+            },
+            // In-memory: the bytecode is a marshalled code object.
+            Some(KnownModuleFlavor::InMemory { module_data }) => {
+                if let Some(bytecode) = module_data.get_bytecode_memory_view(py) {
+                    self.marshal_loads(py).call(py, (bytecode,), None)
+                } else {
+                    // The module is registered but carries no bytecode.
+                    Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", fullname)))
+                }
+            },
+            // Nothing to hand back for built-ins or unknown names.
+            Some(KnownModuleFlavor::Builtin) | None => Ok(py.None()),
+        }
+    }
+
+
+    // Return the decoded source text for an in-memory module.
+    //
+    // Modules of any other flavor, and unknown names, yield `None`.
+    def get_source(&self, fullname: &PyString) -> PyResult<PyObject> {
+        let module_name = fullname.to_string(py)?;
+
+        // Only in-memory modules can carry source data.
+        let module_data = match self.known_modules(py).get(&*module_name) {
+            Some(KnownModuleFlavor::InMemory { module_data }) => module_data,
+            Some(_) | None => return Ok(py.None()),
+        };
+
+        let source_view = match module_data.get_source_memory_view(py) {
+            Some(view) => view,
+            None => {
+                return Err(PyErr::new::<ImportError, _>(py, ("source not available", fullname)));
+            }
+        };
+
+        // decode_source (from importlib._bootstrap_external) can't handle
+        // memoryview. So we take the memory hit and cast to bytes.
+        let source_bytes = source_view.call_method(py, "tobytes", NoArgs, None)?;
+        self.decode_source(py).call(py, (source_bytes,), None)
+    }
+
+    // End of importlib.abc.InspectLoader interface.
+
+    // Support obtaining ResourceReader instances.
+    //
+    // Readers are only created for packages. Each one is created on first
+    // request and cached, so later calls for the same name return the same object.
+    def get_resource_reader(&self, fullname: &PyString) -> PyResult<PyObject> {
+        let package = fullname.to_string(py)?;
+
+        // A failed borrow should not happen, since the code below should not
+        // recurse into this function.
+        let mut cache = self.resource_readers(py).try_borrow_mut().map_err(|_| {
+            PyErr::new::<RuntimeError, _>(py, "resource reader already borrowed")
+        })?;
+
+        // Return an existing instance if we have one.
+        if let Some(existing) = cache.get(&*package) {
+            return Ok(existing.clone_ref(py));
+        }
+
+        // Only create a reader if the name is a package.
+        if !self.packages(py).contains(&*package) {
+            return Ok(py.None());
+        }
+
+        // Not all packages have known resources; those get an empty map.
+        let resources = match self.resources(py).get(&*package) {
+            Some(known) => known.clone(),
+            None => {
+                let empty: Box<HashMap<&'static str, &'static [u8]>> = Box::new(HashMap::new());
+                Arc::new(empty)
+            }
+        };
+
+        // Build the reader and remember it for subsequent calls.
+        let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object();
+        cache.insert(package.to_string(), reader.clone_ref(py));
+
+        Ok(reader)
+    }
+});
+
+#[allow(unused_doc_comments)]
+/// In-memory reader for the resources of a single package.
+///
+/// Implements importlib.abc.ResourceReader.
+py_class!(class PyOxidizerResourceReader |py| {
+    data resources: Arc<Box<HashMap<&'static str, &'static [u8]>>>;
+
+    /// Returns an opened, file-like object for binary reading of the resource.
+    ///
+    /// FileNotFoundError is raised if the resource cannot be found.
+    def open_resource(&self, resource: &PyString) -> PyResult<PyObject> {
+        let resource_name = resource.to_string(py)?;
+
+        let data = match self.resources(py).get(&*resource_name) {
+            Some(data) => data,
+            None => return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found")),
+        };
+
+        // No memoryview could be produced: raise the interpreter's current error.
+        let view = match get_memory_view(py, data) {
+            Some(view) => view,
+            None => return Err(PyErr::fetch(py)),
+        };
+
+        let bytes_io = py.import("io")?.get(py, "BytesIO")?;
+        bytes_io.call(py, (view,), None)
+    }
+
+    /// Returns the file system path to the resource.
+    ///
+    /// Resources served by this reader never exist on the file system, so
+    /// FileNotFoundError is always raised.
+    def resource_path(&self, _resource: &PyString) -> PyResult<PyObject> {
+        Err(PyErr::new::<FileNotFoundError, _>(py, "in-memory resources do not have filesystem paths"))
+    }
+
+    /// Returns True if the given name is considered a resource. FileNotFoundError
+    /// is raised if name does not exist.
+    def is_resource(&self, name: &PyString) -> PyResult<PyObject> {
+        let resource_name = name.to_string(py)?;
+
+        if !self.resources(py).contains_key(&*resource_name) {
+            return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"));
+        }
+
+        Ok(py.True().as_object().clone_ref(py))
+    }
+
+    /// Returns an iterable of strings over the contents of the package.
+    ///
+    /// Do note that it is not required that all names returned by the iterator be actual resources,
+    /// e.g. it is acceptable to return names for which is_resource() would be false.
+    ///
+    /// Allowing non-resource names to be returned is to allow for situations where how a package
+    /// and its resources are stored are known a priori and the non-resource names would be useful.
+    /// For instance, returning subdirectory names is allowed so that when it is known that the
+    /// package and resources are stored on the file system then those subdirectory names can be
+    /// used directly. This implementation returns exactly the keys of the resources map.
+    def contents(&self) -> PyResult<PyObject> {
+        // Each key is a resource name; convert them to Python strings.
+        let names: Vec<_> = self
+            .resources(py)
+            .keys()
+            .map(|name| name.to_py_object(py))
+            .collect();
+
+        // Hand the names back as a Python list.
+        let names_list = names.to_py_object(py);
+        Ok(names_list.as_object().clone_ref(py))
+    }
+});
+
+const DOC: &[u8] = b"Binary representation of Python modules\0"; // NUL-terminated: handed to C as MODULE_DEF.m_doc
+
+/// Module state handed to the importer module at interpreter initialization time.
+#[derive(Debug)]
+pub struct InitModuleState {
+    /// Whether to also register the filesystem `PathFinder` on sys.meta_path.
+    pub register_filesystem_importer: bool,
+
+    /// Entries that replace the contents of sys.path.
+    pub sys_paths: Vec<String>,
+
+    /// Packed Python modules data, parsed with `PythonModulesData::from()`.
+    pub py_modules_data: &'static [u8],
+
+    /// Packed Python resources data, parsed with `PythonResourcesData::from()`.
+    pub py_resources_data: &'static [u8],
+}
+
+/// Pointer to the `InitModuleState` consumed by the next module initialization.
+///
+/// `module_init()` copies the pointed-to fields into the module's own state. It is
+/// null by default, so it must be pointed at a live value before the module is created.
+pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null();
+
+/// Represents which importer to use for known modules.
+#[derive(Debug)]
+enum KnownModuleFlavor {
+    Builtin, // name listed in PyImport_Inittab
+    Frozen, // name listed in PyImport_FrozenModules
+    InMemory { module_data: PythonModuleData }, // module parsed out of py_modules_data
+}
+
+type KnownModules = HashMap<&'static str, KnownModuleFlavor>; // module name -> flavor of importer handling it
+
+/// State associated with each importer module instance.
+///
+/// Lives in the per-module state area (sized via `MODULE_DEF.m_size`, fetched with
+/// `PyModule_GetState()`), so we don't rely on global variables and multiple
+/// importer modules can exist without issue.
+#[derive(Debug)]
+struct ModuleState {
+    /// Whether to register PathFinder on sys.meta_path.
+    register_filesystem_importer: bool,
+
+    /// Entries that replace the contents of sys.path during _setup().
+    sys_paths: Vec<String>,
+
+    /// Packed Python modules data, parsed with `PythonModulesData::from()` in _setup().
+    py_modules_data: &'static [u8],
+
+    /// Packed Python resources data, parsed with `PythonResourcesData::from()` in _setup().
+    py_resources_data: &'static [u8],
+
+    /// Whether _setup() has been called; a second call raises RuntimeError.
+    setup_called: bool,
+}
+
+/// Look up the `ModuleState` attached to an instance of our importer module.
+///
+/// Raises a Python `ValueError` when the interpreter reports no state for `m`.
+/// No check is made that `m` really is an instance of our importer module.
+fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> {
+    // SAFETY: `m` is a live module object for the duration of the call.
+    let raw_state = unsafe { pyffi::PyModule_GetState(m.as_object().as_ptr()) };
+    let state_ptr = raw_state as *mut ModuleState;
+
+    // SAFETY: assumes a non-null pointer refers to the `ModuleState` area of our own
+    // module (see the caveat above); a null pointer becomes `None`.
+    match unsafe { state_ptr.as_mut() } {
+        Some(state) => Ok(state),
+        None => Err(PyErr::new::<ValueError, _>(py, "unable to retrieve module state")),
+    }
+}
+
+/// Initialize the Python module object.
+///
+/// Called from the PyInit_* function with the just-created module instance.
+/// Populates the module's internal state from NEXT_MODULE_STATE and registers
+/// a _setup() function on the module object for usage by Python.
+///
+/// Because this function reads NEXT_MODULE_STATE, it should only be called
+/// during interpreter initialization. Raises ValueError if that pointer is null.
+fn module_init(py: Python, m: &PyModule) -> PyResult<()> {
+    let state = get_module_state(py, m)?;
+
+    // SAFETY: assumes the embedder left NEXT_MODULE_STATE either null or pointing
+    // at a live `InitModuleState`; null is turned into an error, never dereferenced.
+    let next = match unsafe { NEXT_MODULE_STATE.as_ref() } {
+        Some(next) => next,
+        None => return Err(PyErr::new::<ValueError, _>(py, "module state not provided")),
+    };
+
+    state.register_filesystem_importer = next.register_filesystem_importer;
+    // TODO we could move the value if we wanted to avoid the clone().
+    state.sys_paths = next.sys_paths.clone();
+    state.py_modules_data = next.py_modules_data;
+    state.py_resources_data = next.py_resources_data;
+    state.setup_called = false;
+
+    m.add(
+        py,
+        "_setup",
+        py_fn!(
+            py,
+            module_setup(
+                m: PyModule,
+                bootstrap_module: PyModule,
+                marshal_module: PyModule,
+                decode_source: PyObject
+            )
+        ),
+    )?;
+
+    Ok(())
+}
+
+/// Called after module import/initialization to configure the importing mechanism.
+///
+/// Replaces the contents of sys.meta_path with a single `PyOxidizerFinder`, optionally
+/// registers the filesystem `PathFinder`, and resets sys.path from the module state.
+/// Raises if called a second time or if sys.meta_path doesn't hold exactly 2 entries.
+/// Should only be called once, from _frozen_importlib_external._install_external_importers().
+fn module_setup(
+    py: Python,
+    m: PyModule,
+    bootstrap_module: PyModule,
+    marshal_module: PyModule,
+    decode_source: PyObject,
+) -> PyResult<PyObject> {
+    let state = get_module_state(py, &m)?;
+
+    if state.setup_called {
+        return Err(PyErr::new::<RuntimeError, _>(
+            py,
+            "PyOxidizer _setup() already called",
+        ));
+    }
+
+    state.setup_called = true; // set up front: a failed _setup() cannot be retried
+
+    let imp_module = bootstrap_module.get(py, "_imp")?;
+    let imp_module = imp_module.cast_into::<PyModule>(py)?;
+    let sys_module = bootstrap_module.get(py, "sys")?;
+    let sys_module = sys_module.cast_as::<PyModule>(py)?;
+    let meta_path_object = sys_module.get(py, "meta_path")?;
+
+    // We should be executing as part of
+    // _frozen_importlib_external._install_external_importers().
+    // _frozen_importlib._install() should have already been called and set up
+    // sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the
+    // only meta path importers present.
+
+    let meta_path = meta_path_object.cast_as::<PyList>(py)?;
+
+    if meta_path.len(py) != 2 {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "sys.meta_path does not contain 2 values",
+        ));
+    }
+
+    let builtin_importer = meta_path.get_item(py, 0);
+    let frozen_importer = meta_path.get_item(py, 1);
+
+    // It may seem inefficient to create a full HashMap of the parsed data instead of e.g.
+    // streaming it. But the overhead of iterators was measured to be more than building
+    // up a temporary HashMap.
+    let modules_data = match PythonModulesData::from(state.py_modules_data) {
+        Ok(v) => v,
+        Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
+    };
+
+    // Populate our known module lookup table with entries from builtins, frozens, and
+    // finally us. Last write wins and has the same effect as registering our
+    // meta path importer first. This should be safe. If nothing else, it allows
+    // some builtins to be overwritten by .py implemented modules.
+    let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10);
+
+    for i in 0.. { // the table ends at the first record whose name is null
+        let record = unsafe { pyffi::PyImport_Inittab.offset(i) };
+
+        if unsafe { *record }.name.is_null() {
+            break;
+        }
+
+        let name = unsafe { CStr::from_ptr((*record).name as _) };
+        let name_str = match name.to_str() {
+            Ok(v) => v,
+            Err(_) => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    "unable to parse PyImport_Inittab",
+                ));
+            }
+        };
+
+        known_modules.insert(name_str, KnownModuleFlavor::Builtin);
+    }
+
+    for i in 0.. { // the table ends at the first record whose name is null
+        let record = unsafe { pyffi::PyImport_FrozenModules.offset(i) };
+
+        if unsafe { *record }.name.is_null() {
+            break;
+        }
+
+        let name = unsafe { CStr::from_ptr((*record).name as _) };
+        let name_str = match name.to_str() {
+            Ok(v) => v,
+            Err(_) => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    "unable to parse PyImport_FrozenModules",
+                ));
+            }
+        };
+
+        known_modules.insert(name_str, KnownModuleFlavor::Frozen);
+    }
+
+    for (name, record) in modules_data.data {
+        known_modules.insert(
+            name,
+            KnownModuleFlavor::InMemory {
+                module_data: record,
+            },
+        );
+    }
+
+    let resources_data = match PythonResourcesData::from(state.py_resources_data) {
+        Ok(v) => v,
+        Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
+    };
+
+    let marshal_loads = marshal_module.get(py, "loads")?;
+    let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?;
+    let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?;
+
+    let builtins_module =
+        match unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) } {
+            Some(o) => o.cast_into::<PyDict>(py),
+            None => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    "unable to obtain __builtins__",
+                ));
+            }
+        }?;
+
+    let exec_fn = match builtins_module.get_item(py, "exec") {
+        Some(v) => v,
+        None => {
+            return Err(PyErr::new::<ValueError, _>(
+                py,
+                "could not obtain __builtins__.exec",
+            ));
+        }
+    };
+
+    let resource_readers: RefCell<Box<HashMap<String, PyObject>>> =
+        RefCell::new(Box::new(HashMap::new()));
+
+    let unified_importer = PyOxidizerFinder::create_instance(
+        py,
+        imp_module,
+        marshal_loads,
+        builtin_importer,
+        frozen_importer,
+        call_with_frames_removed,
+        module_spec_type,
+        decode_source,
+        exec_fn,
+        modules_data.packages,
+        known_modules,
+        resources_data.packages,
+        resource_readers,
+    )?;
+    meta_path_object.call_method(py, "clear", NoArgs, None)?;
+    meta_path_object.call_method(py, "append", (unified_importer,), None)?;
+
+    // At this point the importing mechanism is fully initialized to use our
+    // unified importer, which handles built-in, frozen, and in-memory imports.
+
+    // Because we're probably running during Py_Initialize() and stdlib modules
+    // may not be in-memory, we need to register and configure additional importers
+    // here, before continuing with Py_Initialize(), otherwise we may not find
+    // the standard library!
+
+    if state.register_filesystem_importer {
+        // This is what importlib._bootstrap_external usually does:
+        // supported_loaders = _get_supported_file_loaders()
+        // sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
+        // sys.meta_path.append(PathFinder)
+        let frozen_importlib_external = py.import("_frozen_importlib_external")?;
+
+        let loaders =
+            frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?;
+        let loaders_list = loaders.cast_as::<PyList>(py)?;
+        let loaders_vec: Vec<PyObject> = loaders_list.iter(py).collect();
+        let loaders_tuple = PyTuple::new(py, loaders_vec.as_slice());
+
+        let file_finder = frozen_importlib_external.get(py, "FileFinder")?;
+        let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?;
+        let path_hooks = sys_module.get(py, "path_hooks")?;
+        path_hooks.call_method(py, "append", (path_hook,), None)?;
+
+        let path_finder = frozen_importlib_external.get(py, "PathFinder")?;
+        let meta_path = sys_module.get(py, "meta_path")?;
+        meta_path.call_method(py, "append", (path_finder,), None)?;
+    }
+
+    // Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path.
+    // But we tried to do this and only ran into problems due to string conversions,
+    // unwanted side-effects. Updating sys.path directly before it is used by PathFinder
+    // (which was just registered above) should have the same effect.
+
+    // Always clear out sys.path.
+    let sys_path = sys_module.get(py, "path")?;
+    sys_path.call_method(py, "clear", NoArgs, None)?;
+
+    // And repopulate it with entries from the config.
+    for path in &state.sys_paths {
+        let py_path = PyString::new(py, path.as_str());
+
+        sys_path.call_method(py, "append", (py_path,), None)?;
+    }
+
+    Ok(py.None())
+}
+
+static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef {
+    m_base: pyffi::PyModuleDef_HEAD_INIT,
+    m_name: std::ptr::null(), // filled in by PyInit__pyoxidizer_importer()
+    m_doc: std::ptr::null(), // filled in by PyInit__pyoxidizer_importer()
+    m_size: std::mem::size_of::<ModuleState>() as isize,
+    m_methods: std::ptr::null_mut(),
+    m_slots: std::ptr::null_mut(),
+    m_traverse: None,
+    m_clear: None,
+    m_free: None,
+};
+
+/// Module initialization function: creates the Python module object.
+///
+/// Returns the new module, or null on failure (errors raised while casting or
+/// initializing the module are restored as the current Python exception).
+///
+/// The cpython crate's module macros are deliberately avoided: they are somewhat
+/// opinionated (e.g. they call PyEval_InitThreads()) and we want total control.
+#[allow(non_snake_case)]
+pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject {
+    let py = unsafe { cpython::Python::assume_gil_acquired() };
+
+    // TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31.
+    let raw_module = unsafe {
+        if MODULE_DEF.m_name.is_null() {
+            MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _;
+            MODULE_DEF.m_doc = DOC.as_ptr() as *const _;
+        }
+
+        pyffi::PyModule_Create(&mut MODULE_DEF)
+    };
+
+    if raw_module.is_null() {
+        return raw_module;
+    }
+
+    let object = unsafe { PyObject::from_owned_ptr(py, raw_module) };
+    let module = match object.cast_into::<PyModule>(py) {
+        Ok(module) => module,
+        Err(cast_err) => {
+            PyErr::from(cast_err).restore(py);
+            return std::ptr::null_mut();
+        }
+    };
+
+    if let Err(init_err) = module_init(py, &module) {
+        init_err.restore(py);
+        return std::ptr::null_mut();
+    }
+
+    module.into_object().steal_ptr()
+}
diff --git a/rust/pyembed/src/data.rs b/rust/pyembed/src/data.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/data.rs
@@ -0,0 +1,5 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+include!(env!("PYEMBED_DATA_RS_PATH"));
diff --git a/rust/pyembed/src/config.rs b/rust/pyembed/src/config.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/config.rs
@@ -0,0 +1,195 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Data structures for configuring a Python interpreter.
+
+use python3_sys as pyffi;
+use std::ffi::CString;
+
+/// Defines which memory allocator to use for the raw domain.
+#[derive(Clone, Debug)]
+pub enum PythonRawAllocator {
+    /// Use jemalloc (NOTE(review): presumably needs the crate's `jemalloc` feature).
+    Jemalloc,
+    /// Use the Rust global allocator.
+    Rust,
+    /// Use the system allocator.
+    System,
+}
+
+/// Defines what Python code to run by default.
+#[derive(Clone, Debug)]
+pub enum PythonRunMode {
+    /// Do not run anything.
+    None,
+    /// Run a Python REPL.
+    Repl,
+    /// Run the named Python module as the main module.
+    Module { module: String },
+    /// Evaluate Python code from the given string.
+    Eval { code: String },
+}
+
+/// Defines `terminfo` database resolution semantics.
+#[derive(Clone, Debug)]
+pub enum TerminfoResolution {
+    /// Resolve `terminfo` database using appropriate behavior for current OS.
+    Dynamic,
+    /// Do not attempt to resolve the `terminfo` database. Basically a no-op.
+    None,
+    /// Use a specified string as the `TERMINFO_DIRS` value.
+    Static(String),
+}
+
+/// Defines an extra extension module to make available to the interpreter.
+#[derive(Clone, Debug)]
+pub struct ExtensionModule {
+    /// Name of the extension module, as a C string.
+    pub name: CString,
+
+    /// Extension module initialization function; returns the module object pointer.
+    pub init_func: unsafe extern "C" fn() -> *mut pyffi::PyObject,
+}
+
+/// Holds the configuration of an embedded Python interpreter.
+///
+/// Instances of this struct can be used to construct Python interpreters.
+///
+/// Each instance contains the total state to define the run-time behavior of
+/// a Python interpreter.
+#[derive(Clone, Debug)]
+pub struct PythonConfig {
+    /// Name of encoding for stdio handles.
+    pub standard_io_encoding: Option<String>,
+
+    /// Name of encoding error mode for stdio handles.
+    pub standard_io_errors: Option<String>,
+
+    /// Python optimization level.
+    pub opt_level: i32,
+
+    /// Whether to load our custom frozen importlib bootstrap modules.
+    pub use_custom_importlib: bool,
+
+    /// Whether to load the filesystem-based sys.meta_path finder.
+    pub filesystem_importer: bool,
+
+    /// Filesystem paths to add to sys.path.
+    ///
+    /// ``$ORIGIN`` will resolve to the directory of the application at
+    /// run-time.
+    pub sys_paths: Vec<String>,
+
+    /// Controls whether to detect comparing bytes/bytearray with str.
+    ///
+    /// If 1, issues a warning. If 2 or greater, raises a BytesWarning
+    /// exception.
+    pub bytes_warning: i32,
+
+    /// Whether to load the site.py module at initialization time.
+    pub import_site: bool,
+
+    /// Whether to load a user-specific site module at initialization time.
+    pub import_user_site: bool,
+
+    /// Whether to ignore various PYTHON* environment variables.
+    pub ignore_python_env: bool,
+
+    /// Whether to enter interactive mode after executing a script or a command.
+    pub inspect: bool,
+
+    /// Whether to put interpreter in interactive mode.
+    pub interactive: bool,
+
+    /// Whether to enable isolated mode.
+    pub isolated: bool,
+
+    /// If set, set the Windows filesystem encoding to mbcs and the filesystem
+    /// error handler to replace.
+    pub legacy_windows_fs_encoding: bool,
+
+    /// Whether to use io.FileIO instead of io.WindowsConsoleIO for sys.stdin,
+    /// sys.stdout, and sys.stderr.
+    pub legacy_windows_stdio: bool,
+
+    /// Whether to suppress writing of ``.pyc`` files when importing ``.py``
+    /// files from the filesystem. This is typically irrelevant since modules
+    /// are imported from memory.
+    pub dont_write_bytecode: bool,
+
+    /// Whether stdout and stderr streams should be unbuffered.
+    pub unbuffered_stdio: bool,
+
+    /// Whether to enable parser debugging output.
+    pub parser_debug: bool,
+
+    /// Whether to enable quiet mode.
+    pub quiet: bool,
+
+    /// Whether to use the PYTHONHASHSEED environment variable to initialize the
+    /// hash seed.
+    pub use_hash_seed: bool,
+
+    /// Controls the level of the verbose mode for the interpreter.
+    pub verbose: i32,
+
+    /// Bytecode for the importlib._bootstrap / _frozen_importlib module.
+    pub frozen_importlib_data: &'static [u8],
+
+    /// Bytecode for the importlib._bootstrap_external / _frozen_importlib_external module.
+    pub frozen_importlib_external_data: &'static [u8],
+
+    /// Reference to raw Python modules data.
+    ///
+    /// The referenced data is produced as part of PyOxidizer packaging. This
+    /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
+    pub py_modules_data: &'static [u8],
+
+    /// Reference to raw Python resources data.
+    ///
+    /// The referenced data is produced as part of PyOxidizer packaging. This
+    /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
+    pub py_resources_data: &'static [u8],
+
+    /// Extra extension modules to make available to the interpreter.
+    ///
+    /// The values will effectively be passed to ``PyImport_ExtendInitTab()``.
+    pub extra_extension_modules: Vec<ExtensionModule>,
+
+    /// Whether to set sys.argvb with bytes versions of process arguments.
+    ///
+    /// On Windows, bytes will be UTF-16. On POSIX, bytes will be raw char*
+    /// values passed to `int main()`.
+    pub argvb: bool,
+
+    /// Whether to set sys.frozen=True.
+    ///
+    /// Setting this will enable Python to emulate "frozen" binaries, such as
+    /// those used by PyInstaller.
+    pub sys_frozen: bool,
+
+    /// Whether to set sys._MEIPASS to the directory of the executable.
+    ///
+    /// Setting this will enable Python to emulate PyInstaller's behavior
+    /// of setting this attribute.
+    pub sys_meipass: bool,
+
+    /// Which memory allocator to use for the raw domain.
+    pub raw_allocator: PythonRawAllocator,
+
+    /// How to resolve the `terminfo` database.
+    pub terminfo_resolution: TerminfoResolution,
+
+    /// Environment variable holding the directory to write a loaded modules file.
+    ///
+    /// If this value is set and the environment variable it refers to is set,
+    /// on interpreter shutdown, we will write a ``modules-<random>`` file to
+    /// the directory specified containing a ``\n`` delimited list of modules
+    /// loaded in ``sys.modules``.
+    pub write_modules_directory_env: Option<String>,
+
+    /// Defines what code to run by default.
+    /// See `PythonRunMode` for the available modes.
+    pub run: PythonRunMode,
+}
diff --git a/rust/pyembed/build.rs b/rust/pyembed/build.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/build.rs
@@ -0,0 +1,65 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::env;
+use std::path::PathBuf;
+use std::process;
+
+/// Machine-specific pyoxidizer path this file was created with; PYOXIDIZER_EXE overrides it.
+const DEFAULT_PYOXIDIZER_EXE: &str = r#"/Users/gps/.cargo/bin/pyoxidizer"#;
+
+/// Build script entry point.
+///
+/// Either re-emits the cargo metadata of pre-built artifacts (when
+/// PYOXIDIZER_REUSE_ARTIFACTS is set) or delegates to `pyoxidizer run-build-script`.
+fn main() {
+    // We support using pre-built artifacts, in which case we emit the
+    // cargo metadata lines from the "original" build to "register" the
+    // artifacts with this cargo invocation.
+    if env::var("PYOXIDIZER_REUSE_ARTIFACTS").is_ok() {
+        // Artifacts live in PYOXIDIZER_ARTIFACT_DIR when set, else in OUT_DIR.
+        let artifact_dir_path = match env::var("PYOXIDIZER_ARTIFACT_DIR") {
+            Ok(dir) => PathBuf::from(dir),
+            Err(_) => PathBuf::from(env::var("OUT_DIR").expect("cargo sets OUT_DIR")),
+        };
+
+        println!(
+            "using pre-built artifacts from {}",
+            artifact_dir_path.display()
+        );
+
+        println!("cargo:rerun-if-env-changed=PYOXIDIZER_REUSE_ARTIFACTS");
+        println!("cargo:rerun-if-env-changed=PYOXIDIZER_ARTIFACT_DIR");
+
+        // Emit the cargo metadata lines to register libraries for linking.
+        let cargo_metadata_path = artifact_dir_path.join("cargo_metadata.txt");
+        let metadata = std::fs::read_to_string(&cargo_metadata_path).unwrap_or_else(|e| {
+            // Built lazily: the message is only formatted when the read fails.
+            panic!("failed to read {}: {}", cargo_metadata_path.display(), e)
+        });
+        println!("{}", metadata);
+    } else {
+        // PYOXIDIZER_EXE overrides the path recorded when this file was generated.
+        let pyoxidizer_exe =
+            env::var("PYOXIDIZER_EXE").unwrap_or_else(|_| DEFAULT_PYOXIDIZER_EXE.to_string());
+
+        if !PathBuf::from(&pyoxidizer_exe).exists() {
+            panic!("pyoxidizer executable does not exist: {}", &pyoxidizer_exe);
+        }
+
+        // Let pyoxidizer do the actual work; any failure aborts the build.
+        match process::Command::new(&pyoxidizer_exe)
+            .arg("run-build-script")
+            .arg("build.rs")
+            .status()
+        {
+            Ok(status) => {
+                if !status.success() {
+                    panic!("`pyoxidizer run-build-script` failed: {}", status);
+                }
+            }
+            Err(e) => panic!("`pyoxidizer run-build-script` failed: {}", e),
+        }
+    }
+}
diff --git a/rust/pyembed/Cargo.toml b/rust/pyembed/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "pyembed"
+version = "0.3.0"
+authors = ["Gregory Szorc <gregory.szorc at gmail.com>"]
+edition = "2018"
+build = "build.rs"
+
+[dependencies]
+byteorder = "1"
+jemalloc-sys = { version = "0.3", optional = true }
+lazy_static = "1.3"
+libc = "0.2"
+uuid = { version = "0.7", features = ["v4"] }
+
+[dependencies.python3-sys]
+git = "https://github.com/indygreg/PyOxidizer.git"
+tag = "v0.3.0"
+
+[dependencies.cpython]
+git = "https://github.com/indygreg/PyOxidizer.git"
+tag = "v0.3.0"
+features = ["link-mode-unresolved-static", "python3-sys", "no-auto-initialize"]
+
+[features]
+default = []
+jemalloc = ["jemalloc-sys"]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,3 +1,3 @@
 [workspace]
 members = ["hg-core", "hg-direct-ffi", "hg-cpython"]
-exclude = ["chg", "hgcli"]
+exclude = ["chg", "hgcli", "pyembed"]



To: indygreg, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list