D7449: rust: vendor pyembed crate
indygreg (Gregory Szorc)
phabricator at mercurial-scm.org
Sat Nov 16 21:22:56 UTC 2019
indygreg created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
We want to use PyOxidizer to produce machine native executables
for running Mercurial. This commit starts the process of doing
that.
Under the hood, PyOxidizer uses a "pyembed" crate to manage an
embedded Python interpreter. This crate needs to be vendored
because it relies on a patched version of the rust-cpython
crate.
This commit vendors the pyembed crate associated with version
0.3 of PyOxidizer.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D7449
AFFECTED FILES
rust/Cargo.toml
rust/pyembed/Cargo.toml
rust/pyembed/build.rs
rust/pyembed/src/config.rs
rust/pyembed/src/data.rs
rust/pyembed/src/importer.rs
rust/pyembed/src/lib.rs
rust/pyembed/src/osutils.rs
rust/pyembed/src/pyalloc.rs
rust/pyembed/src/pyinterp.rs
rust/pyembed/src/pystr.rs
CHANGE DETAILS
diff --git a/rust/pyembed/src/pystr.rs b/rust/pyembed/src/pystr.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pystr.rs
@@ -0,0 +1,98 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Bridge Rust and Python string types.
+
+use libc::{c_void, size_t, wchar_t};
+use python3_sys as pyffi;
+use std::ffi::{CString, OsString};
+use std::ptr::null_mut;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::OsStrExt;
+#[cfg(target_family = "windows")]
+use std::os::windows::prelude::OsStrExt;
+
+use cpython::{PyObject, Python};
+
+/// Owns a `wchar_t` buffer that can be handed to Python C APIs.
+///
+/// Within this file the buffer is produced by `Py_DecodeLocale()` (see
+/// `OwnedPyStr::from_str()`) and released with `PyMem_RawFree()` when the
+/// value is dropped.
+#[derive(Debug)]
+pub struct OwnedPyStr {
+ // Raw buffer exposed by as_wchar_ptr(); freed when the value is dropped.
+ data: *const wchar_t,
+}
+
+impl OwnedPyStr {
+    /// Returns the raw `wchar_t` pointer owned by this value.
+    ///
+    /// The pointer is freed when `self` is dropped, so it must not be used
+    /// past that point.
+    pub fn as_wchar_ptr(&self) -> *const wchar_t {
+        self.data
+    }
+
+    /// Decodes `s` into a wide-character string via `Py_DecodeLocale()`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `s` contains an interior NUL byte or if `Py_DecodeLocale()`
+    /// returns a null pointer.
+    pub fn from_str(s: &str) -> Result<Self, &'static str> {
+        // Py_DecodeLocale() needs a NUL-terminated input, hence the CString.
+        let c_source = match CString::new(s) {
+            Ok(value) => value,
+            Err(_) => return Err("source string has NULL bytes"),
+        };
+
+        // The decoded length is not needed, so a null size pointer is passed.
+        let decoded =
+            unsafe { pyffi::Py_DecodeLocale(c_source.as_ptr(), null_mut::<size_t>()) };
+
+        if decoded.is_null() {
+            return Err("could not convert str to Python string");
+        }
+
+        Ok(OwnedPyStr { data: decoded })
+    }
+}
+
+impl Drop for OwnedPyStr {
+    /// Hands the owned buffer back to Python via `PyMem_RawFree()`.
+    fn drop(&mut self) {
+        let raw = self.data as *mut c_void;
+
+        unsafe {
+            pyffi::PyMem_RawFree(raw);
+        }
+    }
+}
+
+#[cfg(target_family = "unix")]
+const SURROGATEESCAPE: &[u8] = b"surrogateescape\0";
+
+#[cfg(target_family = "unix")]
+pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
+ // PyUnicode_DecodeLocaleAndSize says the input must have a trailing NULL.
+ // So use a CString for that.
+ let b = CString::new(s.as_bytes()).or_else(|_| Err("not a valid C string"))?;
+ unsafe {
+ let o = pyffi::PyUnicode_DecodeLocaleAndSize(
+ b.as_ptr() as *const i8,
+ b.to_bytes().len() as isize,
+ SURROGATEESCAPE.as_ptr() as *const i8,
+ );
+
+ Ok(PyObject::from_owned_ptr(py, o))
+ }
+}
+
+/// Converts an `OsString` into a Python `str` object (Windows implementation).
+///
+/// The value is re-encoded as UTF-16 code units and handed to
+/// `PyUnicode_FromWideChar()`.
+///
+/// # Errors
+///
+/// Fails if Python could not create the `str` object. The pending Python
+/// exception is cleared in that case.
+#[cfg(target_family = "windows")]
+pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
+    // Windows OsString should be valid UTF-16.
+    let w: Vec<u16> = s.encode_wide().collect();
+    unsafe {
+        let o = pyffi::PyUnicode_FromWideChar(w.as_ptr(), w.len() as isize);
+
+        // A null return means Python raised an exception. Wrapping a null
+        // pointer in a PyObject is not valid, so clear the pending exception
+        // and report the failure through the Result instead.
+        if o.is_null() {
+            pyffi::PyErr_Clear();
+            return Err("unable to convert OS string to Python str");
+        }
+
+        Ok(PyObject::from_owned_ptr(py, o))
+    }
+}
+
+/// Copies the raw bytes of an `OsString` into a new Python `bytes` object
+/// (POSIX implementation).
+#[cfg(target_family = "unix")]
+pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
+    let raw = s.as_bytes();
+    let (start, length) = (raw.as_ptr() as *const i8, raw.len() as isize);
+
+    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(start, length)) }
+}
+
+/// Copies the UTF-16 code units of an `OsString` into a new Python `bytes`
+/// object (Windows implementation).
+#[cfg(target_family = "windows")]
+pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
+    let units: Vec<u16> = s.encode_wide().collect();
+    // Each u16 code unit contributes two bytes to the resulting object.
+    let byte_len = units.len() as isize * 2;
+
+    unsafe {
+        PyObject::from_owned_ptr(
+            py,
+            pyffi::PyBytes_FromStringAndSize(units.as_ptr() as *const i8, byte_len),
+        )
+    }
+}
diff --git a/rust/pyembed/src/pyinterp.rs b/rust/pyembed/src/pyinterp.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pyinterp.rs
@@ -0,0 +1,853 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Manage an embedded Python interpreter.
+
+use libc::c_char;
+use python3_sys as pyffi;
+use std::collections::BTreeSet;
+use std::env;
+use std::ffi::CString;
+use std::fs;
+use std::io::Write;
+use std::path::PathBuf;
+use std::ptr::null;
+
+use cpython::exc::ValueError;
+use cpython::{
+ GILGuard, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
+ PyString, Python, PythonObject, ToPyObject,
+};
+
+use super::config::{PythonConfig, PythonRawAllocator, PythonRunMode, TerminfoResolution};
+use super::importer::PyInit__pyoxidizer_importer;
+use super::osutils::resolve_terminfo_dirs;
+#[cfg(feature = "jemalloc-sys")]
+use super::pyalloc::make_raw_jemalloc_allocator;
+use super::pyalloc::{make_raw_rust_memory_allocator, RawAllocator};
+use super::pystr::{osstring_to_bytes, osstring_to_str, OwnedPyStr};
+
+/// NUL-terminated name under which the `_pyoxidizer_importer` extension
+/// module is registered via `PyImport_AppendInittab()`.
+pub const PYOXIDIZER_IMPORTER_NAME: &[u8] = b"_pyoxidizer_importer\0";
+
+// NUL-terminated names of the frozen modules supplied by
+// make_custom_frozen_modules().
+const FROZEN_IMPORTLIB_NAME: &[u8] = b"_frozen_importlib\0";
+const FROZEN_IMPORTLIB_EXTERNAL_NAME: &[u8] = b"_frozen_importlib_external\0";
+
+/// Represents the results of executing Python code with exception handling.
+///
+/// Produced by `MainPythonInterpreter::run_and_handle_error()` and mapped to
+/// a process exit code by `MainPythonInterpreter::run_as_main()`.
+#[derive(Debug)]
+pub enum PythonRunResult {
+ /// Code executed without raising an exception.
+ Ok {},
+ /// Code executed and raised an exception.
+ Err {},
+ /// Code executed and raised SystemExit with the specified exit code.
+ Exit { code: i32 },
+}
+
+/// Builds the frozen module table installed as `PyImport_FrozenModules`.
+///
+/// The table holds the `_frozen_importlib` and `_frozen_importlib_external`
+/// entries backed by the bytecode in `config`, followed by an all-null entry.
+fn make_custom_frozen_modules(config: &PythonConfig) -> [pyffi::_frozen; 3] {
+    let importlib = pyffi::_frozen {
+        name: FROZEN_IMPORTLIB_NAME.as_ptr() as *const i8,
+        code: config.frozen_importlib_data.as_ptr(),
+        size: config.frozen_importlib_data.len() as i32,
+    };
+
+    let importlib_external = pyffi::_frozen {
+        name: FROZEN_IMPORTLIB_EXTERNAL_NAME.as_ptr() as *const i8,
+        code: config.frozen_importlib_external_data.as_ptr(),
+        size: config.frozen_importlib_external_data.len() as i32,
+    };
+
+    // All-null entry placed last in the table.
+    let sentinel = pyffi::_frozen {
+        name: null(),
+        code: null(),
+        size: 0,
+    };
+
+    [importlib, importlib_external, sentinel]
+}
+
+// CRT function used by stdin_to_file() / stderr_to_file() on Windows to
+// obtain the FILE* for a standard stream index (0 and 2 are used here).
+#[cfg(windows)]
+extern "C" {
+ pub fn __acrt_iob_func(x: u32) -> *mut libc::FILE;
+}
+
+/// Returns the C runtime's `FILE*` for standard input (Windows).
+#[cfg(windows)]
+fn stdin_to_file() -> *mut libc::FILE {
+ // The stdin symbol is made available by importing <stdio.h>. On Windows,
+ // stdin is defined in corecrt_wstdio.h as a `#define` that calls this
+ // internal CRT function. There's no exported symbol to use. So we
+ // emulate the behavior of the C code.
+ //
+ // Relying on an internal CRT symbol is probably wrong. But Microsoft
+ // typically keeps backwards compatibility for undocumented functions
+ // like this because people use them in the wild.
+ //
+ // An attempt was made to use fdopen(0) like we do on POSIX. However,
+ // this causes a crash. The Microsoft C Runtime is already bending over
+ // backwards to coerce its native HANDLEs into POSIX file descriptors.
+ // Even if there are other ways to coerce a FILE* from a HANDLE
+ // (_open_osfhandle() + _fdopen() might work), using the same function
+ // that <stdio.h> uses to obtain a FILE* seems like the least risky thing
+ // to do.
+ unsafe { __acrt_iob_func(0) }
+}
+
+/// Opens a C `FILE*` stream on the standard input descriptor (POSIX).
+///
+/// Returns whatever `fdopen()` returns, which may be null on failure.
+#[cfg(unix)]
+fn stdin_to_file() -> *mut libc::FILE {
+    // fdopen() expects its mode as a NUL-terminated C string. Passing the
+    // address of a lone `c_char` would let libc read past that byte, so a
+    // terminated byte string literal is used instead.
+    unsafe { libc::fdopen(libc::STDIN_FILENO, b"r\0".as_ptr() as *const libc::c_char) }
+}
+
+/// Returns the C runtime's `FILE*` for standard error (Windows).
+///
+/// Uses the same CRT function as `stdin_to_file()`, with stream index 2.
+#[cfg(windows)]
+fn stderr_to_file() -> *mut libc::FILE {
+ unsafe { __acrt_iob_func(2) }
+}
+
+#[cfg(unix)]
+fn stderr_to_file() -> *mut libc::FILE {
+ unsafe { libc::fdopen(libc::STDERR_FILENO, &('w' as libc::c_char)) }
+}
+
+/// Returns the jemalloc-backed raw allocator definition.
+///
+/// Only compiled when the `jemalloc-sys` feature is enabled.
+#[cfg(feature = "jemalloc-sys")]
+fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
+ make_raw_jemalloc_allocator()
+}
+
+/// Fallback used when the `jemalloc-sys` feature is disabled.
+///
+/// # Panics
+///
+/// Always panics: jemalloc cannot be selected in this build configuration.
+#[cfg(not(feature = "jemalloc-sys"))]
+fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
+ panic!("jemalloc is not available in this build configuration");
+}
+
+/// No-op on Unix: the legacy Windows flags are only set in the Windows build.
+#[cfg(unix)]
+fn set_windows_flags(_config: &PythonConfig) {}
+
+/// Applies the Windows-only legacy encoding settings from `config` to the
+/// corresponding global Python flags.
+#[cfg(windows)]
+fn set_windows_flags(config: &PythonConfig) {
+    let fs_encoding_flag = if config.legacy_windows_fs_encoding { 1 } else { 0 };
+    let stdio_flag = if config.legacy_windows_stdio { 1 } else { 0 };
+
+    unsafe {
+        pyffi::Py_LegacyWindowsFSEncodingFlag = fs_encoding_flag;
+        pyffi::Py_LegacyWindowsStdioFlag = stdio_flag;
+    }
+}
+
+/// Manages an embedded Python interpreter.
+///
+/// **Warning: Python interpreters have global state. There should only be a
+/// single instance of this type per process.**
+///
+/// Instances must only be constructed through [`MainPythonInterpreter::new()`](#method.new).
+///
+/// This type and its various functionality is a glorified wrapper around the
+/// Python C API. But there's a lot of added functionality on top of what the C
+/// API provides.
+///
+/// Both the low-level `python3-sys` and higher-level `cpython` crates are used.
+///
+/// Dropping an instance calls `Py_FinalizeEx()`.
+pub struct MainPythonInterpreter<'a> {
+ /// Configuration the interpreter was constructed with.
+ pub config: PythonConfig,
+ // Table installed as PyImport_FrozenModules when use_custom_importlib is set.
+ frozen_modules: [pyffi::_frozen; 3],
+ // Set to true once init() has called Py_Initialize().
+ init_run: bool,
+ // Allocator passed to PyMem_SetAllocator() when jemalloc is selected.
+ raw_allocator: Option<pyffi::PyMemAllocatorEx>,
+ // Allocator passed to PyMem_SetAllocator() when the Rust allocator is selected.
+ raw_rust_allocator: Option<RawAllocator>,
+ // Guard stored by acquire_gil() when it had to take the GIL itself.
+ gil: Option<GILGuard>,
+ // Cached interpreter handle; None after release_gil().
+ py: Option<Python<'a>>,
+ // Keeps the string passed to Py_SetProgramName() alive.
+ program_name: Option<OwnedPyStr>,
+}
+
+impl<'a> MainPythonInterpreter<'a> {
+    /// Construct a Python interpreter from a configuration.
+    ///
+    /// The Python interpreter is initialized as a side-effect. The GIL is held.
+    ///
+    /// Depending on `config.terminfo_resolution`, the `TERMINFO_DIRS`
+    /// environment variable may be set before initialization.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error reported by `init()`.
+    pub fn new(config: PythonConfig) -> Result<MainPythonInterpreter<'a>, &'static str> {
+        match &config.terminfo_resolution {
+            TerminfoResolution::Dynamic => {
+                if let Some(dirs) = resolve_terminfo_dirs() {
+                    env::set_var("TERMINFO_DIRS", &dirs);
+                }
+            }
+            TerminfoResolution::Static(dirs) => env::set_var("TERMINFO_DIRS", dirs),
+            TerminfoResolution::None => {}
+        }
+
+        // At most one of the two custom allocators is populated.
+        let mut raw_allocator = None;
+        let mut raw_rust_allocator = None;
+        match config.raw_allocator {
+            PythonRawAllocator::Jemalloc => raw_allocator = Some(raw_jemallocator()),
+            PythonRawAllocator::Rust => {
+                raw_rust_allocator = Some(make_raw_rust_memory_allocator())
+            }
+            PythonRawAllocator::System => {}
+        }
+
+        // Must be computed before `config` is moved into the struct.
+        let frozen_modules = make_custom_frozen_modules(&config);
+
+        let mut interpreter = MainPythonInterpreter {
+            config,
+            frozen_modules,
+            init_run: false,
+            raw_allocator,
+            raw_rust_allocator,
+            gil: None,
+            py: None,
+            program_name: None,
+        };
+
+        interpreter.init()?;
+
+        Ok(interpreter)
+    }
+
+    /// Initialize the interpreter.
+    ///
+    /// This mutates global state in the Python interpreter according to the
+    /// bound config and initializes the Python interpreter.
+    ///
+    /// After this is called, the embedded Python interpreter is ready to
+    /// execute custom code.
+    ///
+    /// If called more than once, the function is a no-op from the perspective
+    /// of interpreter initialization.
+    ///
+    /// Returns a Python instance which has the GIL acquired.
+    ///
+    /// # Errors
+    ///
+    /// Returns a static message when the current executable cannot be
+    /// resolved, when a string cannot be converted for the C API, when an
+    /// extension module or the standard stream encoding cannot be registered,
+    /// or when one of the `sys` attributes cannot be set.
+    fn init(&mut self) -> Result<Python, &'static str> {
+        if self.init_run {
+            return Ok(self.acquire_gil());
+        }
+
+        let config = &self.config;
+
+        let exe = env::current_exe().or_else(|_| Err("could not obtain current exe"))?;
+        let origin = exe
+            .parent()
+            .ok_or_else(|| "unable to get exe parent")?
+            .display()
+            .to_string();
+
+        let sys_paths: Vec<String> = config
+            .sys_paths
+            .iter()
+            .map(|path| path.replace("$ORIGIN", &origin))
+            .collect();
+
+        // TODO should we call PyMem::SetupDebugHooks() if enabled?
+        if let Some(raw_allocator) = &self.raw_allocator {
+            unsafe {
+                let ptr = raw_allocator as *const _;
+                pyffi::PyMem_SetAllocator(
+                    pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
+                    ptr as *mut _,
+                );
+            }
+        } else if let Some(raw_rust_allocator) = &self.raw_rust_allocator {
+            unsafe {
+                let ptr = &raw_rust_allocator.allocator as *const _;
+                pyffi::PyMem_SetAllocator(
+                    pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
+                    ptr as *mut _,
+                );
+            }
+        }
+
+        // Module state is a bit wonky.
+        //
+        // Our in-memory importer relies on a special module which holds references
+        // to Python objects exposing module/resource data. This module is imported as
+        // part of initializing the Python interpreter.
+        //
+        // This Python module object needs to hold references to the raw Python module
+        // and resource data. Those references are defined by the InitModuleState struct.
+        //
+        // Unfortunately, we can't easily associate state with the interpreter before
+        // calling Py_Initialize(). And the module initialization function receives no
+        // arguments. Our solution is to update a global pointer to point at "our" state
+        // then call Py_Initialize(). The module will be initialized as part of calling
+        // Py_Initialize(). It will copy the contents at the pointer into the local
+        // module state and the global pointer will be unused after that. The end result
+        // is that we have no reliance on global variables outside of a short window
+        // between now and when Py_Initialize() is called.
+        //
+        // We could potentially do away with this global variable by using a closure for
+        // the initialization function. But this rabbit hole may involve gross hackery
+        // like dynamic module names. It probably isn't worth it.
+
+        // It is important for references in this struct to have a lifetime of at least
+        // that of the interpreter.
+        // TODO specify lifetimes so the compiler validates this for us.
+        let module_state = super::importer::InitModuleState {
+            register_filesystem_importer: self.config.filesystem_importer,
+            sys_paths,
+            py_modules_data: config.py_modules_data,
+            py_resources_data: config.py_resources_data,
+        };
+
+        if config.use_custom_importlib {
+            // Replace the frozen modules in the interpreter with our custom set
+            // that knows how to import from memory.
+            unsafe {
+                pyffi::PyImport_FrozenModules = self.frozen_modules.as_ptr();
+            }
+
+            // Register our _pyoxidizer_importer extension which provides importing functionality.
+            unsafe {
+                // name char* needs to live as long as the interpreter is active.
+                pyffi::PyImport_AppendInittab(
+                    PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const i8,
+                    Some(PyInit__pyoxidizer_importer),
+                );
+
+                // Move pointer to our stack allocated instance. This pointer will be
+                // accessed when creating the Python module object, which should be
+                // done automatically as part of low-level interpreter initialization
+                // when calling Py_Initialize() below.
+                super::importer::NEXT_MODULE_STATE = &module_state;
+            }
+        }
+
+        // TODO call PyImport_ExtendInitTab to avoid O(n) overhead.
+        for e in &config.extra_extension_modules {
+            let res = unsafe {
+                pyffi::PyImport_AppendInittab(e.name.as_ptr() as *const i8, Some(e.init_func))
+            };
+
+            if res != 0 {
+                return Err("unable to register extension module");
+            }
+        }
+
+        let exe_str = exe.to_str().ok_or_else(|| "unable to convert exe to str")?;
+
+        let home = OwnedPyStr::from_str(exe_str)?;
+
+        unsafe {
+            // Pointer needs to live for lifetime of interpreter.
+            pyffi::Py_SetPythonHome(home.as_wchar_ptr());
+        }
+
+        // Dropping `home` would free the buffer Python was just handed, leaving
+        // the interpreter with a dangling pointer. There is only one
+        // interpreter per process, so deliberately leak this one allocation to
+        // keep it valid for the rest of the process.
+        std::mem::forget(home);
+
+        let program_name = OwnedPyStr::from_str(exe_str)?;
+
+        unsafe {
+            pyffi::Py_SetProgramName(program_name.as_wchar_ptr());
+        }
+
+        // Value needs to live for lifetime of interpreter.
+        self.program_name = Some(program_name);
+
+        // If we don't call Py_SetPath(), Python has its own logic for initializing it.
+        // We set it to an empty string because we don't want any paths by default. If
+        // we do have defined paths, they will be set after Py_Initialize().
+        unsafe {
+            // Value is copied internally. So short lifetime is OK.
+            let value = OwnedPyStr::from_str("")?;
+            pyffi::Py_SetPath(value.as_wchar_ptr());
+        }
+
+        if let (Some(ref encoding), Some(ref errors)) =
+            (&config.standard_io_encoding, &config.standard_io_errors)
+        {
+            let cencoding = CString::new(encoding.clone())
+                .or_else(|_| Err("unable to convert encoding to C string"))?;
+            let cerrors = CString::new(errors.clone())
+                .or_else(|_| Err("unable to convert encoding error mode to C string"))?;
+
+            let res = unsafe {
+                pyffi::Py_SetStandardStreamEncoding(
+                    cencoding.as_ptr() as *const i8,
+                    cerrors.as_ptr() as *const i8,
+                )
+            };
+
+            if res != 0 {
+                return Err("unable to set standard stream encoding");
+            }
+        }
+
+        unsafe {
+            pyffi::Py_BytesWarningFlag = config.bytes_warning;
+            pyffi::Py_DebugFlag = if config.parser_debug { 1 } else { 0 };
+            pyffi::Py_DontWriteBytecodeFlag = if config.dont_write_bytecode { 1 } else { 0 };
+            // TODO we could potentially have the config be an Option<i32> so we can control
+            // the hash seed explicitly. But the APIs in Python 3.7 aren't great here, as we'd
+            // need to set an environment variable. Once we support the new initialization
+            // API in Python 3.8, things will be easier to implement.
+            pyffi::Py_HashRandomizationFlag = if config.use_hash_seed { 1 } else { 0 };
+            pyffi::Py_IgnoreEnvironmentFlag = if config.ignore_python_env { 1 } else { 0 };
+            pyffi::Py_InteractiveFlag = if config.interactive { 1 } else { 0 };
+            pyffi::Py_InspectFlag = if config.inspect { 1 } else { 0 };
+            pyffi::Py_IsolatedFlag = if config.isolated { 1 } else { 0 };
+            pyffi::Py_NoSiteFlag = if config.import_site { 0 } else { 1 };
+            pyffi::Py_NoUserSiteDirectory = if config.import_user_site { 0 } else { 1 };
+            pyffi::Py_OptimizeFlag = config.opt_level;
+            pyffi::Py_QuietFlag = if config.quiet { 1 } else { 0 };
+            pyffi::Py_UnbufferedStdioFlag = if config.unbuffered_stdio { 1 } else { 0 };
+            pyffi::Py_VerboseFlag = config.verbose;
+        }
+
+        set_windows_flags(config);
+
+        /* Pre-initialization functions we could support:
+         *
+         * PyObject_SetArenaAllocator()
+         * PySys_AddWarnOption()
+         * PySys_AddXOption()
+         * PySys_ResetWarnOptions()
+         */
+
+        unsafe {
+            pyffi::Py_Initialize();
+        }
+
+        // We shouldn't be accessing this pointer after Py_Initialize(). And the
+        // memory is stack allocated and doesn't outlive this frame. We don't want
+        // to leave a stack pointer sitting around!
+        unsafe {
+            super::importer::NEXT_MODULE_STATE = std::ptr::null();
+        }
+
+        let py = unsafe { Python::assume_gil_acquired() };
+        self.py = Some(py);
+        self.init_run = true;
+
+        // env::args() panics if arguments aren't valid Unicode. But invalid
+        // Unicode arguments are possible and some applications may want to
+        // support them.
+        //
+        // env::args_os() provides access to the raw OsString instances, which
+        // will be derived from wchar_t on Windows and char* on POSIX. We can
+        // convert these to Python str instances using a platform-specific
+        // mechanism.
+        let args_objs = env::args_os()
+            .map(|os_arg| osstring_to_str(py, os_arg))
+            .collect::<Result<Vec<PyObject>, &'static str>>()?;
+
+        // This will steal the pointer to the elements and mem::forget them.
+        let args = PyList::new(py, &args_objs);
+        let argv = b"argv\0";
+
+        let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
+            pyffi::PySys_SetObject(argv.as_ptr() as *const i8, args_ptr)
+        });
+
+        match res {
+            0 => (),
+            _ => return Err("unable to set sys.argv"),
+        }
+
+        if config.argvb {
+            let args_objs: Vec<PyObject> = env::args_os()
+                .map(|os_arg| osstring_to_bytes(py, os_arg))
+                .collect();
+
+            let args = PyList::new(py, &args_objs);
+            let argvb = b"argvb\0";
+
+            let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
+                pyffi::PySys_SetObject(argvb.as_ptr() as *const i8, args_ptr)
+            });
+
+            match res {
+                0 => (),
+                _ => return Err("unable to set sys.argvb"),
+            }
+        }
+
+        // As a convention, sys.oxidized is set to indicate we are running from
+        // a self-contained application.
+        let oxidized = b"oxidized\0";
+
+        let res = py.True().with_borrowed_ptr(py, |py_true| unsafe {
+            pyffi::PySys_SetObject(oxidized.as_ptr() as *const i8, py_true)
+        });
+
+        match res {
+            0 => (),
+            _ => return Err("unable to set sys.oxidized"),
+        }
+
+        if config.sys_frozen {
+            let frozen = b"frozen\0";
+
+            match py.True().with_borrowed_ptr(py, |py_true| unsafe {
+                pyffi::PySys_SetObject(frozen.as_ptr() as *const i8, py_true)
+            }) {
+                0 => (),
+                _ => return Err("unable to set sys.frozen"),
+            }
+        }
+
+        if config.sys_meipass {
+            let meipass = b"_MEIPASS\0";
+            let value = PyString::new(py, &origin);
+
+            match value.with_borrowed_ptr(py, |py_value| unsafe {
+                pyffi::PySys_SetObject(meipass.as_ptr() as *const i8, py_value)
+            }) {
+                0 => (),
+                _ => return Err("unable to set sys._MEIPASS"),
+            }
+        }
+
+        Ok(py)
+    }
+
+    /// Ensure the Python GIL is released.
+    ///
+    /// Clears the cached interpreter handle and drops any stored `GILGuard`.
+    /// Does nothing when no handle is cached.
+    pub fn release_gil(&mut self) {
+        if self.py.take().is_some() {
+            self.gil = None;
+        }
+    }
+
+    /// Ensure the Python GIL is acquired, returning a handle on the interpreter.
+    ///
+    /// A cached handle is returned as-is. Otherwise a new `GILGuard` is
+    /// acquired and stored alongside the handle.
+    pub fn acquire_gil(&mut self) -> Python<'a> {
+        if let Some(cached) = self.py {
+            return cached;
+        }
+
+        let guard = GILGuard::acquire();
+        let handle = unsafe { Python::assume_gil_acquired() };
+
+        self.gil = Some(guard);
+        self.py = Some(handle);
+
+        handle
+    }
+
+    /// Runs the interpreter with the default code execution settings.
+    ///
+    /// The crate was built with settings that configure what should be
+    /// executed by default. Those settings will be loaded and executed.
+    ///
+    /// Dispatches on `config.run` to the REPL, module or code runner, and
+    /// returns `None` when nothing is configured.
+    pub fn run(&mut self) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        // Match on a copy of the run mode so `self` remains free for the
+        // `&mut self` calls in the arms.
+        match self.config.run.clone() {
+            PythonRunMode::None => Ok(py.None()),
+            PythonRunMode::Repl => self.run_repl(),
+            PythonRunMode::Module { module } => self.run_module_as_main(&module),
+            PythonRunMode::Eval { code } => self.run_code(&code),
+        }
+    }
+
+ /// Handle a raised SystemExit exception.
+ ///
+ /// This emulates the behavior in pythonrun.c:handle_system_exit() and
+ /// _Py_HandleSystemExit() but without the call to exit(), which we don't want.
+ ///
+ /// Returns the exit code derived from the exception: 0 when there is no
+ /// value or the value/`code` is `None`, the integer itself when it is a
+ /// Python int, and 1 after writing any other value to stderr.
+ ///
+ /// Errors are returned when stdout/stderr cannot be flushed or the `sys`
+ /// module cannot be imported.
+ fn handle_system_exit(&mut self, py: Python, err: PyErr) -> Result<i32, &'static str> {
+ std::io::stdout()
+ .flush()
+ .or_else(|_| Err("failed to flush stdout"))?;
+
+ // No exception value, or a value of None, means a successful exit.
+ let mut value = match err.pvalue {
+ Some(ref instance) => {
+ if instance.as_ptr() == py.None().as_ptr() {
+ return Ok(0);
+ }
+
+ instance.clone_ref(py)
+ }
+ None => {
+ return Ok(0);
+ }
+ };
+
+ if unsafe { pyffi::PyExceptionInstance_Check(value.as_ptr()) } != 0 {
+ // The error code should be in the "code" attribute.
+ if let Ok(code) = value.getattr(py, "code") {
+ if code == py.None() {
+ return Ok(0);
+ }
+
+ // Else pretend exc_value.code is the new exception value to use
+ // and fall through to below.
+ value = code;
+ }
+ }
+
+ // An integer value is used directly as the exit code.
+ if unsafe { pyffi::PyLong_Check(value.as_ptr()) } != 0 {
+ return Ok(unsafe { pyffi::PyLong_AsLong(value.as_ptr()) as i32 });
+ }
+
+ // Anything else is written to stderr and the exit code becomes 1.
+ let sys_module = py
+ .import("sys")
+ .or_else(|_| Err("unable to obtain sys module"))?;
+ let stderr = sys_module.get(py, "stderr");
+
+ // This is a cargo cult from the canonical implementation.
+ unsafe { pyffi::PyErr_Clear() }
+
+ match stderr {
+ Ok(o) => unsafe {
+ pyffi::PyFile_WriteObject(value.as_ptr(), o.as_ptr(), pyffi::Py_PRINT_RAW);
+ },
+ // sys.stderr is unavailable: print through the C-level stderr stream.
+ Err(_) => {
+ unsafe {
+ pyffi::PyObject_Print(value.as_ptr(), stderr_to_file(), pyffi::Py_PRINT_RAW);
+ }
+ std::io::stderr()
+ .flush()
+ .or_else(|_| Err("failure to flush stderr"))?;
+ }
+ }
+
+ unsafe {
+ pyffi::PySys_WriteStderr(b"\n\0".as_ptr() as *const i8);
+ }
+
+ // This frees references to this exception, which may be necessary to avoid
+ // badness.
+ err.restore(py);
+ unsafe {
+ pyffi::PyErr_Clear();
+ }
+
+ Ok(1)
+ }
+
+    /// Runs the interpreter and handles any exception that was raised.
+    ///
+    /// `SystemExit` is converted into `PythonRunResult::Exit`; any other
+    /// exception is printed and reported as `PythonRunResult::Err`.
+    pub fn run_and_handle_error(&mut self) -> PythonRunResult {
+        // There are underdefined lifetime bugs at play here. There is no
+        // explicit lifetime for the PyObject's returned. If we don't have
+        // the local variable in scope, we can get into a situation where
+        // drop() on self is called before the PyObject's drop(). This is
+        // problematic because PyObject's drop() attempts to acquire the GIL.
+        // If the interpreter is shut down, there is no GIL to acquire, and
+        // we may segfault.
+        // TODO look into setting lifetimes properly so the compiler can
+        // prevent some issues.
+        let outcome = self.run();
+        let py = self.acquire_gil();
+
+        let raised = match outcome {
+            Ok(_) => return PythonRunResult::Ok {},
+            Err(raised) => raised,
+        };
+
+        // SystemExit is special in that PyErr_PrintEx() will call
+        // exit() if it is seen. So, we handle it manually so we can
+        // return an exit code instead of exiting.
+
+        // TODO surely the cpython crate offers a better way to do this...
+        raised.restore(py);
+        let is_system_exit =
+            unsafe { pyffi::PyErr_ExceptionMatches(pyffi::PyExc_SystemExit) } != 0;
+        let raised = cpython::PyErr::fetch(py);
+
+        if !is_system_exit {
+            self.print_err(raised);
+            return PythonRunResult::Err {};
+        }
+
+        let code = self.handle_system_exit(py, raised).unwrap_or_else(|msg| {
+            eprintln!("{}", msg);
+            1
+        });
+
+        PythonRunResult::Exit { code }
+    }
+
+    /// Calls run() and resolves a suitable exit code.
+    ///
+    /// Success maps to 0, an unhandled exception to 1, and `SystemExit` to
+    /// the code it carried.
+    pub fn run_as_main(&mut self) -> i32 {
+        let outcome = self.run_and_handle_error();
+
+        match outcome {
+            PythonRunResult::Exit { code } => code,
+            PythonRunResult::Err {} => 1,
+            PythonRunResult::Ok {} => 0,
+        }
+    }
+
+    /// Runs a Python module as the __main__ module.
+    ///
+    /// Returns the execution result of the module code.
+    ///
+    /// The GIL is acquired if it is not already held.
+    ///
+    /// # Errors
+    ///
+    /// Returns the Python exception raised while resolving the module spec,
+    /// populating `__main__` or executing the module code. The exception is
+    /// returned untouched (not printed), so callers such as
+    /// `run_and_handle_error()` can inspect it, including `SystemExit`.
+    pub fn run_module_as_main(&mut self, name: &str) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        // This is modeled after runpy.py:_run_module_as_main().
+        let main_ptr =
+            unsafe { pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const c_char) };
+
+        // A null return means an exception is set. A null pointer must not be
+        // wrapped in a PyObject.
+        if main_ptr.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+
+        let main: PyModule =
+            unsafe { PyObject::from_borrowed_ptr(py, main_ptr).cast_into(py)? };
+
+        let main_dict = main.dict(py);
+
+        let importlib_util = py.import("importlib.util")?;
+        let spec = importlib_util.call(py, "find_spec", (name,), None)?;
+        let loader = spec.getattr(py, "loader")?;
+        let code = loader.call_method(py, "get_code", (name,), None)?;
+
+        let origin = spec.getattr(py, "origin")?;
+        let cached = spec.getattr(py, "cached")?;
+
+        // TODO handle __package__.
+        main_dict.set_item(py, "__name__", "__main__")?;
+        main_dict.set_item(py, "__file__", origin)?;
+        main_dict.set_item(py, "__cached__", cached)?;
+        main_dict.set_item(py, "__doc__", py.None())?;
+        main_dict.set_item(py, "__loader__", loader)?;
+        main_dict.set_item(py, "__spec__", spec)?;
+
+        unsafe {
+            let globals = main_dict.as_object().as_ptr();
+            let res = pyffi::PyEval_EvalCode(code.as_ptr(), globals, globals);
+
+            if res.is_null() {
+                // Hand the pending exception to the caller as-is. Printing it
+                // here would clear it (so a second fetch yields an unrelated
+                // error) and would make a SystemExit terminate the process.
+                Err(PyErr::fetch(py))
+            } else {
+                Ok(PyObject::from_owned_ptr(py, res))
+            }
+        }
+    }
+
+    /// Start and run a Python REPL.
+    ///
+    /// This emulates what CPython's main.c does.
+    ///
+    /// The GIL is acquired if it is not already held. Input is read from the
+    /// process's standard input; `None` is returned once the REPL finishes.
+    pub fn run_repl(&mut self) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        unsafe {
+            pyffi::Py_InspectFlag = 0;
+        }
+
+        // readline is optional. We don't care if it fails.
+        let _ = py.import("readline");
+
+        let sys = py.import("sys")?;
+
+        if let Ok(interactive_hook) = sys.get(py, "__interactivehook__") {
+            interactive_hook.call(py, NoArgs, None)?;
+        }
+
+        let filename = match CString::new("<stdin>") {
+            Ok(value) => value,
+            Err(_) => {
+                return Err(PyErr::new::<ValueError, _>(py, "could not create CString"));
+            }
+        };
+        let mut compiler_flags = pyffi::PyCompilerFlags { cf_flags: 0 };
+
+        // TODO use return value.
+        unsafe {
+            let input = stdin_to_file();
+            pyffi::PyRun_AnyFileExFlags(
+                input,
+                filename.as_ptr() as *const c_char,
+                0,
+                &mut compiler_flags,
+            )
+        };
+
+        Ok(py.None())
+    }
+
+    /// Runs Python code provided by a string.
+    ///
+    /// This is similar to what ``python -c <code>`` would do.
+    ///
+    /// The GIL is acquired if it is not already held. The code runs with the
+    /// `__main__` module's dict as both globals and locals.
+    pub fn run_code(&mut self, code: &str) -> PyResult<PyObject> {
+        let py = self.acquire_gil();
+
+        let source = match CString::new(code) {
+            Ok(value) => value,
+            Err(_) => {
+                return Err(PyErr::new::<ValueError, _>(
+                    py,
+                    "source code is not a valid C string",
+                ));
+            }
+        };
+
+        unsafe {
+            let main_module = pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const _);
+
+            if main_module.is_null() {
+                return Err(PyErr::fetch(py));
+            }
+
+            let namespace = pyffi::PyModule_GetDict(main_module);
+
+            let outcome = pyffi::PyRun_StringFlags(
+                source.as_ptr() as *const _,
+                pyffi::Py_file_input,
+                namespace,
+                namespace,
+                std::ptr::null_mut(),
+            );
+
+            if outcome.is_null() {
+                return Err(PyErr::fetch(py));
+            }
+
+            Ok(PyObject::from_owned_ptr(py, outcome))
+        }
+    }
+
+    /// Print a Python error.
+    ///
+    /// Under the hood this calls ``PyErr_PrintEx()``, which may call
+    /// ``Py_Exit()`` and may write to stderr.
+    ///
+    /// The GIL is acquired first if it is not already held.
+    pub fn print_err(&mut self, err: PyErr) {
+        err.print(self.acquire_gil());
+    }
+}
+
+/// Write loaded Python modules to a directory.
+///
+/// Given a Python interpreter and a path to a directory, this will create a
+/// file in that directory named ``modules-<UUID>`` and write a ``\n`` delimited
+/// list of loaded names from ``sys.modules`` into that file.
+///
+/// The directory is created if missing and names are written in sorted order.
+fn write_modules_to_directory(py: Python, path: &PathBuf) -> Result<(), &'static str> {
+    // TODO this needs better error handling all over.
+
+    fs::create_dir_all(path).map_err(|_| "could not create directory for modules")?;
+
+    let target = path.join(format!("modules-{}", uuid::Uuid::new_v4()));
+
+    let sys = py.import("sys").map_err(|_| "could not obtain sys module")?;
+    let modules_obj = sys
+        .get(py, "modules")
+        .map_err(|_| "could not obtain sys.modules")?;
+    let modules = modules_obj
+        .cast_as::<PyDict>(py)
+        .map_err(|_| "sys.modules is not a dict")?;
+
+    // A BTreeSet keeps the names sorted and de-duplicated.
+    let names = modules
+        .items(py)
+        .into_iter()
+        .map(|(key, _value)| {
+            key.extract::<String>(py)
+                .map_err(|_| "module name is not a str")
+        })
+        .collect::<Result<BTreeSet<String>, &'static str>>()?;
+
+    let mut out = fs::File::create(target).map_err(|_| "could not open file for writing")?;
+
+    for name in names {
+        writeln!(out, "{}", name).map_err(|_| "could not write")?;
+    }
+
+    Ok(())
+}
+
+// Finalizes the interpreter when the manager goes away.
+//
+// If config.write_modules_directory_env names an environment variable that is
+// set, the loaded module names are first written to the directory it points
+// at. Failures there are reported on stderr and do not stop finalization.
+impl<'a> Drop for MainPythonInterpreter<'a> {
+ fn drop(&mut self) {
+ if let Some(key) = &self.config.write_modules_directory_env {
+ if let Ok(path) = env::var(key) {
+ let path = PathBuf::from(path);
+ let py = self.acquire_gil();
+
+ if let Err(msg) = write_modules_to_directory(py, &path) {
+ eprintln!("error writing modules file: {}", msg);
+ }
+ }
+ }
+
+ // NOTE(review): the GIL is only (re)acquired inside the branch above. If
+ // release_gil() was called earlier, Py_FinalizeEx() appears to run without
+ // it being held - confirm whether that is intended.
+ // NOTE(review): this also runs when init() failed inside new(); confirm
+ // that finalizing (and acquire_gil() above) is safe in that state.
+ // The return value of Py_FinalizeEx() is deliberately ignored.
+ let _ = unsafe { pyffi::Py_FinalizeEx() };
+ }
+}
diff --git a/rust/pyembed/src/pyalloc.rs b/rust/pyembed/src/pyalloc.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/pyalloc.rs
@@ -0,0 +1,221 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Custom Python memory allocators.
+
+#[cfg(feature = "jemalloc-sys")]
+use jemalloc_sys as jemallocffi;
+use libc::{c_void, size_t};
+use python3_sys as pyffi;
+use std::alloc;
+use std::collections::HashMap;
+#[cfg(feature = "jemalloc-sys")]
+use std::ptr::null_mut;
+
+const MIN_ALIGN: usize = 16;
+
+type RawAllocatorState = HashMap<*mut u8, alloc::Layout>;
+
+/// Holds state for the raw memory allocator.
+///
+/// Ideally we wouldn't need to track state. But Rust's dealloc() API
+/// requires passing in a Layout that matches the allocation. This means
+/// we need to track the Layout for each allocation. This data structure
+/// facilitates that.
+///
+/// TODO HashMap isn't thread safe and the Python raw allocator doesn't
+/// hold the GIL. So we need a thread safe map or a mutex guarding access.
+pub struct RawAllocator {
+ /// Allocator vtable handed to Python. Its `ctx` field is set to the address
+ /// of the map owned by `_state` (see `make_raw_rust_memory_allocator()`).
+ pub allocator: pyffi::PyMemAllocatorEx,
+ /// Owns the pointer-to-Layout map so it lives as long as this struct. It is
+ /// never read through this field; the callbacks reach it via `ctx`.
+ _state: Box<RawAllocatorState>,
+}
+
+/// `malloc` callback backed by Rust's global allocator.
+///
+/// `ctx` must point at the `RawAllocatorState` map that records the layout
+/// of every live allocation. Returns NULL when the request cannot be
+/// satisfied, in which case nothing is recorded.
+extern "C" fn raw_rust_malloc(ctx: *mut c_void, size: size_t) -> *mut c_void {
+    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
+    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
+    // instead.
+    let size = match size {
+        0 => 1,
+        val => val,
+    };
+
+    // A size that cannot form a valid Layout is reported as an allocation
+    // failure instead of being passed to the allocator unchecked.
+    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
+        Ok(layout) => layout,
+        Err(_) => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+        let res = alloc::alloc(layout);
+
+        // Only track successful allocations; a NULL key would never be freed
+        // and would just pollute the map.
+        if !res.is_null() {
+            (*state).insert(res, layout);
+        }
+
+        res as *mut c_void
+    }
+}
+
+/// `calloc` callback backed by Rust's global allocator.
+///
+/// `ctx` must point at the `RawAllocatorState` map. Returns zeroed memory
+/// for `nelem * elsize` bytes, or NULL when that product overflows or the
+/// allocation fails. Nothing is recorded on failure.
+extern "C" fn raw_rust_calloc(ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
+    // An overflowing product must fail the request. Wrapping would hand the
+    // caller a buffer far smaller than it asked for.
+    let size = match nelem.checked_mul(elsize) {
+        // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
+        // size zero bytes returns a distinct non-NULL pointer if possible, as if
+        // PyMem_RawCalloc(1, 1) had been called instead.
+        Some(0) => 1,
+        Some(val) => val,
+        None => return std::ptr::null_mut(),
+    };
+
+    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
+        Ok(layout) => layout,
+        Err(_) => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+        let res = alloc::alloc_zeroed(layout);
+
+        // Only track successful allocations.
+        if !res.is_null() {
+            (*state).insert(res, layout);
+        }
+
+        res as *mut c_void
+    }
+}
+
+/// `realloc` callback backed by Rust's global allocator.
+///
+/// `ctx` must point at the `RawAllocatorState` map and `ptr` must be NULL or
+/// a pointer previously returned by one of the sibling callbacks. Returns
+/// NULL on failure, in which case the original block stays valid and stays
+/// tracked.
+///
+/// Panics if `ptr` is non-NULL and not present in the tracking map.
+extern "C" fn raw_rust_realloc(
+    ctx: *mut c_void,
+    ptr: *mut c_void,
+    new_size: size_t,
+) -> *mut c_void {
+    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
+    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
+    // resized but is not freed, and the returned pointer is non-NULL.
+    if ptr.is_null() {
+        return raw_rust_malloc(ctx, new_size);
+    }
+
+    let new_size = match new_size {
+        0 => 1,
+        val => val,
+    };
+
+    // Validate the requested size before touching any bookkeeping.
+    let layout = match alloc::Layout::from_size_align(new_size, MIN_ALIGN) {
+        Ok(layout) => layout,
+        Err(_) => return std::ptr::null_mut(),
+    };
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+
+        let key = ptr as *mut u8;
+        let old_layout = (*state)
+            .remove(&key)
+            .expect("original memory address not tracked");
+
+        let res = alloc::realloc(key, old_layout, new_size);
+
+        if res.is_null() {
+            // A failed realloc leaves the old block untouched and still owned
+            // by the caller, so restore its record for a later free().
+            (*state).insert(key, old_layout);
+        } else {
+            (*state).insert(res, layout);
+        }
+
+        res as *mut c_void
+    }
+}
+
+/// `free` callback backed by Rust's global allocator.
+///
+/// `ctx` must point at the `RawAllocatorState` map. Freeing NULL is a no-op.
+///
+/// Panics if `ptr` is non-NULL and not present in the tracking map.
+extern "C" fn raw_rust_free(ctx: *mut c_void, ptr: *mut c_void) {
+    if ptr.is_null() {
+        return;
+    }
+
+    unsafe {
+        let state = ctx as *mut RawAllocatorState;
+
+        let key = ptr as *mut u8;
+
+        // Take the record out in a single lookup. The panic message is built
+        // lazily so the common path does not allocate a String on every free.
+        let layout = (*state)
+            .remove(&key)
+            .unwrap_or_else(|| panic!("could not find allocated memory record: {:?}", key));
+
+        alloc::dealloc(key, layout);
+    }
+}
+
+/// Build a `RawAllocator` whose callbacks route through Rust's allocator.
+///
+/// The layout-tracking map is heap allocated so the `ctx` pointer stored in
+/// the vtable does not refer to the stack. The same allocation is then
+/// re-boxed into the returned struct so it is released when the struct is.
+pub fn make_raw_rust_memory_allocator() -> RawAllocator {
+    let state: *mut RawAllocatorState =
+        Box::into_raw(Box::new(HashMap::<*mut u8, alloc::Layout>::new()));
+
+    RawAllocator {
+        allocator: pyffi::PyMemAllocatorEx {
+            ctx: state as *mut c_void,
+            malloc: Some(raw_rust_malloc),
+            calloc: Some(raw_rust_calloc),
+            realloc: Some(raw_rust_realloc),
+            free: Some(raw_rust_free),
+        },
+        _state: unsafe { Box::from_raw(state) },
+    }
+}
+
+// Now let's define a raw memory allocator that interfaces directly with jemalloc.
+// This avoids the overhead of going through Rust's allocation layer.
+
+/// `malloc` callback that calls straight into jemalloc.
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_malloc(_ctx: *mut c_void, size: size_t) -> *mut c_void {
+    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
+    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
+    // instead.
+    let request = if size == 0 { 1 } else { size };
+
+    unsafe { jemallocffi::mallocx(request, 0) }
+}
+
+/// `calloc` callback that calls straight into jemalloc.
+///
+/// Returns zeroed memory for `nelem * elsize` bytes, or NULL when that
+/// product overflows.
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_calloc(_ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
+    // An overflowing product must fail the request. Wrapping would hand the
+    // caller a buffer far smaller than it asked for.
+    let size = match nelem.checked_mul(elsize) {
+        // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
+        // size zero bytes returns a distinct non-NULL pointer if possible, as if
+        // PyMem_RawCalloc(1, 1) had been called instead.
+        Some(0) => 1,
+        Some(val) => val,
+        None => return null_mut(),
+    };
+
+    unsafe { jemallocffi::mallocx(size, jemallocffi::MALLOCX_ZERO) }
+}
+
+/// `realloc` callback that calls straight into jemalloc.
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_realloc(
+    ctx: *mut c_void,
+    ptr: *mut c_void,
+    new_size: size_t,
+) -> *mut c_void {
+    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
+    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
+    // resized but is not freed, and the returned pointer is non-NULL.
+    if ptr.is_null() {
+        return raw_jemalloc_malloc(ctx, new_size);
+    }
+
+    // A zero-byte request is bumped to one byte so the block is resized
+    // rather than freed.
+    let request = if new_size == 0 { 1 } else { new_size };
+
+    unsafe { jemallocffi::rallocx(ptr, request, 0) }
+}
+
+/// `free` callback that calls straight into jemalloc; NULL is ignored.
+#[cfg(feature = "jemalloc-sys")]
+extern "C" fn raw_jemalloc_free(_ctx: *mut c_void, ptr: *mut c_void) {
+    if !ptr.is_null() {
+        unsafe { jemallocffi::dallocx(ptr, 0) }
+    }
+}
+
+/// Build the allocator vtable whose callbacks call directly into jemalloc.
+///
+/// Unlike the Rust-allocator variant, no per-allocation state is tracked, so
+/// `ctx` is NULL and the callbacks ignore it.
+#[cfg(feature = "jemalloc-sys")]
+pub fn make_raw_jemalloc_allocator() -> pyffi::PyMemAllocatorEx {
+ pyffi::PyMemAllocatorEx {
+ ctx: null_mut(),
+ malloc: Some(raw_jemalloc_malloc),
+ calloc: Some(raw_jemalloc_calloc),
+ realloc: Some(raw_jemalloc_realloc),
+ free: Some(raw_jemalloc_free),
+ }
+}
diff --git a/rust/pyembed/src/osutils.rs b/rust/pyembed/src/osutils.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/osutils.rs
@@ -0,0 +1,147 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use lazy_static::lazy_static;
+use std::path::{Path, PathBuf};
+
+/// terminfo directories for Debian based distributions.
+///
+/// Search for `--with-terminfo-dirs` at
+/// https://salsa.debian.org/debian/ncurses/blob/master/debian/rules to find
+/// the source of truth for this.
+const TERMINFO_DIRS_DEBIAN: &str = "/etc/terminfo:/lib/terminfo:/usr/share/terminfo";
+
+/// terminfo directories for RedHat based distributions.
+///
+/// CentOS compiled with
+/// `--with-terminfo-dirs=%{_sysconfdir}/terminfo:%{_datadir}/terminfo`.
+const TERMINFO_DIRS_REDHAT: &str = "/etc/terminfo:/usr/share/terminfo";
+
+/// terminfo directories for macOS.
+const TERMINFO_DIRS_MACOS: &str = "/usr/share/terminfo";
+
+lazy_static! {
+    /// Candidate terminfo database directories, probed in this order when the
+    /// Linux distribution is not recognized.
+    static ref TERMINFO_DIRS_COMMON: Vec<PathBuf> = {
+        [
+            "/usr/local/etc/terminfo",
+            "/usr/local/lib/terminfo",
+            "/usr/local/share/terminfo",
+            "/etc/terminfo",
+            "/usr/lib/terminfo",
+            "/lib/terminfo",
+            "/usr/share/terminfo",
+        ]
+        .iter()
+        .map(|dir| PathBuf::from(*dir))
+        .collect()
+    };
+}
+
+/// Operating system family the binary targets, as far as terminfo
+/// resolution needs to distinguish them.
+#[derive(Clone)]
+enum OsVariant {
+ Linux,
+ MacOs,
+ Windows,
+ /// Any target that is none of the above.
+ Other,
+}
+
+/// Linux distribution family, used to pick a terminfo search path.
+enum LinuxDistroVariant {
+ Debian,
+ RedHat,
+ /// `/etc/os-release` was unreadable or matched no known family.
+ Unknown,
+}
+
+lazy_static! {
+ // Operating system this binary was built for. `cfg!()` is resolved at
+ // compile time, so this describes the build target rather than anything
+ // probed when the program runs.
+ static ref TARGET_OS: OsVariant = {
+ if cfg!(target_os = "linux") {
+ OsVariant::Linux
+ } else if cfg!(target_os = "macos") {
+ OsVariant::MacOs
+ } else if cfg!(target_os = "windows") {
+ OsVariant::Windows
+ } else {
+ OsVariant::Other
+ }
+ };
+}
+
+/// Result of `resolve_os_info()`: the target OS plus, on Linux, the
+/// detected distribution family.
+struct OsInfo {
+ os: OsVariant,
+ /// `Some` only when `os` is `OsVariant::Linux`; `None` otherwise.
+ linux_distro: Option<LinuxDistroVariant>,
+}
+
+/// Classify the running Linux distribution from `/etc/os-release`.
+///
+/// `ID_LIKE=` is checked for `debian`, `rhel` and `fedora`. `ID=` is checked
+/// for `debian` and `fedora`, because those base distributions identify
+/// themselves through `ID` alone. The first line that yields a match wins.
+///
+/// Returns `LinuxDistroVariant::Unknown` when the file cannot be read or
+/// nothing matches.
+fn resolve_linux_distro() -> LinuxDistroVariant {
+    // Attempt to resolve the Linux distro by parsing /etc files.
+    let os_release = Path::new("/etc/os-release");
+
+    let data = match std::fs::read_to_string(os_release) {
+        Ok(data) => data,
+        Err(_) => return LinuxDistroVariant::Unknown,
+    };
+
+    for line in data.lines() {
+        if line.starts_with("ID_LIKE=") {
+            if line.contains("debian") {
+                return LinuxDistroVariant::Debian;
+            } else if line.contains("rhel") || line.contains("fedora") {
+                return LinuxDistroVariant::RedHat;
+            }
+        } else if line.starts_with("ID=") {
+            // Debian itself has no ID_LIKE entry, so it has to be recognized
+            // here. Derivatives fall through to their ID_LIKE line.
+            if line.contains("debian") {
+                return LinuxDistroVariant::Debian;
+            } else if line.contains("fedora") {
+                return LinuxDistroVariant::RedHat;
+            }
+        }
+    }
+
+    LinuxDistroVariant::Unknown
+}
+
+/// Gather the target OS and, on Linux only, the distribution family.
+fn resolve_os_info() -> OsInfo {
+    let os = TARGET_OS.clone();
+
+    // The distro probe reads the filesystem, so only run it on Linux.
+    let linux_distro = if let OsVariant::Linux = os {
+        Some(resolve_linux_distro())
+    } else {
+        None
+    };
+
+    OsInfo { os, linux_distro }
+}
+
+/// Attempt to resolve the value for the `TERMINFO_DIRS` environment variable.
+///
+/// Returns Some() value that `TERMINFO_DIRS` should be set to or None if
+/// no environment variable should be set.
+///
+/// `None` is returned when `TERMINFO_DIRS` is already present in the
+/// environment (whatever its value), on Windows and unrecognized targets,
+/// and on an unrecognized Linux distribution where none of the common
+/// terminfo directories exist.
+pub fn resolve_terminfo_dirs() -> Option<String> {
+    // Always respect an environment variable, if present. `var_os` is used so
+    // that a value which is not valid Unicode still counts as present.
+    if std::env::var_os("TERMINFO_DIRS").is_some() {
+        return None;
+    }
+
+    let os_info = resolve_os_info();
+
+    match os_info.os {
+        // `linux_distro` is always populated for Linux; fall back to Unknown
+        // rather than panicking should that ever stop being true.
+        OsVariant::Linux => match os_info
+            .linux_distro
+            .unwrap_or(LinuxDistroVariant::Unknown)
+        {
+            // TODO we could stat() the well-known paths ourselves and omit
+            // paths that don't exist. This /might/ save some syscalls, since
+            // ncurses doesn't appear to be the most frugal w.r.t. filesystem
+            // requests.
+            LinuxDistroVariant::Debian => Some(TERMINFO_DIRS_DEBIAN.to_string()),
+            LinuxDistroVariant::RedHat => Some(TERMINFO_DIRS_REDHAT.to_string()),
+            LinuxDistroVariant::Unknown => {
+                // We don't know this Linux variant. Look for common terminfo
+                // database directories and use paths that are found.
+                let paths = TERMINFO_DIRS_COMMON
+                    .iter()
+                    .filter(|p| p.exists())
+                    .map(|p| p.display().to_string())
+                    .collect::<Vec<String>>()
+                    .join(":");
+
+                // Nothing found means there is nothing worth setting.
+                if paths.is_empty() {
+                    None
+                } else {
+                    Some(paths)
+                }
+            }
+        },
+        OsVariant::MacOs => Some(TERMINFO_DIRS_MACOS.to_string()),
+        // Windows doesn't use the terminfo database.
+        OsVariant::Windows => None,
+        OsVariant::Other => None,
+    }
+}
diff --git a/rust/pyembed/src/lib.rs b/rust/pyembed/src/lib.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/lib.rs
@@ -0,0 +1,35 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+/*!
+Manage an embedded Python interpreter.
+
+The `pyembed` crate contains functionality for managing a Python interpreter
+embedded in the current binary. This crate is typically used along with
+[PyOxidizer](https://github.com/indygreg/PyOxidizer) for producing
+self-contained binaries containing Python.
+
+The most important types are [`PythonConfig`](struct.PythonConfig.html) and
+[`MainPythonInterpreter`](struct.MainPythonInterpreter.html). A `PythonConfig`
+defines how a Python interpreter is to behave. A `MainPythonInterpreter`
+creates and manages that interpreter and serves as a high-level interface for
+running code in the interpreter.
+*/
+
+mod config;
+mod data;
+mod importer;
+mod osutils;
+mod pyalloc;
+mod pyinterp;
+mod pystr;
+
+#[allow(unused_imports)]
+pub use crate::config::PythonConfig;
+
+#[allow(unused_imports)]
+pub use crate::data::default_python_config;
+
+#[allow(unused_imports)]
+pub use crate::pyinterp::MainPythonInterpreter;
diff --git a/rust/pyembed/src/importer.rs b/rust/pyembed/src/importer.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/importer.rs
@@ -0,0 +1,911 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+/*!
+Functionality for a Python importer.
+
+This module defines a Python meta path importer and associated functionality
+for importing Python modules from memory.
+*/
+
+use std::cell::RefCell;
+use std::collections::{HashMap, HashSet};
+use std::ffi::CStr;
+use std::io::Cursor;
+use std::sync::Arc;
+
+use byteorder::{LittleEndian, ReadBytesExt};
+use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError};
+use cpython::{
+ py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr,
+ PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+};
+use python3_sys as pyffi;
+use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory};
+
+use super::pyinterp::PYOXIDIZER_IMPORTER_NAME;
+
+/// Wrap a memory slice in a read-only Python memoryview.
+///
+/// The view refers to the slice's memory directly; nothing is copied.
+/// Returns `None` when the pointer produced by `PyMemoryView_FromMemory`
+/// does not convert into an object.
+#[inline]
+fn get_memory_view(py: Python, data: &'static [u8]) -> Option<PyObject> {
+    unsafe {
+        let view = PyMemoryView_FromMemory(data.as_ptr() as _, data.len() as _, PyBUF_READ);
+        PyObject::from_owned_ptr_opt(py, view)
+    }
+}
+
+/// Holds pointers to Python module data in memory.
+#[derive(Debug)]
+struct PythonModuleData {
+    /// Raw source bytes, if the module has any.
+    source: Option<&'static [u8]>,
+    /// Raw bytecode bytes, if the module has any.
+    bytecode: Option<&'static [u8]>,
+}
+
+impl PythonModuleData {
+    /// Memoryview over the source bytes, or `None` when there is no source
+    /// or the view could not be created.
+    fn get_source_memory_view(&self, py: Python) -> Option<PyObject> {
+        self.source.and_then(|bytes| get_memory_view(py, bytes))
+    }
+
+    /// Memoryview over the bytecode bytes, or `None` when there is no
+    /// bytecode or the view could not be created.
+    fn get_bytecode_memory_view(&self, py: Python) -> Option<PyObject> {
+        self.bytecode.and_then(|bytes| get_memory_view(py, bytes))
+    }
+}
+
+/// Represents Python modules data in memory.
+///
+/// This is essentially an index over a raw backing blob.
+struct PythonModulesData {
+    /// Packages in this set of modules.
+    packages: HashSet<&'static str>,
+
+    /// Maps module name to source/bytecode.
+    data: HashMap<&'static str, PythonModuleData>,
+}
+
+impl PythonModulesData {
+    /// Construct a new instance from a memory slice.
+    ///
+    /// Layout as consumed here, with every integer a little-endian `u32`:
+    ///
+    /// 1. the module count;
+    /// 2. one record per module: name length, source length, bytecode length
+    ///    and flags (bit 0 marks a package);
+    /// 3. all module names, back to back;
+    /// 4. all source payloads, back to back;
+    /// 5. all bytecode payloads, back to back.
+    ///
+    /// Returned names and payloads borrow from `data`; nothing is copied.
+    ///
+    /// # Errors
+    ///
+    /// Returns a static message when the index cannot be read, when a name
+    /// or payload would extend past the end of `data`, or when a name is not
+    /// valid UTF-8.
+    fn from(data: &'static [u8]) -> Result<PythonModulesData, &'static str> {
+        /// Bounds-checked equivalent of `&data[start..start + len]`.
+        fn region(data: &'static [u8], start: usize, len: usize) -> Option<&'static [u8]> {
+            data.get(start..start.checked_add(len)?)
+        }
+
+        let mut reader = Cursor::new(data);
+
+        let count = reader
+            .read_u32::<LittleEndian>()
+            .map_err(|_| "failed reading count")?;
+
+        let mut index = Vec::with_capacity(count as usize);
+        let mut total_names_length = 0;
+        let mut total_sources_length = 0;
+        let mut package_count = 0;
+
+        for _ in 0..count {
+            let name_length = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading name length")? as usize;
+            let source_length = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading source length")? as usize;
+            let bytecode_length = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading bytecode length")? as usize;
+            let flags = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading module flags")?;
+
+            let is_package = flags & 0x01 != 0;
+
+            if is_package {
+                package_count += 1;
+            }
+
+            index.push((name_length, source_length, bytecode_length, is_package));
+            total_names_length += name_length;
+            total_sources_length += source_length;
+        }
+
+        let mut res = HashMap::with_capacity(count as usize);
+        let mut packages = HashSet::with_capacity(package_count);
+
+        // Names start right after the index. Sources follow all names and
+        // bytecode follows all sources.
+        let mut name_offset = reader.position() as usize;
+        let mut source_offset = name_offset + total_names_length;
+        let mut bytecode_offset = source_offset + total_sources_length;
+
+        for (name_length, source_length, bytecode_length, is_package) in index {
+            let name_bytes = region(data, name_offset, name_length)
+                .ok_or("module name extends past end of data")?;
+            let name = std::str::from_utf8(name_bytes)
+                .map_err(|_| "module name is not valid UTF-8")?;
+
+            // Zero-length payloads are treated as absent and never sliced.
+            let source = if source_length > 0 {
+                Some(
+                    region(data, source_offset, source_length)
+                        .ok_or("module source extends past end of data")?,
+                )
+            } else {
+                None
+            };
+
+            let bytecode = if bytecode_length > 0 {
+                Some(
+                    region(data, bytecode_offset, bytecode_length)
+                        .ok_or("module bytecode extends past end of data")?,
+                )
+            } else {
+                None
+            };
+
+            name_offset += name_length;
+            source_offset += source_length;
+            bytecode_offset += bytecode_length;
+
+            if is_package {
+                packages.insert(name);
+            }
+
+            // Extension modules will have their names present to populate the
+            // packages set. So only populate module data if we have data for it.
+            if source.is_some() || bytecode.is_some() {
+                res.insert(name, PythonModuleData { source, bytecode });
+            }
+        }
+
+        Ok(PythonModulesData {
+            packages,
+            data: res,
+        })
+    }
+}
+
+/// Represents Python resources data in memory.
+///
+/// This is essentially an index over a raw backing blob.
+struct PythonResourcesData {
+    /// Maps package name to that package's resources (resource name to raw
+    /// bytes).
+    packages: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>,
+}
+
+impl PythonResourcesData {
+    /// Construct a new instance from a memory slice.
+    ///
+    /// Layout as consumed here, with every integer a little-endian `u32`:
+    ///
+    /// 1. the package count;
+    /// 2. per package: package name length and resource count, followed by
+    ///    one (name length, data length) pair per resource;
+    /// 3. all names back to back, each package name immediately followed by
+    ///    the names of its resources;
+    /// 4. all resource payloads, back to back, in the same order.
+    ///
+    /// Returned names and payloads borrow from `data`; nothing is copied.
+    ///
+    /// # Errors
+    ///
+    /// Returns a static message when the index cannot be read, when a name
+    /// or payload would extend past the end of `data`, or when a name is not
+    /// valid UTF-8.
+    fn from(data: &'static [u8]) -> Result<PythonResourcesData, &'static str> {
+        /// Bounds-checked equivalent of `&data[start..start + len]`.
+        fn region(data: &'static [u8], start: usize, len: usize) -> Option<&'static [u8]> {
+            data.get(start..start.checked_add(len)?)
+        }
+
+        /// Bounds-checked, UTF-8 validated name lookup.
+        fn name_at(
+            data: &'static [u8],
+            start: usize,
+            len: usize,
+        ) -> Result<&'static str, &'static str> {
+            let bytes = region(data, start, len).ok_or("name extends past end of data")?;
+            std::str::from_utf8(bytes).map_err(|_| "name is not valid UTF-8")
+        }
+
+        let mut reader = Cursor::new(data);
+
+        let package_count = reader
+            .read_u32::<LittleEndian>()
+            .map_err(|_| "failed reading package count")? as usize;
+
+        let mut index = Vec::with_capacity(package_count);
+        let mut total_names_length = 0;
+
+        for _ in 0..package_count {
+            let package_name_length = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading package name length")?
+                as usize;
+            let resource_count = reader
+                .read_u32::<LittleEndian>()
+                .map_err(|_| "failed reading resource count")? as usize;
+
+            total_names_length += package_name_length;
+
+            let mut package_index = Vec::with_capacity(resource_count);
+
+            for _ in 0..resource_count {
+                let resource_name_length = reader
+                    .read_u32::<LittleEndian>()
+                    .map_err(|_| "failed reading resource name length")?
+                    as usize;
+                let resource_data_length = reader
+                    .read_u32::<LittleEndian>()
+                    .map_err(|_| "failed reading resource data length")?
+                    as usize;
+
+                total_names_length += resource_name_length;
+
+                package_index.push((resource_name_length, resource_data_length));
+            }
+
+            index.push((package_name_length, package_index));
+        }
+
+        // Names start right after the index; payloads follow all names.
+        let mut name_offset = reader.position() as usize;
+        let mut data_offset = name_offset + total_names_length;
+        let mut res = HashMap::new();
+
+        for (package_name_length, package_index) in index {
+            let package_name = name_at(data, name_offset, package_name_length)?;
+
+            name_offset += package_name_length;
+
+            let mut package_data = Box::new(HashMap::new());
+
+            for (resource_name_length, resource_data_length) in package_index {
+                let resource_name = name_at(data, name_offset, resource_name_length)?;
+
+                name_offset += resource_name_length;
+
+                let resource_data = region(data, data_offset, resource_data_length)
+                    .ok_or("resource data extends past end of data")?;
+
+                data_offset += resource_data_length;
+
+                package_data.insert(resource_name, resource_data);
+            }
+
+            res.insert(package_name, Arc::new(package_data));
+        }
+
+        Ok(PythonResourcesData { packages: res })
+    }
+}
+
+#[allow(unused_doc_comments)]
+/// Python type to import modules.
+///
+/// This type implements the importlib.abc.MetaPathFinder interface for
+/// finding/loading modules. It supports loading various flavors of modules,
+/// allowing it to be the only registered sys.meta_path importer.
+py_class!(class PyOxidizerFinder |py| {
+ // Python objects the methods below call into. They are supplied when the
+ // instance is created; this type does not look them up itself.
+ data imp_module: PyModule;
+ data marshal_loads: PyObject;
+ data builtin_importer: PyObject;
+ data frozen_importer: PyObject;
+ data call_with_frames_removed: PyObject;
+ data module_spec_type: PyObject;
+ data decode_source: PyObject;
+ data exec_fn: PyObject;
+ // Names of modules that are packages; consulted by find_spec() and
+ // get_resource_reader().
+ data packages: HashSet<&'static str>;
+ // Maps each module name this finder handles to the flavor that serves it.
+ data known_modules: KnownModules;
+ // Per-package resource tables (package name -> resource name -> bytes).
+ data resources: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>;
+ // Cache of reader objects created by get_resource_reader(), keyed by
+ // package name.
+ data resource_readers: RefCell<Box<HashMap<String, PyObject>>>;
+
+ // Start of importlib.abc.MetaPathFinder interface.
+
+ // Returns a module spec for `fullname`, or None when the name is not in
+ // `known_modules`. Builtin and frozen names are forwarded to the stored
+ // importer objects; in-memory names get a spec whose loader is `self`.
+ def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option<PyObject> = None) -> PyResult<PyObject> {
+ let key = fullname.to_string(py)?;
+
+ if let Some(flavor) = self.known_modules(py).get(&*key) {
+ match flavor {
+ KnownModuleFlavor::Builtin => {
+ // BuiltinImporter.find_spec() always returns None if `path` is defined.
+ // And it doesn't use `target`. So don't proxy these values.
+ self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None)
+ }
+ KnownModuleFlavor::Frozen => {
+ self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None)
+ }
+ KnownModuleFlavor::InMemory { .. } => {
+ let is_package = self.packages(py).contains(&*key);
+
+ // TODO consider setting origin and has_location so __file__ will be
+ // populated.
+
+ let kwargs = PyDict::new(py);
+ kwargs.set_item(py, "is_package", is_package)?;
+
+ self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs))
+ }
+ }
+ } else {
+ Ok(py.None())
+ }
+ }
+
+ def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult<PyObject> {
+ // Method is deprecated. Always returns None.
+ // We /could/ call find_spec(). Meh.
+ Ok(py.None())
+ }
+
+ // Nothing is cached that would need invalidating, so this is a no-op.
+ def invalidate_caches(&self) -> PyResult<PyObject> {
+ Ok(py.None())
+ }
+
+ // End of importlib.abc.MetaPathFinder interface.
+
+ // Start of importlib.abc.Loader interface.
+
+ // Always returns None.
+ def create_module(&self, _spec: &PyObject) -> PyResult<PyObject> {
+ Ok(py.None())
+ }
+
+ // Executes `module`, dispatching on the flavor recorded for its
+ // `__name__`. In-memory modules have their bytecode unmarshalled and then
+ // run against the module's `__dict__`; a missing bytecode entry raises
+ // ImportError.
+ def exec_module(&self, module: &PyObject) -> PyResult<PyObject> {
+ let name = module.getattr(py, "__name__")?;
+ let key = name.extract::<String>(py)?;
+
+ if let Some(flavor) = self.known_modules(py).get(&*key) {
+ match flavor {
+ KnownModuleFlavor::Builtin => {
+ self.builtin_importer(py).call_method(py, "exec_module", (module,), None)
+ },
+ KnownModuleFlavor::Frozen => {
+ self.frozen_importer(py).call_method(py, "exec_module", (module,), None)
+ },
+ KnownModuleFlavor::InMemory { module_data } => {
+ match module_data.get_bytecode_memory_view(py) {
+ Some(value) => {
+ let code = self.marshal_loads(py).call(py, (value,), None)?;
+ let exec_fn = self.exec_fn(py);
+ let dict = module.getattr(py, "__dict__")?;
+
+ self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None)
+ },
+ None => {
+ Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", name)))
+ }
+ }
+ },
+ }
+ } else {
+ // Raising here might make more sense, as exec_module() shouldn't
+ // be called on the Loader that didn't create the module.
+ Ok(py.None())
+ }
+ }
+
+ // End of importlib.abc.Loader interface.
+
+ // Start of importlib.abc.InspectLoader interface.
+
+ // Returns the code object for `fullname`. Builtin and unknown names yield
+ // None; an in-memory module without bytecode raises ImportError.
+ def get_code(&self, fullname: &PyString) -> PyResult<PyObject> {
+ let key = fullname.to_string(py)?;
+
+ if let Some(flavor) = self.known_modules(py).get(&*key) {
+ match flavor {
+ KnownModuleFlavor::Frozen => {
+ let imp_module = self.imp_module(py);
+
+ imp_module.call(py, "get_frozen_object", (fullname,), None)
+ },
+ KnownModuleFlavor::InMemory { module_data } => {
+ match module_data.get_bytecode_memory_view(py) {
+ Some(value) => {
+ self.marshal_loads(py).call(py, (value,), None)
+ }
+ None => {
+ Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", fullname)))
+ }
+ }
+ },
+ KnownModuleFlavor::Builtin => {
+ Ok(py.None())
+ }
+ }
+ } else {
+ Ok(py.None())
+ }
+ }
+
+ // Returns decoded source text for an in-memory module. Every other flavor
+ // and unknown names yield None; an in-memory module without source raises
+ // ImportError.
+ def get_source(&self, fullname: &PyString) -> PyResult<PyObject> {
+ let key = fullname.to_string(py)?;
+
+ if let Some(flavor) = self.known_modules(py).get(&*key) {
+ if let KnownModuleFlavor::InMemory { module_data } = flavor {
+ match module_data.get_source_memory_view(py) {
+ Some(value) => {
+ // decode_source (from importlib._bootstrap_external)
+ // can't handle memoryview. So we take the memory hit and
+ // cast to bytes.
+ let b = value.call_method(py, "tobytes", NoArgs, None)?;
+ self.decode_source(py).call(py, (b,), None)
+ },
+ None => {
+ Err(PyErr::new::<ImportError, _>(py, ("source not available", fullname)))
+ }
+ }
+ } else {
+ Ok(py.None())
+ }
+ } else {
+ Ok(py.None())
+ }
+ }
+
+ // End of importlib.abc.InspectLoader interface.
+
+ // Support obtaining ResourceReader instances.
+ //
+ // Returns a cached reader when one exists for `fullname`. Otherwise a
+ // reader is created (and cached) only when the name is a package; any
+ // other name yields None.
+ def get_resource_reader(&self, fullname: &PyString) -> PyResult<PyObject> {
+ let key = fullname.to_string(py)?;
+
+ // This should not happen since code below should not be recursive into this
+ // function.
+ let mut resource_readers = match self.resource_readers(py).try_borrow_mut() {
+ Ok(v) => v,
+ Err(_) => {
+ return Err(PyErr::new::<RuntimeError, _>(py, "resource reader already borrowed"));
+ }
+ };
+
+ // Return an existing instance if we have one.
+ if let Some(reader) = resource_readers.get(&*key) {
+ return Ok(reader.clone_ref(py));
+ }
+
+ // Only create a reader if the name is a package.
+ if self.packages(py).contains(&*key) {
+
+ // Not all packages have known resources.
+ let resources = match self.resources(py).get(&*key) {
+ Some(v) => v.clone(),
+ None => {
+ let h: Box<HashMap<&'static str, &'static [u8]>> = Box::new(HashMap::new());
+ Arc::new(h)
+ }
+ };
+
+ let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object();
+ resource_readers.insert(key.to_string(), reader.clone_ref(py));
+
+ Ok(reader)
+ } else {
+ Ok(py.None())
+ }
+ }
+});
+
+#[allow(unused_doc_comments)]
+/// Implements in-memory reading of resource data.
+///
+/// Implements importlib.abc.ResourceReader.
+py_class!(class PyOxidizerResourceReader |py| {
+    data resources: Arc<Box<HashMap<&'static str, &'static [u8]>>>;
+
+    /// Returns an opened, file-like object for binary reading of the resource.
+    ///
+    /// If the resource cannot be found, FileNotFoundError is raised.
+    def open_resource(&self, resource: &PyString) -> PyResult<PyObject> {
+        let key = resource.to_string(py)?;
+
+        let data = match self.resources(py).get(&*key) {
+            Some(data) => data,
+            None => return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found")),
+        };
+
+        // A missing view means the C API call failed, so surface whatever
+        // exception Python has pending.
+        let view = match get_memory_view(py, data) {
+            Some(view) => view,
+            None => return Err(PyErr::fetch(py)),
+        };
+
+        let io_module = py.import("io")?;
+        let bytes_io = io_module.get(py, "BytesIO")?;
+
+        bytes_io.call(py, (view,), None)
+    }
+
+    /// Returns the file system path to the resource.
+    ///
+    /// If the resource does not concretely exist on the file system, raise
+    /// FileNotFoundError.
+    def resource_path(&self, _resource: &PyString) -> PyResult<PyObject> {
+        Err(PyErr::new::<FileNotFoundError, _>(py, "in-memory resources do not have filesystem paths"))
+    }
+
+    /// Returns True if the named name is considered a resource. FileNotFoundError
+    /// is raised if name does not exist.
+    def is_resource(&self, name: &PyString) -> PyResult<PyObject> {
+        let key = name.to_string(py)?;
+
+        if !self.resources(py).contains_key(&*key) {
+            return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"));
+        }
+
+        Ok(py.True().as_object().clone_ref(py))
+    }
+
+    /// Returns an iterable of strings over the contents of the package.
+    ///
+    /// Do note that it is not required that all names returned by the iterator be actual resources,
+    /// e.g. it is acceptable to return names for which is_resource() would be false.
+    ///
+    /// Allowing non-resource names to be returned is to allow for situations where how a package
+    /// and its resources are stored are known a priori and the non-resource names would be useful.
+    /// For instance, returning subdirectory names is allowed so that when it is known that the
+    /// package and resources are stored on the file system then those subdirectory names can be
+    /// used directly.
+    def contents(&self) -> PyResult<PyObject> {
+        let names: Vec<_> = self
+            .resources(py)
+            .keys()
+            .map(|name| name.to_py_object(py))
+            .collect();
+
+        Ok(names.to_py_object(py).as_object().clone_ref(py))
+    }
+});
+
+const DOC: &[u8] = b"Binary representation of Python modules\0";
+
+/// Represents global module state to be passed at interpreter initialization time.
+///
+/// `module_init()` copies each of these fields into the module's own
+/// `ModuleState` (cloning `sys_paths`).
+#[derive(Debug)]
+pub struct InitModuleState {
+ /// Whether to register the filesystem importer on sys.meta_path.
+ pub register_filesystem_importer: bool,
+
+ /// Values to set on sys.path.
+ pub sys_paths: Vec<String>,
+
+ /// Raw data constituting Python module source code.
+ pub py_modules_data: &'static [u8],
+
+ /// Raw data constituting Python resources data.
+ pub py_resources_data: &'static [u8],
+}
+
+/// Holds reference to next module state struct.
+///
+/// This module state will be copied into the module's state when the
+/// Python module is initialized.
+///
+/// Starts out null. Being a `static mut`, every access is `unsafe` and the
+/// language provides no synchronization for it.
+/// NOTE(review): whoever sets this presumably must keep the pointee alive
+/// until `module_init()` has run; the setter is not visible in this file.
+pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null();
+
+/// Represents which importer to use for known modules.
+#[derive(Debug)]
+enum KnownModuleFlavor {
+ /// Forwarded to the builtin importer object held by `PyOxidizerFinder`.
+ Builtin,
+ /// Forwarded to the frozen importer object held by `PyOxidizerFinder`.
+ Frozen,
+ /// Served by `PyOxidizerFinder` itself from the referenced source and
+ /// bytecode slices.
+ InMemory { module_data: PythonModuleData },
+}
+
+type KnownModules = HashMap<&'static str, KnownModuleFlavor>;
+
+/// State associated with each importer module instance.
+///
+/// We write per-module state to per-module instances of this struct so
+/// we don't rely on global variables and so multiple importer modules can
+/// exist without issue.
+///
+/// Instances are reached through `get_module_state()`, which casts the
+/// pointer returned by `PyModule_GetState()`. The first four fields are
+/// filled in by `module_init()` from `NEXT_MODULE_STATE`.
+#[derive(Debug)]
+struct ModuleState {
+ /// Whether to register PathFinder on sys.meta_path.
+ register_filesystem_importer: bool,
+
+ /// Values to set on sys.path.
+ sys_paths: Vec<String>,
+
+ /// Raw data constituting Python module source code.
+ py_modules_data: &'static [u8],
+
+ /// Raw data constituting Python resources data.
+ py_resources_data: &'static [u8],
+
+ /// Whether setup() has been called.
+ ///
+ /// Reset to false by `module_init()`; `module_setup()` sets it and refuses
+ /// to run a second time.
+ setup_called: bool,
+}
+
+/// Borrow the `ModuleState` attached to an instance of our importer module.
+///
+/// Fails with a Python `ValueError` when the module has no state pointer.
+///
+/// No check is made that `m` really is our module type; the state pointer
+/// is cast unconditionally.
+fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> {
+    let raw_state =
+        unsafe { pyffi::PyModule_GetState(m.as_object().as_ptr()) } as *mut ModuleState;
+
+    // `as_mut()` folds the null check and the dereference into one step.
+    match unsafe { raw_state.as_mut() } {
+        Some(state) => Ok(state),
+        None => Err(PyErr::new::<ValueError, _>(
+            py,
+            "unable to retrieve module state",
+        )),
+    }
+}
+
+/// Initialize the Python module object.
+///
+/// This is called as part of the PyInit_* function to create the internal
+/// module object for the interpreter.
+///
+/// This receives a handle to the current Python interpreter and just-created
+/// Python module instance. It populates the internal module state and registers
+/// a _setup() on the module object for usage by Python.
+///
+/// Because this function accesses NEXT_MODULE_STATE, it should only be
+/// called during interpreter initialization.
+///
+/// # Errors
+///
+/// Returns a Python `ValueError` when the module has no state or when
+/// `NEXT_MODULE_STATE` has not been set, and propagates any error raised
+/// while adding `_setup` to the module.
+fn module_init(py: Python, m: &PyModule) -> PyResult<()> {
+    let state = get_module_state(py, m)?;
+
+    // Copy the raw pointer out once and refuse to dereference null.
+    let next = unsafe { NEXT_MODULE_STATE };
+    if next.is_null() {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "initial module state has not been set",
+        ));
+    }
+
+    // NOTE(review): the pointee is assumed to still be alive here; that is
+    // the responsibility of whoever set NEXT_MODULE_STATE.
+    unsafe {
+        state.register_filesystem_importer = (*next).register_filesystem_importer;
+        // TODO we could move the value if we wanted to avoid the clone().
+        state.sys_paths = (*next).sys_paths.clone();
+        state.py_modules_data = (*next).py_modules_data;
+        state.py_resources_data = (*next).py_resources_data;
+    }
+
+    state.setup_called = false;
+
+    m.add(
+        py,
+        "_setup",
+        py_fn!(
+            py,
+            module_setup(
+                m: PyModule,
+                bootstrap_module: PyModule,
+                marshal_module: PyModule,
+                decode_source: PyObject
+            )
+        ),
+    )?;
+
+    Ok(())
+}
+
+/// Configure the importing mechanism once the module has been imported.
+///
+/// This replaces the contents of `sys.meta_path` with a single unified
+/// importer covering built-in, frozen, and in-memory modules, optionally
+/// registers the filesystem importer, and rebuilds `sys.path` from the
+/// configured entries.
+///
+/// Raises `RuntimeError` if called more than once for the same module and
+/// `ValueError` if `sys.meta_path` does not hold exactly 2 entries or the
+/// embedded data / interpreter tables cannot be read.
+///
+/// This function should only be called once as part of
+/// _frozen_importlib_external._install_external_importers().
+fn module_setup(
+    py: Python,
+    m: PyModule,
+    bootstrap_module: PyModule,
+    marshal_module: PyModule,
+    decode_source: PyObject,
+) -> PyResult<PyObject> {
+    let state = get_module_state(py, &m)?;
+
+    if state.setup_called {
+        return Err(PyErr::new::<RuntimeError, _>(
+            py,
+            "PyOxidizer _setup() already called",
+        ));
+    }
+
+    // Marked before any further work, so a failure below does not allow a retry.
+    state.setup_called = true;
+
+    let imp_module = bootstrap_module
+        .get(py, "_imp")?
+        .cast_into::<PyModule>(py)?;
+    let sys_object = bootstrap_module.get(py, "sys")?;
+    let sys_module = sys_object.cast_as::<PyModule>(py)?;
+    let meta_path_object = sys_module.get(py, "meta_path")?;
+
+    // We should be executing as part of
+    // _frozen_importlib_external._install_external_importers().
+    // _frozen_importlib._install() should have already been called and set up
+    // sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the
+    // only meta path importers present.
+    let meta_path = meta_path_object.cast_as::<PyList>(py)?;
+
+    if meta_path.len(py) != 2 {
+        return Err(PyErr::new::<ValueError, _>(
+            py,
+            "sys.meta_path does not contain 2 values",
+        ));
+    }
+
+    let builtin_importer = meta_path.get_item(py, 0);
+    let frozen_importer = meta_path.get_item(py, 1);
+
+    // It may seem inefficient to create a full HashMap of the parsed data instead of e.g.
+    // streaming it. But the overhead of iterators was measured to be more than building
+    // up a temporary HashMap.
+    let modules_data = PythonModulesData::from(state.py_modules_data)
+        .map_err(|msg| PyErr::new::<ValueError, _>(py, msg))?;
+
+    // Populate our known module lookup table with entries from builtins, frozens, and
+    // finally us. Last write wins and has the same effect as registering our
+    // meta path importer first. This should be safe. If nothing else, it allows
+    // some builtins to be overwritten by .py implemented modules.
+    let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10);
+
+    // Walk the built-in module table until the entry with a null name.
+    for offset in 0.. {
+        let entry = unsafe { *pyffi::PyImport_Inittab.offset(offset) };
+
+        if entry.name.is_null() {
+            break;
+        }
+
+        let builtin_name = unsafe { CStr::from_ptr(entry.name as _) }
+            .to_str()
+            .map_err(|_| PyErr::new::<ValueError, _>(py, "unable to parse PyImport_Inittab"))?;
+
+        known_modules.insert(builtin_name, KnownModuleFlavor::Builtin);
+    }
+
+    // Same walk over the frozen module table.
+    for offset in 0.. {
+        let entry = unsafe { *pyffi::PyImport_FrozenModules.offset(offset) };
+
+        if entry.name.is_null() {
+            break;
+        }
+
+        let frozen_name = unsafe { CStr::from_ptr(entry.name as _) }
+            .to_str()
+            .map_err(|_| {
+                PyErr::new::<ValueError, _>(py, "unable to parse PyImport_FrozenModules")
+            })?;
+
+        known_modules.insert(frozen_name, KnownModuleFlavor::Frozen);
+    }
+
+    // In-memory modules go in last so they win over same-named entries above.
+    for (module_name, module_data) in modules_data.data {
+        known_modules.insert(module_name, KnownModuleFlavor::InMemory { module_data });
+    }
+
+    let resources_data = PythonResourcesData::from(state.py_resources_data)
+        .map_err(|msg| PyErr::new::<ValueError, _>(py, msg))?;
+
+    let marshal_loads = marshal_module.get(py, "loads")?;
+    let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?;
+    let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?;
+
+    let builtins_object =
+        unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) }
+            .ok_or_else(|| PyErr::new::<ValueError, _>(py, "unable to obtain __builtins__"))?;
+    let builtins_module = builtins_object.cast_into::<PyDict>(py)?;
+
+    let exec_fn = builtins_module
+        .get_item(py, "exec")
+        .ok_or_else(|| PyErr::new::<ValueError, _>(py, "could not obtain __builtins__.exec"))?;
+
+    let resource_readers: RefCell<Box<HashMap<String, PyObject>>> =
+        RefCell::new(Box::new(HashMap::new()));
+
+    let unified_importer = PyOxidizerFinder::create_instance(
+        py,
+        imp_module,
+        marshal_loads,
+        builtin_importer,
+        frozen_importer,
+        call_with_frames_removed,
+        module_spec_type,
+        decode_source,
+        exec_fn,
+        modules_data.packages,
+        known_modules,
+        resources_data.packages,
+        resource_readers,
+    )?;
+    meta_path_object.call_method(py, "clear", NoArgs, None)?;
+    meta_path_object.call_method(py, "append", (unified_importer,), None)?;
+
+    // At this point the importing mechanism is fully initialized to use our
+    // unified importer, which handles built-in, frozen, and in-memory imports.
+
+    // Because we're probably running during Py_Initialize() and stdlib modules
+    // may not be in-memory, we need to register and configure additional importers
+    // here, before continuing with Py_Initialize(), otherwise we may not find
+    // the standard library!
+
+    if state.register_filesystem_importer {
+        // This is what importlib._bootstrap_external usually does:
+        // supported_loaders = _get_supported_file_loaders()
+        // sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
+        // sys.meta_path.append(PathFinder)
+        let frozen_importlib_external = py.import("_frozen_importlib_external")?;
+
+        let supported_loaders =
+            frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?;
+        let loader_objects: Vec<PyObject> = supported_loaders
+            .cast_as::<PyList>(py)?
+            .iter(py)
+            .collect();
+        let loaders_tuple = PyTuple::new(py, &loader_objects[..]);
+
+        let file_finder = frozen_importlib_external.get(py, "FileFinder")?;
+        let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?;
+        sys_module
+            .get(py, "path_hooks")?
+            .call_method(py, "append", (path_hook,), None)?;
+
+        let path_finder = frozen_importlib_external.get(py, "PathFinder")?;
+        sys_module
+            .get(py, "meta_path")?
+            .call_method(py, "append", (path_finder,), None)?;
+    }
+
+    // Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path.
+    // But we tried to do this and only ran into problems due to string conversions,
+    // unwanted side-effects. Updating sys.path directly before it is used by PathFinder
+    // (which was just registered above) should have the same effect.
+
+    // Always clear out sys.path.
+    let sys_path = sys_module.get(py, "path")?;
+    sys_path.call_method(py, "clear", NoArgs, None)?;
+
+    // And repopulate it with entries from the config.
+    for entry in state.sys_paths.iter() {
+        let py_entry = PyString::new(py, entry.as_str());
+
+        sys_path.call_method(py, "append", (py_entry,), None)?;
+    }
+
+    Ok(py.None())
+}
+
+/// Definition record handed to `PyModule_Create()` for the importer module.
+///
+/// `m_name` and `m_doc` start out null and are filled in by
+/// `PyInit__pyoxidizer_importer()` on its first call. `m_size` reserves room
+/// for one `ModuleState` as per-module state.
+static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef {
+    m_base: pyffi::PyModuleDef_HEAD_INIT,
+    m_name: std::ptr::null(),
+    m_doc: std::ptr::null(),
+    m_size: std::mem::size_of::<ModuleState>() as isize,
+    m_methods: std::ptr::null_mut(),
+    m_slots: std::ptr::null_mut(),
+    m_traverse: None,
+    m_clear: None,
+    m_free: None,
+};
+
+/// Module initialization function.
+///
+/// Creates the Python module object from `MODULE_DEF` and runs
+/// `module_init()` on it. Returns the new module pointer on success. On
+/// failure it returns null; for errors raised on the Rust side the Python
+/// error indicator is set before returning.
+///
+/// We don't use the macros in the cpython crate because they are somewhat
+/// opinionated about how things should work. e.g. they call
+/// PyEval_InitThreads(), which is undesired. We want total control.
+#[allow(non_snake_case)]
+pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject {
+    let py = unsafe { cpython::Python::assume_gil_acquired() };
+
+    // TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31.
+    // So the name and doc pointers are patched in lazily on the first call.
+    unsafe {
+        if MODULE_DEF.m_name.is_null() {
+            MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _;
+            MODULE_DEF.m_doc = DOC.as_ptr() as *const _;
+        }
+    }
+
+    let raw_module = unsafe { pyffi::PyModule_Create(&mut MODULE_DEF) };
+
+    // Creation failed: hand the null pointer straight back to the caller.
+    if raw_module.is_null() {
+        return raw_module;
+    }
+
+    let module_object = unsafe { PyObject::from_owned_ptr(py, raw_module) };
+    let module = match module_object.cast_into::<PyModule>(py) {
+        Ok(typed) => typed,
+        Err(cast_err) => {
+            PyErr::from(cast_err).restore(py);
+            return std::ptr::null_mut();
+        }
+    };
+
+    if let Err(init_err) = module_init(py, &module) {
+        init_err.restore(py);
+        return std::ptr::null_mut();
+    }
+
+    // Transfer ownership of the module reference to the caller.
+    module.into_object().steal_ptr()
+}
diff --git a/rust/pyembed/src/data.rs b/rust/pyembed/src/data.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/data.rs
@@ -0,0 +1,5 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+include!(env!("PYEMBED_DATA_RS_PATH"));
diff --git a/rust/pyembed/src/config.rs b/rust/pyembed/src/config.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/src/config.rs
@@ -0,0 +1,195 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Data structures for configuring a Python interpreter.
+
+use python3_sys as pyffi;
+use std::ffi::CString;
+
+/// Defines which allocator to use for the raw domain.
+///
+/// The chosen value is carried in `PythonConfig::raw_allocator`.
+#[derive(Clone, Debug)]
+pub enum PythonRawAllocator {
+ /// Use jemalloc.
+ ///
+ /// NOTE(review): presumably only usable when the crate's optional
+ /// `jemalloc` feature is enabled - confirm.
+ Jemalloc,
+ /// Use the Rust global allocator.
+ Rust,
+ /// Use the system allocator.
+ System,
+}
+
+/// Defines Python code to run.
+///
+/// The chosen value is carried in `PythonConfig::run` and describes what
+/// code to run by default.
+#[derive(Clone, Debug)]
+pub enum PythonRunMode {
+ /// No-op.
+ None,
+ /// Run a Python REPL.
+ Repl,
+ /// Run a Python module as the main module.
+ ///
+ /// `module` holds the name of the module to run.
+ Module { module: String },
+ /// Evaluate Python code from a string.
+ ///
+ /// `code` holds the source text to evaluate.
+ Eval { code: String },
+}
+
+/// Defines `terminfo` database resolution semantics.
+///
+/// The chosen value is carried in `PythonConfig::terminfo_resolution`.
+#[derive(Clone, Debug)]
+pub enum TerminfoResolution {
+ /// Resolve `terminfo` database using appropriate behavior for current OS.
+ Dynamic,
+ /// Do not attempt to resolve the `terminfo` database. Basically a no-op.
+ None,
+ /// Use a specified string as the `TERMINFO_DIRS` value.
+ Static(String),
+}
+
+/// Defines an extra extension module to load.
+///
+/// Instances are collected in `PythonConfig::extra_extension_modules`.
+#[derive(Clone, Debug)]
+pub struct ExtensionModule {
+ /// Name of the extension module.
+ pub name: CString,
+
+ /// Extension module initialization function.
+ ///
+ /// Takes no arguments and returns a raw Python object pointer.
+ pub init_func: unsafe extern "C" fn() -> *mut pyffi::PyObject,
+}
+
+/// Holds the configuration of an embedded Python interpreter.
+///
+/// Instances of this struct can be used to construct Python interpreters.
+///
+/// Each instance contains the total state to define the run-time behavior of
+/// a Python interpreter.
+#[derive(Clone, Debug)]
+pub struct PythonConfig {
+ /// Name of encoding for stdio handles.
+ pub standard_io_encoding: Option<String>,
+
+ /// Name of encoding error mode for stdio handles.
+ pub standard_io_errors: Option<String>,
+
+ /// Python optimization level.
+ pub opt_level: i32,
+
+ /// Whether to load our custom frozen importlib bootstrap modules.
+ pub use_custom_importlib: bool,
+
+ /// Whether to load the filesystem-based sys.meta_path finder.
+ pub filesystem_importer: bool,
+
+ /// Filesystem paths to add to sys.path.
+ ///
+ /// ``$ORIGIN`` will resolve to the directory of the application at
+ /// run-time.
+ pub sys_paths: Vec<String>,
+
+ /// Controls whether to detect comparing bytes/bytearray with str.
+ ///
+ /// If 1, issues a warning. If 2 or greater, raises a BytesWarning
+ /// exception.
+ pub bytes_warning: i32,
+
+ /// Whether to load the site.py module at initialization time.
+ pub import_site: bool,
+
+ /// Whether to load a user-specific site module at initialization time.
+ pub import_user_site: bool,
+
+ /// Whether to ignore various PYTHON* environment variables.
+ pub ignore_python_env: bool,
+
+ /// Whether to enter interactive mode after executing a script or a command.
+ pub inspect: bool,
+
+ /// Whether to put interpreter in interactive mode.
+ pub interactive: bool,
+
+ /// Whether to enable isolated mode.
+ pub isolated: bool,
+
+ /// If set, set the Windows filesystem encoding to mbcs and the filesystem
+ /// error handler to replace.
+ pub legacy_windows_fs_encoding: bool,
+
+ /// Whether to use io.File instead of io.WindowsConsoleIO for sys.stdin,
+ /// sys.stdout, and sys.stderr.
+ pub legacy_windows_stdio: bool,
+
+ /// Whether to suppress writing of ``.pyc`` files when importing ``.py``
+ /// files from the filesystem. This is typically irrelevant since modules
+ /// are imported from memory.
+ pub dont_write_bytecode: bool,
+
+ /// Whether stdout and stderr streams should be unbuffered.
+ pub unbuffered_stdio: bool,
+
+ /// Whether to enable parser debugging output.
+ pub parser_debug: bool,
+
+ /// Whether to enable quiet mode.
+ pub quiet: bool,
+
+ /// Whether to use the PYTHONHASHSEED environment variable to initialize the
+ /// hash seed.
+ pub use_hash_seed: bool,
+
+ /// Controls the level of the verbose mode for the interpreter.
+ pub verbose: i32,
+
+ /// Bytecode for the importlib._bootstrap / _frozen_importlib module.
+ pub frozen_importlib_data: &'static [u8],
+
+ /// Bytecode for the importlib._bootstrap_external / _frozen_importlib_external module.
+ pub frozen_importlib_external_data: &'static [u8],
+
+ /// Reference to raw Python modules data.
+ ///
+ /// The referenced data is produced as part of PyOxidizer packaging. This
+ /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
+ pub py_modules_data: &'static [u8],
+
+ /// Reference to raw Python resources data.
+ ///
+ /// The referenced data is produced as part of PyOxidizer packaging. This
+ /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
+ pub py_resources_data: &'static [u8],
+
+ /// Extra extension modules to make available to the interpreter.
+ ///
+ /// The values will effectively be passed to ``PyImport_ExtendInittab()``.
+ pub extra_extension_modules: Vec<ExtensionModule>,
+
+ /// Whether to set sys.argvb with bytes versions of process arguments.
+ ///
+ /// On Windows, bytes will be UTF-16. On POSIX, bytes will be raw char*
+ /// values passed to `int main()`.
+ pub argvb: bool,
+
+ /// Whether to set sys.frozen=True.
+ ///
+ /// Setting this will enable Python to emulate "frozen" binaries, such as
+ /// those used by PyInstaller.
+ pub sys_frozen: bool,
+
+ /// Whether to set sys._MEIPASS to the directory of the executable.
+ ///
+ /// Setting this will enable Python to emulate PyInstaller's behavior
+ /// of setting this attribute.
+ pub sys_meipass: bool,
+
+ /// Which memory allocator to use for the raw domain.
+ pub raw_allocator: PythonRawAllocator,
+
+ /// How to resolve the `terminfo` database.
+ pub terminfo_resolution: TerminfoResolution,
+
+ /// Name of the environment variable holding the directory to write a
+ /// loaded modules file to.
+ ///
+ /// If this value is set and the environment variable it names is set,
+ /// on interpreter shutdown, we will write a ``modules-<random>`` file to
+ /// the directory specified containing a ``\n`` delimited list of modules
+ /// loaded in ``sys.modules``.
+ pub write_modules_directory_env: Option<String>,
+
+ /// Defines what code to run by default.
+ pub run: PythonRunMode,
+}
diff --git a/rust/pyembed/build.rs b/rust/pyembed/build.rs
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/build.rs
@@ -0,0 +1,65 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::env;
+use std::path::PathBuf;
+use std::process;
+
+/// Path to pyoxidizer executable this file was created with.
+///
+/// `main()` only falls back to this value when the `PYOXIDIZER_EXE`
+/// environment variable is not set.
+/// NOTE(review): this is an absolute path on the machine that generated the
+/// file, so other machines will most likely need `PYOXIDIZER_EXE` set.
+const DEFAULT_PYOXIDIZER_EXE: &str = r#"/Users/gps/.cargo/bin/pyoxidizer"#;
+
+/// Build script entry point.
+///
+/// Operates in one of two modes:
+///
+/// * If `PYOXIDIZER_REUSE_ARTIFACTS` is set, re-emit the contents of
+///   `cargo_metadata.txt` found in `PYOXIDIZER_ARTIFACT_DIR` (falling back to
+///   `OUT_DIR`) so pre-built artifacts are registered with this cargo
+///   invocation.
+/// * Otherwise, run `pyoxidizer run-build-script build.rs` using the
+///   executable named by `PYOXIDIZER_EXE` (falling back to
+///   `DEFAULT_PYOXIDIZER_EXE`).
+///
+/// # Panics
+///
+/// Panics if the metadata file cannot be read, if the pyoxidizer executable
+/// does not exist, or if running it fails or exits unsuccessfully.
+fn main() {
+    // We support using pre-built artifacts, in which case we emit the
+    // cargo metadata lines from the "original" build to "register" the
+    // artifacts with this cargo invocation.
+    if env::var("PYOXIDIZER_REUSE_ARTIFACTS").is_ok() {
+        let artifact_dir_path = match env::var("PYOXIDIZER_ARTIFACT_DIR") {
+            Ok(dir) => PathBuf::from(dir),
+            Err(_) => PathBuf::from(
+                env::var("OUT_DIR").expect("cargo always sets OUT_DIR for build scripts"),
+            ),
+        };
+
+        println!(
+            "using pre-built artifacts from {}",
+            artifact_dir_path.display()
+        );
+
+        println!("cargo:rerun-if-env-changed=PYOXIDIZER_REUSE_ARTIFACTS");
+        println!("cargo:rerun-if-env-changed=PYOXIDIZER_ARTIFACT_DIR");
+
+        // Emit the cargo metadata lines to register libraries for linking.
+        let cargo_metadata_path = artifact_dir_path.join("cargo_metadata.txt");
+        // Build the panic message only on failure and keep the underlying
+        // I/O error so the cause is visible in the build log.
+        let metadata = std::fs::read_to_string(&cargo_metadata_path).unwrap_or_else(|e| {
+            panic!("failed to read {}: {}", cargo_metadata_path.display(), e)
+        });
+        println!("{}", metadata);
+    } else {
+        let pyoxidizer_exe =
+            env::var("PYOXIDIZER_EXE").unwrap_or_else(|_| DEFAULT_PYOXIDIZER_EXE.to_string());
+
+        if !PathBuf::from(&pyoxidizer_exe).exists() {
+            panic!("pyoxidizer executable does not exist: {}", &pyoxidizer_exe);
+        }
+
+        let status = process::Command::new(&pyoxidizer_exe)
+            .arg("run-build-script")
+            .arg("build.rs")
+            .status()
+            .unwrap_or_else(|e| panic!("`pyoxidizer run-build-script` failed: {}", e));
+
+        if !status.success() {
+            panic!("`pyoxidizer run-build-script` failed");
+        }
+    }
+}
diff --git a/rust/pyembed/Cargo.toml b/rust/pyembed/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/pyembed/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "pyembed"
+version = "0.3.0"
+authors = ["Gregory Szorc <gregory.szorc at gmail.com>"]
+edition = "2018"
+build = "build.rs"
+
+[dependencies]
+byteorder = "1"
+jemalloc-sys = { version = "0.3", optional = true }
+lazy_static = "1.3"
+libc = "0.2"
+uuid = { version = "0.7", features = ["v4"] }
+
+[dependencies.python3-sys]
+git = "https://github.com/indygreg/PyOxidizer.git"
+tag = "v0.3.0"
+
+[dependencies.cpython]
+git = "https://github.com/indygreg/PyOxidizer.git"
+tag = "v0.3.0"
+features = ["link-mode-unresolved-static", "python3-sys", "no-auto-initialize"]
+
+[features]
+default = []
+jemalloc = ["jemalloc-sys"]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,3 +1,3 @@
[workspace]
members = ["hg-core", "hg-direct-ffi", "hg-cpython"]
-exclude = ["chg", "hgcli"]
+exclude = ["chg", "hgcli", "pyembed"]
To: indygreg, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list