D1581: [RFC] rust: Rust implementation of `hg` and standalone packaging

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Thu Dec 7 02:55:08 EST 2017


indygreg updated this revision to Diff 4165.
indygreg edited the summary of this revision.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1581?vs=4112&id=4165

REVISION DETAIL
  https://phab.mercurial-scm.org/D1581

AFFECTED FILES
  .hgignore
  contrib/STANDALONE-MERCURIAL.rst
  contrib/build-standalone.py
  rust/.cargo/config
  rust/.hgignore
  rust/Cargo.lock
  rust/Cargo.toml
  rust/README.rst
  rust/hgcli/Cargo.toml
  rust/hgcli/build.rs
  rust/hgcli/src/main.rs
  tests/run-tests.py

CHANGE DETAILS

diff --git a/tests/run-tests.py b/tests/run-tests.py
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -2442,6 +2442,14 @@
             # executed in the test suite that needs to import 'mercurial'
             # ... which means it's not really redundant at all.
             self._pythondir = self._bindir
+
+            # The harness assumes ./hg runs from the source directory or that
+            # Mercurial files live next to hg. A Rust binary built under
+            # target/<profile>/hg breaks that assumption, so use the parent
+            # of the test directory instead. TODO make this less hacky.
+            if re.search(r'target/[^/]+/hg', whg):
+                self._pythondir = os.path.dirname(self._testdir)
+
         else:
             self._installdir = os.path.join(self._hgtmp, b"install")
             self._bindir = os.path.join(self._installdir, b"bin")
diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/main.rs
@@ -0,0 +1,275 @@
+// main.rs -- Main routines for `hg` program
+//
+// Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+extern crate libc;
+extern crate cpython;
+extern crate python27_sys;
+
+use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python};
+use libc::{c_char, c_int};
+
+use std::env;
+use std::path::PathBuf;
+use std::ffi::CString;
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::OsStringExt;
+
+#[derive(Debug)]
+struct Environment {
+    _exe: PathBuf, // path of the running hg executable (stored but not read in this file)
+    python_exe: PathBuf, // handed to Py_SetProgramName() so Python can locate its files
+    python_home: PathBuf, // handed to Py_SetPythonHome()
+    mercurial_modules: PathBuf, // directory inserted at the front of sys.path
+}
+
+/// Build the environment for running out of a source distribution or checkout.
+///
+/// hg is <srcdir>/rust/target/<target>/hg
+/// The Python interpreter path is baked in by the build script.
+/// Python home is derived from the interpreter's location.
+/// Mercurial files are loaded from <srcdir>, four levels above the hg binary.
+///
+/// Panics if the path of the current executable cannot be determined.
+#[cfg(feature = "localdev")]
+fn get_environment() -> Environment {
+    let exe = env::current_exe().unwrap();
+
+    // Strip hg, <target>, target and rust to arrive at <srcdir>.
+    let mut mercurial_modules = exe.clone();
+    for _ in 0..4 {
+        mercurial_modules.pop();
+    }
+
+    let python_exe = PathBuf::from(env!("PYTHON_INTERPRETER"));
+
+    // On Windows, python2.7.exe exists at the root directory of the Python
+    // install. Everywhere else, the Python install root is one level up.
+    let levels_up = if python_exe.ends_with("python2.7.exe") { 1 } else { 2 };
+    let mut python_home = python_exe.clone();
+    for _ in 0..levels_up {
+        python_home.pop();
+    }
+
+    Environment {
+        _exe: exe,
+        python_exe: python_exe,
+        python_home: python_home,
+        mercurial_modules: mercurial_modules,
+    }
+}
+
+/// Run Mercurial from a standalone environment on Windows.
+///
+/// The current hg.exe executable is somewhere. Let's say ./hg.exe.
+/// There is a ./python27.dll in the same directory as hg.exe.
+/// Python standard library and other files are in ./hgpython.
+/// Mercurial files are in ./mercurial.
+#[cfg(all(feature = "standalone", target_os = "windows"))]
+fn get_environment() -> Environment {
+    let exe = env::current_exe().unwrap();
+    let exe_dir = exe.parent().unwrap().to_path_buf();
+
+    // Owned copy of hg.exe's directory; it becomes the sys.path entry.
+    let mercurial_modules = exe_dir.clone();
+
+    let python_home = exe_dir.join("hgpython");
+
+    // TODO this value is wrong.
+    let python_exe = exe_dir.join("python2.7.exe");
+
+    Environment {
+        _exe: exe,
+        python_exe: python_exe,
+        python_home: python_home,
+        mercurial_modules: mercurial_modules,
+    }
+}
+
+/// Run Mercurial from a standalone environment on POSIX systems.
+///
+/// The current hg binary is assumed to be <root>/bin/hg.
+/// Python home is <root>/hgpython, with the interpreter in its bin/.
+/// Mercurial files are in <root>/hgmodules.
+///
+/// Panics if the executable path is unknown or has fewer than two parents.
+///
+/// TODO the layout here is pretty poor. This code is not well-tested.
+#[cfg(all(feature = "standalone", not(target_os = "windows")))]
+fn get_environment() -> Environment {
+    let exe = env::current_exe().unwrap();
+
+    // <root> is two levels above the binary: <root>/bin/hg.
+    let root = exe.parent().unwrap().parent().unwrap().to_path_buf();
+
+    // NOTE(review): build-standalone.py appears to install Mercurial under
+    // <root>/mercurial rather than <root>/hgmodules -- confirm the layout.
+    let mercurial_modules = root.join("hgmodules");
+
+    let python_home = root.join("hgpython");
+
+    let python_exe = python_home.join("bin").join("python2.7");
+
+    Environment {
+        _exe: exe,
+        python_exe: python_exe,
+        python_home: python_home,
+        mercurial_modules: mercurial_modules,
+    }
+}
+
+// UNIX hands us argv as raw char* byte strings, so each argument converts
+// directly to a C string without any re-encoding.
+#[cfg(target_family = "unix")]
+fn args_to_cstrings() -> Vec<CString> {
+    env::args_os().map(OsStringExt::into_vec).map(|raw| CString::new(raw).unwrap()).collect()
+}
+
+// Windows is more complicated. We will get wchar strings that are thin
+// wrappers around GetCommandLineW() + CommandLineToArgvW(). We need to
+// use WideCharToMultiByte() to convert these to the system default Windows
+// ANSI code page (CP_ACP) to feed into Python. Once our Rust is a bit
+// more established, we could do something better here, such as convert to
+// a PyUnicode and pass them to Mercurial and let Mercurial deal with the
+// decoding.
+//#[cfg(target_family = "windows")]
+//fn args_to_cstrings() -> Vec<CString> {
+//}
+
+// Tell Python where its home is. Panics if the path is not valid UTF-8.
+fn set_python_home(env: &Environment) {
+    let home = env.python_home.to_str().unwrap();
+
+    // into_raw() gives up ownership; the string is not freed in this function.
+    let home_ptr = CString::new(home).unwrap().into_raw();
+    unsafe { python27_sys::Py_SetPythonHome(home_ptr) };
+}
+
+// Switch Python's default encoding to "undefined" when HGUNICODEPEDANTRY is set.
+fn update_encoding(py: Python, sys_mod: &PyModule) {
+    // A variable that cannot be read leaves the encoding untouched.
+    if env::var("HGUNICODEPEDANTRY").is_err() {
+        return;
+    }
+
+    // TODO do we need to call reload(sys) here? Should we set Python encoding
+    // before we start Python interpreter?
+    let outcome = sys_mod.call(py, "setdefaultencoding", ("undefined",), None);
+    outcome.expect("sys.setdefaultencoding() failed");
+}
+
+/// Put the Mercurial modules directory first on `sys.path`; panics if it is not UTF-8.
+fn update_modules_path(env: &Environment, py: Python, sys_mod: &PyModule) {
+    let modules = env.mercurial_modules.to_str().expect("modules path is not valid UTF-8");
+    sys_mod.get(py, "path").unwrap().call_method(py, "insert", (0, modules), None)
+        .expect("failed to update sys.path to location of Mercurial modules");
+}
+
+/// Initialize the embedded Python interpreter and run Mercurial inside it.
+/// Returns `Err(255)` after printing the error if running Mercurial fails.
+fn run() -> Result<(), i32> {
+    let env = get_environment();
+
+    // Tell Python where it is installed.
+    set_python_home(&env);
+
+    // Set program name. The backing memory needs to live for the duration of the
+    // interpreter, so the string is released with into_raw() and never freed;
+    // as_ptr() on a temporary CString would hand Python a dangling pointer.
+    //
+    // Yes, we use the path to the Python interpreter not argv[0] here. The
+    // reason is because Python uses the given path to find the location of
+    // Python files. Apparently we could define our own ``Py_GetPath()``
+    // implementation. But this may require statically linking Python, which is
+    // not desirable.
+    let program_name = CString::new(env.python_exe.to_str().unwrap()).unwrap().into_raw();
+    unsafe {
+        python27_sys::Py_SetProgramName(program_name);
+    }
+
+    unsafe {
+        python27_sys::Py_Initialize();
+    }
+
+    // https://docs.python.org/2/c-api/init.html#c.PySys_SetArgvEx has important
+    // usage information about PySys_SetArgvEx:
+    //
+    // * It says the first argument should be the script that is being executed.
+    //   If not a script, it can be empty. We are definitely not a script.
+    //   However, parts of Mercurial do look at sys.argv[0]. So we need to set
+    //   something here.
+    //
+    // * When embedding Python, we should use ``PySys_SetArgvEx()`` and set
+    //   ``updatepath=0`` for security reasons. Essentially, Python's default
+    //   logic will treat an empty argv[0] in a manner that could result in
+    //   sys.path picking up directories it shouldn't and this could lead to
+    //   loading untrusted modules.
+
+    // env::args() will panic if it sees a non-UTF-8 byte sequence. So we need
+    // to use env::args_os() and pass the raw bytes down to Python/Mercurial.
+
+    let args = args_to_cstrings();
+    let argv: Vec<*const c_char> = args.iter().map(|a| a.as_ptr()).collect();
+
+    // `args` owns the strings; `argv` only borrows pointers into them.
+    unsafe {
+        python27_sys::PySys_SetArgvEx(args.len() as c_int, argv.as_ptr() as *mut *mut c_char, 0);
+    }
+
+    // We need to do this to appease the cpython package.
+    unsafe {
+        python27_sys::PyEval_InitThreads();
+        let _state = python27_sys::PyEval_SaveThread();
+    }
+
+    let gil = Python::acquire_gil();
+    let py = gil.python();
+
+    let sys_mod = py.import("sys").unwrap();
+
+    update_encoding(py, &sys_mod);
+    update_modules_path(&env, py, &sys_mod);
+
+    // TODO we don't capture exit code from Mercurial.
+    let result = match run_py(py) {
+        Err(err) => {
+            err.print(py);
+            Err(255)
+        }
+        Ok(()) => Ok(()),
+    };
+
+    // The GIL needs to be held when we call this. So it needs to be in this
+    // scope with the active GILGuard.
+    // TODO this crashes when Python raises an uncaught exception. Unsure
+    // of why. Is this even needed???
+    /*
+    unsafe {
+        python27_sys::Py_Finalize();
+    }
+    */
+
+    result
+}
+
+/// Enable demand importing, then hand control to `mercurial.dispatch.run()`.
+///
+/// Any Python exception raised along the way is returned as the error.
+fn run_py(py: Python) -> PyResult<()> {
+    py.import("hgdemandimport")?.call(py, "enable", NoArgs, None)?;
+
+    py.import("mercurial.dispatch")?.call(py, "run", NoArgs, None)?;
+    Ok(())
+}
+
+/// Process entry point: turn the outcome of `run()` into an exit status.
+fn main() {
+    // A failure carries its own exit code; success exits with 0.
+    if let Err(code) = run() {
+        std::process::exit(code);
+    }
+    std::process::exit(0);
+}
diff --git a/rust/hgcli/build.rs b/rust/hgcli/build.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/build.rs
@@ -0,0 +1,146 @@
+// build.rs -- Configure build environment for `hgcli` Rust package.
+//
+// Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use std::collections::HashMap;
+use std::env;
+use std::path::Path;
+use std::process::Command;
+
+struct PythonConfig {
+    python: String, // interpreter path read from DEP_PYTHON27_PYTHON_INTERPRETER
+    config: HashMap<String, String>, // sysconfig.get_config_vars() parsed into key/value strings
+}
+
+/// Query the interpreter for its `sysconfig` variables; panics if it is missing or fails.
+fn get_python_config() -> PythonConfig {
+    // The python27-sys crate exports a Cargo variable defining the full
+    // path to the interpreter being used.
+    let python = env::var("DEP_PYTHON27_PYTHON_INTERPRETER")
+        .expect("Missing environment variable defining Python interpreter path; are you using the correct python27-sys crate?");
+
+    if !Path::new(&python).exists() {
+        panic!("Python interpreter {} does not exist; this should never happen", python);
+    }
+
+    let separator = "SEPARATOR STRING";
+
+    let script = "import sysconfig; \
+c = sysconfig.get_config_vars(); \
+print('SEPARATOR STRING'.join('%s=%s' % i for i in c.items()))";
+
+    let out = Command::new(&python).arg("-c").arg(script).output().unwrap();
+
+    if !out.status.success() {
+        panic!(
+            "python script failed: {}",
+            String::from_utf8_lossy(&out.stderr)
+        );
+    }
+
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    let mut m = HashMap::new();
+
+    // print() appends a newline; trim it so the final value compares cleanly.
+    for entry in stdout.trim().split(separator) {
+        let mut parts = entry.splitn(2, "=");
+        let key = parts.next().unwrap();
+        // Treat an entry without "=" as an empty value instead of panicking.
+        let value = parts.next().unwrap_or("");
+        m.insert(String::from(key), String::from(value));
+    }
+
+    PythonConfig {
+        python: python,
+        config: m,
+    }
+}
+
+// A shared libpython exists when sysconfig reports Py_ENABLE_SHARED=1.
+#[cfg(not(target_os = "windows"))]
+fn have_shared(config: &PythonConfig) -> bool {
+    let enabled = config.config.get("Py_ENABLE_SHARED");
+    // A missing variable counts as "not shared".
+    enabled.map(|value| value == "1").unwrap_or(false)
+}
+
+// Windows: Python is shared when python27.dll exists next to the interpreter.
+// `config.python` is a String, so it must go through Path to swap the file
+// name; String::pop()/push() operate on characters, not path components.
+#[cfg(target_os = "windows")]
+fn have_shared(config: &PythonConfig) -> bool {
+    // python27.dll should exist next to python2.7.exe.
+    let dll = Path::new(&config.python).with_file_name("python27.dll");
+    dll.exists()
+}
+
+static REQUIRED_CONFIG_FLAGS: [&'static str; 2] = [ // config vars that must equal "1"
+    "Py_USING_UNICODE",
+    "WITH_THREAD",
+];
+
+static REQUIRED_UNSET_FLAGS: [&'static str; 4] = [ // config vars that must be absent or "0"
+    "Py_DEBUG",
+    "Py_REF_DEBUG",
+    "Py_TRACE_REFS",
+    "COUNT_ALLOCS",
+];
+
+/// Build script entry point: validate the detected Python and emit Cargo
+/// directives (interpreter path, optional extra native link search path).
+fn main() {
+    let config = get_python_config();
+
+    println!("Using Python: {}", config.python);
+    println!("cargo:rustc-env=PYTHON_INTERPRETER={}", config.python);
+
+    let prefix = config.config.get("prefix").unwrap();
+
+    println!("Prefix: {}", prefix);
+
+    for key in REQUIRED_CONFIG_FLAGS.iter() {
+        // A flag that is missing or not "1" means the feature is unavailable.
+        let result = config.config.get(*key).map(|v| v == "1").unwrap_or(false);
+
+        if !result {
+            panic!("Detected Python lacks required feature {}", key);
+        }
+    }
+
+    for key in REQUIRED_UNSET_FLAGS.iter() {
+        let result = match config.config.get(*key) {
+            Some(value) => value != "0",
+            None => false,
+        };
+
+        if result {
+            panic!("Detected Python feature {} is not supported", key);
+        }
+    }
+
+    // We need a Python shared library.
+    if !have_shared(&config) {
+        panic!("Detected Python lacks a shared library, which is required");
+    }
+
+    let ucs4 = match config.config.get("Py_UNICODE_SIZE") {
+        Some(value) => value == "4",
+        None => false,
+    };
+
+    if !ucs4 {
+        panic!("Detected Python doesn't support UCS-4 code points");
+    }
+
+    // If building standalone Mercurial, add an extra link path for
+    // native libraries.
+    if let Some(lib_path) = env::var_os("HG_STANDALONE_LINK_PATH") {
+        println!(
+            "cargo:rustc-link-search=native={}",
+            lib_path.to_str().unwrap()
+        );
+    }
+}
diff --git a/rust/hgcli/Cargo.toml b/rust/hgcli/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/Cargo.toml
@@ -0,0 +1,38 @@
+[package]
+name = "hgcli"
+version = "0.1.0"
+authors = ["Gregory Szorc <gregory.szorc at gmail.com>"]
+
+build = "build.rs"
+
+[[bin]]
+name = "hg"
+path = "src/main.rs"
+
+[features]
+# localdev: detect Python in PATH and use files from source checkout.
+# standalone: use bundled Python and files from bin-relative path.
+default = ["localdev"]
+standalone = []
+localdev = []
+
+[dependencies]
+libc = "0.2.34"
+
+# We currently use a custom build of cpython and python27-sys with the
+# following changes:
+# * Exports Cargo variable defining the full path to Python interpreter.
+# * sysmodule exports for PySys_SetArgv.
+#
+# TODO switch to official release when our changes are incorporated.
+[dependencies.cpython]
+version = "0.1"
+default-features = false
+features = ["python27-sys"]
+git = "https://github.com/indygreg/rust-cpython.git"
+rev = "94b357f2ec56270daa1aa7ab3c776ed8e409ceee"
+
+[dependencies.python27-sys]
+version = "0.1.2"
+git = "https://github.com/indygreg/rust-cpython.git"
+rev = "94b357f2ec56270daa1aa7ab3c776ed8e409ceee"
diff --git a/rust/README.rst b/rust/README.rst
new file mode 100644
--- /dev/null
+++ b/rust/README.rst
@@ -0,0 +1,58 @@
+===================
+Mercurial Rust Code
+===================
+
+This directory contains various Rust code for the Mercurial project.
+
+The top-level ``Cargo.toml`` file defines a workspace containing
+all primary Mercurial crates.
+
+Building
+========
+
+To build the Rust components::
+
+   $ cargo build
+
+If you prefer a non-debug / release configuration::
+
+   $ cargo build --release
+
+Features
+--------
+
+The following Cargo features are available:
+
+localdev (default)
+   Produce files that work with an in-source-tree build.
+
+   In this mode, the build finds and uses a ``python2.7`` binary from
+   ``PATH``. The ``hg`` binary assumes it runs from ``rust/target/<target>/hg``
+   and it finds Mercurial files at ``dirname($0)/../../../``.
+
+standalone
+   Produce files that work in a standalone Mercurial distribution.
+
+   Standalone distributions are self-contained and contain their own
+   bundled version of Python and all Mercurial support files. Paths
+   to these dependencies are hard-coded as relative to the ``hg``
+   binary. The exact layout is platform/target dependent.
+
+Running
+=======
+
+The ``hgcli`` crate produces an ``hg`` binary. You can run this binary
+via ``cargo run``::
+
+   $ cargo run --manifest-path hgcli/Cargo.toml
+
+Or directly::
+
+   $ target/debug/hg
+   $ target/release/hg
+
+You can also run the test harness with this binary::
+
+   $ ./run-tests.py --with-hg ../rust/target/debug/hg
+
+Some tests are still failing when run with the Rust binary, however.
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,2 @@
+[workspace]
+members = ["hgcli"]
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
new file mode 100644
--- /dev/null
+++ b/rust/Cargo.lock
@@ -0,0 +1,127 @@
+[[package]]
+name = "aho-corasick"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "cpython"
+version = "0.1.0"
+source = "git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee#94b357f2ec56270daa1aa7ab3c776ed8e409ceee"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
+ "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)",
+]
+
+[[package]]
+name = "hgcli"
+version = "0.1.0"
+dependencies = [
+ "cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)",
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.1.41"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "python27-sys"
+version = "0.1.2"
+source = "git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee#94b357f2ec56270daa1aa7ab3c776ed8e409ceee"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.1.80"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "thread-id"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread_local"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "utf8-ranges"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
+"checksum cpython 0.1.0 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)" = "<none>"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)" = "36fbc8a8929c632868295d0178dd8f63fc423fd7537ad0738372bd010b3ac9b0"
+"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070"
+"checksum python27-sys 0.1.2 (git+https://github.com/indygreg/rust-cpython.git?rev=94b357f2ec56270daa1aa7ab3c776ed8e409ceee)" = "<none>"
+"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
+"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
+"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/rust/.hgignore b/rust/.hgignore
new file mode 100644
--- /dev/null
+++ b/rust/.hgignore
@@ -0,0 +1 @@
+target/
diff --git a/rust/.cargo/config b/rust/.cargo/config
new file mode 100644
--- /dev/null
+++ b/rust/.cargo/config
@@ -0,0 +1,7 @@
+# Rust builds with a modern MSVC and uses a newer CRT.
+# Python 2.7 has a shared library dependency on an older CRT (msvcr90.dll).
+# We statically link the modern CRT to avoid multiple msvcr*.dll libraries
+# being loaded and Python possibly picking up symbols from the newer runtime
+# (which would be loaded first).
+[target.'cfg(target_os = "windows")']
+rustflags = ["-Ctarget-feature=+crt-static"]
diff --git a/contrib/build-standalone.py b/contrib/build-standalone.py
new file mode 100755
--- /dev/null
+++ b/contrib/build-standalone.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python2.7
+# build-standalone.py - Create a standalone distribution of Mercurial.
+#
+# Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Create a standalone Mercurial distribution.
+
+This script does the bulk of the work for creating a standalone Mercurial
+distribution.
+"""
+
+import errno
+import gzip
+import hashlib
+import io
+import multiprocessing
+import os
+import shutil
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import urllib2
+
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+
+PYTHON_ARCHIVES = {
+    'version': '2.7.14',
+    'url': 'https://www.python.org/ftp/python/{version}/{prefix}-{version}.{suffix}',
+    'gz': {
+        'sha256': '304c9b202ea6fbd0a4a8e0ad3733715fbd4749f2204a9173a58ec53c32ea73e8',
+        'prefix': 'Python',
+        'suffix': 'tgz',
+        'tar_mode': 'r:gz',
+    },
+    'xz': {
+        'sha256': '71ffb26e09e78650e424929b2b457b9c912ac216576e6bd9e7d204ed03296a66',
+        'prefix': 'Python',
+        'suffix': 'tar.xz',  # upstream file is Python-<version>.tar.xz
+        'tar_mode': 'r:xz',
+    },
+    'msi32': {
+        'sha256': '450bde0540341d4f7a6ad2bb66639fd3fac1c53087e9844dc34ddf88057a17ca',
+        'prefix': 'python',
+        'suffix': 'msi',
+    },
+    'msi64': {
+        'sha256': 'af293df7728b861648162ba0cd4a067299385cb6a3f172569205ac0b33190693',
+        'prefix': 'python',
+        'suffix': 'amd64.msi',
+    }
+}
+
+
+def hash_file(fh):
+    """Return the hex SHA-256 digest of everything left to read from ``fh``."""
+    digest = hashlib.sha256()
+
+    # Consume the stream in 16384-byte reads until it is exhausted.
+    block = fh.read(16384)
+    while block:
+        digest.update(block)
+        block = fh.read(16384)
+    return digest.hexdigest()
+
+
+def makedirs(path):  # os.makedirs() that tolerates an already-existing path
+    try:
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:  # anything other than "already exists" is re-raised
+            raise
+
+
+def _ensure_python_source(dest_dir):
+    """Ensure the Python source code is extracted to a path."""
+    makedirs(dest_dir)
+
+    if lzma:  # use the xz archive when an lzma module could be imported
+        archive = PYTHON_ARCHIVES['xz']
+    else:
+        archive = PYTHON_ARCHIVES['gz']
+
+    archive_path = os.path.join(dest_dir,
+                                'python-%s.%s' % (PYTHON_ARCHIVES['version'],
+                                                  archive['suffix']))
+
+    if os.path.exists(archive_path):  # re-validate a previously downloaded archive
+        with open(archive_path, 'rb') as fh:
+            if hash_file(fh) != archive['sha256']:
+                print('%s has unexpected hash; removing' % archive_path)
+                os.unlink(archive_path)
+
+    if not os.path.exists(archive_path):  # missing, or just removed above
+        url = PYTHON_ARCHIVES['url'].format(
+            version=PYTHON_ARCHIVES['version'],
+            prefix=archive['prefix'],
+            suffix=archive['suffix'])
+
+        print('downloading %s' % url)
+
+        req = urllib2.urlopen(url)
+        if req.getcode() != 200:
+            raise Exception('non-200 HTTP response downloading Python: %d' % req.getcode())
+
+        buf = io.BytesIO()  # the whole download is buffered in memory
+        while True:
+            chunk = req.read(16384)
+            if not chunk:
+                break
+            buf.write(chunk)
+
+        buf.seek(0)
+        if hash_file(buf) != archive['sha256']:  # verify before anything is written to disk
+            raise Exception('Python hash mismatch')
+
+        buf.seek(0)
+        with open(archive_path, 'wb') as fh:
+            fh.write(buf.getvalue())
+
+    # Assume if a single file from the archive is present that we don't need
+    # to re-extract.
+    if os.path.exists(os.path.join(dest_dir, 'configure')):
+        print('extracted python source code found; using without modifications')
+        return
+
+    print('extracting %s to %s' % (archive_path, dest_dir))
+    with tarfile.open(archive_path, archive['tar_mode']) as tf:  # NOTE(review): 'r:xz' needs xz support in tarfile -- confirm on Python 2.7
+        prefix = 'Python-%s' % PYTHON_ARCHIVES['version']
+        for ti in tf:
+            assert ti.name.startswith(prefix)
+            ti.name = ti.name[len(prefix):].lstrip('/')  # drop the top-level directory so files land directly in dest_dir
+            tf.extract(ti, dest_dir)
+
+
+def _build_python(state):  # configure (once) and `make` Python in state['python_build_dir']
+    source_dir = state['python_source_dir']
+    build_dir = state['python_build_dir']
+    _ensure_python_source(source_dir)
+
+    makedirs(build_dir)
+
+    # TODO use a more sensible filesystem layout for Python in cases
+    # where the files will be installed alongside other system files
+    # (e.g. when producing deb or rpm archives).
+    if not os.path.exists(os.path.join(build_dir, 'config.status')):  # skip configure when config.status already exists
+        subprocess.check_call([
+            os.path.join(source_dir, 'configure'),
+            '--prefix', '/hgpython',
+            '--enable-shared',
+            '--enable-unicode=ucs4',
+            # TODO enable optimizations
+            # '--enable-optimizations',
+            # '--enable-lto',
+        ], cwd=build_dir)
+
+    subprocess.check_call([
+        'make', '-j%d' % multiprocessing.cpu_count(),  # one make job per CPU
+    ], cwd=build_dir)
+
+
+def install_python(state):
+    """Installs Python in the standalone directory.
+
+    Python is installed to the `hgpython/` sub-directory. The layout of
+    this directory resembles a typical Python distribution. In fact, the
+    Python installation could be used on its own, just like any other
+    Python installation.
+    """
+    # TODO on Windows, obtain Python files from official, self-contained
+    # binary distribution (via an MSI).
+    _build_python(state)
+
+    build_dir = state['python_build_dir']
+    py_dir = state['python_install_dir']
+
+    if os.path.exists(os.path.join(py_dir, 'bin', 'python')):  # an existing bin/python marks a finished install
+        print('python already installed in %s; skipping `make install`' %
+              py_dir)
+    else:
+        subprocess.check_call([
+            'make',
+            '-j%d' % multiprocessing.cpu_count(),
+            'install',
+            'DESTDIR=%s' % state['install_dir'],  # stage the install under install_dir
+        ], cwd=build_dir)
+
+    # Update shared library references to be relative to binary.
+    # TODO compile Python in such a way that this isn't necessary.
+    if sys.platform.startswith('linux'):
+        subprocess.check_call([
+            'patchelf',
+            '--set-rpath',
+            '$ORIGIN/../lib',
+            state['python_bin'],
+        ])
+    elif sys.platform == 'darwin':
+        subprocess.check_call([
+            'install_name_tool', '-change',
+            '/hgpython/lib/libpython2.7.dylib',  # install name derived from the configure --prefix
+            '@loader_path/../lib/libpython2.7.dylib',
+            state['python_bin'],
+        ])
+
+
+def install_rust_components(state):
+    """Build the Rust `hg` binary and install it into the distribution."""
+    rust_dir = os.path.join(state['root_dir'], 'rust', 'hgcli')
+    env = dict(os.environ)
+
+    # Tell cpython's build.rs to use our Python binary.
+    env['PYTHON_SYS_EXECUTABLE'] = os.path.join(
+        state['python_install_dir'], 'bin', 'python2.7')
+
+    # Tell our build.rs where to find libpython.
+    env['HG_STANDALONE_LINK_PATH'] = os.path.join(
+        state['python_install_dir'], 'lib')
+
+    subprocess.check_call(['cargo', 'build', '--release', '-v'],
+                          cwd=rust_dir, env=env)
+
+    subprocess.check_call([
+        'cargo',
+        'install',
+        '--force',
+        '--root', state['install_dir'],
+    ], cwd=rust_dir, env=env)
+
+    # TODO figure out how to link properly via Cargo.
+    # Point hg (in bin/) at libpython in ../hgpython/lib, relative to itself.
+    if sys.platform.startswith('linux'):
+        subprocess.check_call([
+            'patchelf',
+            '--set-rpath',
+            '$ORIGIN/../hgpython/lib',
+            state['hg_bin'],
+        ])
+    elif sys.platform == 'darwin':
+        subprocess.check_call([
+            'install_name_tool', '-change',
+            '/System/Library/Frameworks/Python.framework/Versions/2.7/Python',
+            '@loader_path/../hgpython/lib/libpython2.7.dylib',
+            state['hg_bin'],
+        ])
+
+def install_mercurial(state):
+    """Install Mercurial files into the distribution."""
+    install_dir = os.path.join(state['install_dir'])
+    python = os.path.join(state['python_install_dir'], 'bin', 'python')
+
+    temp_dir = tempfile.mkdtemp(dir=state['build_dir'])
+    try:
+        subprocess.check_call([
+            python, 'setup.py',
+            'build',
+            'install',
+                # These are the only files we care about.
+                '--install-lib', os.path.join(install_dir, 'mercurial'),
+
+                '--install-data', os.path.join(temp_dir, 'data'),
+                '--install-headers', os.path.join(temp_dir, 'headers'),
+                '--install-platlib', os.path.join(temp_dir, 'platlib'),
+                '--install-purelib', os.path.join(temp_dir, 'purelib'),
+                # `hg` is replaced by our binary version.
+                '--install-scripts', os.path.join(temp_dir, 'bin'),
+            ],
+            cwd=state['root_dir'])
+    finally:
+        temp_files = set()
+        for root, dirs, files in os.walk(temp_dir):
+            for f in files:
+                full = os.path.join(root, f)
+                temp_files.add(full[len(temp_dir)+1:])
+
+        shutil.rmtree(temp_dir)
+
+        expected = {
+            'bin/hg',
+        }
+        extra = temp_files - expected
+        if extra:
+            raise Exception('unknown extra files were installed: %s' %
+                            ', '.join(sorted(extra)))
+
+
+def _run_hg(args):
+    env = dict(os.environ)
+    env['HGPLAIN'] = '1'
+    env['HGRCPATH'] = ''
+
+    with open(os.devnull, 'wb') as devnull:
+        return subprocess.check_output([state['hg_bin']] + args,
+                                       env=env,
+                                       stderr=devnull)
+
+def verify_hg(state):
+    print('running `hg version`')
+    try:
+        print(_run_hg(['version']))
+    except subprocess.CalledProcessError as e:
+        print('error invoking `hg version`')
+        print(e.output)
+        sys.exit(1)
+
+
+def get_revision_info(state):
+    res = _run_hg(['-R', state['root_dir'], 'log', '-r', '.', '-T', '{node} {date}'])
+    node, date = res.split(' ')
+    return node, int(float(date))
+
+
+def _get_archive_files(state):
+    # Ideally we wouldn't have any ignores.
+    IGNORE = {
+        '.crates.toml',
+    }
+
+    for root, dirs, files in os.walk(state['install_dir']):
+        # sorts are here for determinism.
+        dirs.sort()
+        for f in sorted(files):
+            full = os.path.join(root, f)
+            rel = full[len(state['install_dir']) + 1:]
+
+            if rel in IGNORE:
+                continue
+
+            yield full, rel
+
+
+def create_tar(state, ts):
+    print('writing %s' % state['tar_path'])
+    with tarfile.TarFile(state['tar_path'], 'w') as tf:
+        for full, rel in _get_archive_files(state):
+            with open(full, 'rb') as fh:
+                ti = tf.gettarinfo(full, rel)
+
+                if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+                    print('setuid or setgid bits set: %s' % full)
+
+                # Normalize mtime to commit time.
+                ti.mtime = ts
+                # Normalize uid/gid to root:root.
+                ti.uid = 0
+                ti.gid = 0
+                ti.uname = ''
+                ti.gname = ''
+
+                tf.addfile(ti, fh)
+
+    #gz = state['tar_path'] + '.gz'
+    #print('writing %s' % gz)
+    #with open(state['tar_path'], 'rb') as ifh, gzip.GzipFile(gz, 'wb') as ofh:
+    #    shutil.copyfileobj(ifh, ofh)
+
+
+if __name__ == '__main__':
+    root = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+    root = os.path.abspath(root)
+    build_dir = os.path.join(root, 'build')
+
+    python_install_dir = os.path.join(build_dir, 'standalone', 'hgpython')
+
+    state = {
+        'root_dir': root,
+        'build_dir': build_dir,
+        'install_dir': os.path.join(build_dir, 'standalone'),
+        'python_source_dir': os.path.join(build_dir, 'python-src'),
+        'python_build_dir': os.path.join(build_dir, 'python-build'),
+        'python_install_dir': python_install_dir,
+        'python_bin': os.path.join(python_install_dir, 'bin', 'python2.7'),
+        'hg_bin': os.path.join(build_dir, 'standalone', 'bin', 'hg'),
+        'tar_path': os.path.join(build_dir, 'standalone.tar'),
+    }
+
+    makedirs(state['install_dir'])
+    install_python(state)
+    install_rust_components(state)
+    install_mercurial(state)
+    verify_hg(state)
+    node, ts = get_revision_info(state)
+    create_tar(state, ts)
diff --git a/contrib/STANDALONE-MERCURIAL.rst b/contrib/STANDALONE-MERCURIAL.rst
new file mode 100644
--- /dev/null
+++ b/contrib/STANDALONE-MERCURIAL.rst
@@ -0,0 +1,70 @@
+====================
+Standalone Mercurial
+====================
+
+*Standalone Mercurial* is a generic term given to a distribution
+of Mercurial that is standalone and has minimal dependencies on
+the host (typically just the C runtime library). Instead, most of
+Mercurial's dependencies are included in the distribution. This
+includes a Python interpreter.
+
+Architecture
+============
+
+A standalone Mercurial distribution essentially consists of the
+following elements:
+
+* An `hg` binary executable
+* A Python interpreter shared library
+* The Python standard library
+* 3rd party Python packages to enhance the Mercurial experience
+* Mercurial's Python packages
+* Mercurial support files (help content, default config files, etc)
+* Any additional support files (e.g. shared library dependencies)
+
+At a high level, the `hg` binary has a shared library dependency
+on `libpython`. The binary is configured to load the `libpython`
+that ships with the Mercurial distribution. When started, the
+`hg` binary assesses its state, configures an embedded Python
+interpreter, and essentially invokes Mercurial's `main()` function.
+
+Build Requirements
+==================
+
+Universal
+---------
+
+* Python 2.7 (to run the build script)
+* A working Rust and Cargo installation
+
+Linux
+-----
+
+* Dependencies to build Python 2.7 from source (GNU make, autoconf,
+  various dependencies for extensions)
+* The `patchelf` tool
+
+MacOS
+-----
+
+* Xcode
+
+Windows
+-------
+
+* Microsoft Visual C++ Compiler for Python 2.7 (https://www.microsoft.com/en-us/download/details.aspx?id=44266)
+
+Building
+========
+
+To build standalone Mercurial, run the following::
+
+   $ python2.7 contrib/build-standalone.py
+
+This will:
+
+1. Obtain a Python distribution (either by compiling from source
+   or downloading a pre-built distribution)
+2. Build Mercurial Rust components
+3. Build Mercurial Python components
+4. Produce an *archive* suitable for distribution
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -66,3 +66,5 @@
 # hackable windows distribution additions
 ^hg-python
 ^hg.py$
+
+subinclude:rust/.hgignore



To: indygreg, #hg-reviewers
Cc: yuja, quark, durin42, dlax, mercurial-devel


More information about the Mercurial-devel mailing list