D1581: [RFC] rust: Rust implementation of `hg` and standalone packaging

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Tue Dec 5 02:44:40 EST 2017


indygreg updated this revision to Diff 4112.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D1581?vs=4090&id=4112

REVISION DETAIL
  https://phab.mercurial-scm.org/D1581

AFFECTED FILES
  .hgignore
  contrib/STANDALONE-MERCURIAL.rst
  contrib/build-standalone.py
  rust/.cargo/config
  rust/.hgignore
  rust/Cargo.lock
  rust/Cargo.toml
  rust/README.rst
  rust/hgcli/Cargo.toml
  rust/hgcli/build.rs
  rust/hgcli/src/main.rs

CHANGE DETAILS

diff --git a/rust/hgcli/src/main.rs b/rust/hgcli/src/main.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/src/main.rs
@@ -0,0 +1,200 @@
+// main.rs -- Main routines for `hg` program
+//
+// Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+extern crate libc;
+extern crate cpython;
+extern crate which;
+
+use cpython::{NoArgs, ObjectProtocol, PyModule, PyResult, Python};
+use libc::{c_char, c_int};
+
+use std::env;
+use std::path::PathBuf;
+use std::ffi::CString;
+
+extern "C" {
+    pub fn Py_SetPythonHome(arg1: *mut c_char);
+    pub fn Py_SetProgramName(arg1: *const c_char);
+    pub fn Py_Initialize();
+    pub fn Py_Finalize();
+    pub fn PyEval_InitThreads();
+    // This actually returns a pointer to a struct.
+    pub fn PyEval_SaveThread() -> *mut c_char;
+    pub fn PySys_SetArgv(arg1: c_int, arg2: *const *const c_char) -> ();
+    pub fn PySys_SetArgvEx(arg1: c_int, arg2: *const *const c_char, arg3: c_int) -> ();
+}
+
+#[derive(Debug)]
+struct Environment {
+    _exe: PathBuf,
+    python_exe: PathBuf,
+    python_home: PathBuf,
+    mercurial_modules: PathBuf,
+}
+
+// TODO we probably want to customize the behavior of this function
+// based on cargo features/config.
+fn get_environment() -> Environment {
+    let exe_buf = env::current_exe().unwrap();
+
+    // Standalone layout is:
+    // bin/hg
+    // hgpython/bin/python2.7
+    // hgpython/lib/libpython2.7.so
+    let mut standalone_root = exe_buf.clone();
+    standalone_root.pop();
+    standalone_root.pop();
+
+    let mut standalone_python_exe = standalone_root.clone();
+    standalone_python_exe.push("hgpython");
+    standalone_python_exe.push("bin");
+    standalone_python_exe.push("python2.7");
+
+    let (is_standalone, python_exe) = match standalone_python_exe.exists() {
+        true => (true, standalone_python_exe),
+        // TODO handle failure gracefully.
+        false => (false, which::which("python2.7").unwrap()),
+    };
+
+    let mut python_home = python_exe.clone();
+    python_home.pop();
+    // On Windows, python2.7.exe exists at the root directory of the Python
+    // install. Everywhere else, the Python install root is one level up.
+    if !python_exe.ends_with("python2.7.exe") {
+        python_home.pop();
+    }
+
+    let mercurial_modules = if is_standalone {
+        let mut p = standalone_root.clone();
+        p.push("mercurial");
+        p
+    } else {
+        // rust/target/<build>/hg
+        let mut p = exe_buf.clone();
+        p.pop();
+        p.pop();
+        p.pop();
+        p.pop();
+
+        p.push("mercurial");
+        if !p.exists() {
+            panic!("could not find Mercurial modules");
+        }
+        p.pop();
+
+        p
+    };
+
+    Environment {
+        _exe: exe_buf.clone(),
+        python_exe: python_exe.clone(),
+        python_home: python_home.clone(),
+        mercurial_modules: mercurial_modules.clone(),
+    }
+}
+
+fn set_python_home(env: &Environment) {
+    let raw = CString::new(env.python_home.to_str().unwrap())
+        .unwrap()
+        .into_raw();
+    unsafe {
+        Py_SetPythonHome(raw);
+    }
+}
+
+fn update_encoding(py: Python, sys_mod: &PyModule) {
+    // Call sys.setdefaultencoding("undefined") if HGUNICODEPEDANTRY is set.
+    let pedantry = env::var("HGUNICODEPEDANTRY").is_ok();
+
+    // TODO do we need to call reload(sys) here? Should we set Python encoding
+    // before we start Python interpreter?
+    if pedantry {
+        sys_mod
+            .call(py, "setdefaultencoding", ("undefined",), None)
+            .expect("sys.setdefaultencoding() failed");
+    }
+}
+
+fn update_modules_path(env: &Environment, py: Python, sys_mod: &PyModule) {
+    let sys_path = sys_mod.get(py, "path").unwrap();
+    sys_path
+        .call_method(py, "insert", (0, env.mercurial_modules.to_str()), None)
+        .expect("failed to update sys.path to location of Mercurial modules");
+}
+
+/// Configures and starts the embedded Python interpreter, then runs Mercurial.
+/// Returns `Err(255)` if Mercurial's Python entry point raises an exception.
+fn run() -> Result<(), i32> {
+    let env = get_environment();
+
+    // Tell Python where it is installed.
+    set_python_home(&env);
+
+    // Set program name. The backing memory needs to live for the duration of the
+    // interpreter, so leak it with into_raw(); as_ptr() on the temporary dangles.
+    let program_name = CString::new(env.python_exe.to_str().unwrap())
+        .unwrap()
+        .into_raw();
+    unsafe {
+        Py_SetProgramName(program_name);
+    }
+
+    // TODO https://docs.python.org/2/c-api/init.html#c.PySys_SetArgvEx says
+    // 1. We may wish to not update sys.path as part of setting args
+    // 2. Initial argument should be empty string since we're not executing a Python script
+    let args: Vec<CString> = env::args().map(|s| CString::new(s).unwrap()).collect();
+    let argv: Vec<*const c_char> = args.iter().map(|a| a.as_ptr()).collect();
+
+    unsafe {
+        Py_Initialize();
+        PySys_SetArgv(args.len() as c_int, argv.as_ptr());
+        PyEval_InitThreads();
+        let _thread_state = PyEval_SaveThread();
+    }
+
+    let gil = Python::acquire_gil();
+    let py = gil.python();
+
+    let sys_mod = py.import("sys").unwrap();
+
+    update_encoding(py, &sys_mod);
+    update_modules_path(&env, py, &sys_mod);
+
+    // TODO we don't capture exit code from Mercurial.
+    match run_py(py) {
+        Err(err) => {
+            err.print(py);
+            return Err(255)
+        }
+        Ok(()) => Ok(()),
+    }
+}
+
+fn run_py(py: Python) -> PyResult<()> {
+    let demand_mod = py.import("hgdemandimport")?;
+    demand_mod.call(py, "enable", NoArgs, None)?;
+
+    let dispatch_mod = py.import("mercurial.dispatch")?;
+    dispatch_mod.call(py, "run", NoArgs, None)?;
+
+    Ok(())
+}
+
+fn main() {
+    let exit_code = match run() {
+        Err(err) => err,
+        Ok(()) => 0,
+    };
+
+    // If Python isn't initialized (we could hit an error before
+    // Py_Initialize()), this will no-op. So it is safe to always call.
+    unsafe {
+        Py_Finalize();
+    }
+
+    std::process::exit(exit_code);
+}
diff --git a/rust/hgcli/build.rs b/rust/hgcli/build.rs
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/build.rs
@@ -0,0 +1,106 @@
+// build.rs -- Configure build environment for `hgcli` Rust package.
+//
+// Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+extern crate which;
+
+use std::collections::HashMap;
+use std::env;
+use std::path::PathBuf;
+use std::process::Command;
+
+struct PythonConfig {
+    python: PathBuf,
+    config: HashMap<String, String>,
+}
+
+/// Locates the Python 2.7 interpreter and collects its sysconfig variables.
+/// Panics if no interpreter is found, or the helper script cannot run or fails.
+fn get_python_config() -> PythonConfig {
+    let python = match env::var("PYTHON_SYS_EXECUTABLE") {
+        Ok(env_python) => PathBuf::from(env_python),
+        Err(_) => match which::which("python2.7") {
+            Ok(path_python) => path_python,
+            Err(_) => panic!("could not find python2.7 executable"),
+        },
+    };
+
+    let separator = "SEPARATOR STRING";
+
+    let script = "import sysconfig; \
+c = sysconfig.get_config_vars(); \
+print('SEPARATOR STRING'.join('%s=%s' % i for i in c.items()))";
+
+    let mut command = Command::new(&python);
+    command.arg("-c").arg(script);
+
+    let out = command.output().unwrap();
+
+    if !out.status.success() {
+        panic!(
+            "python script failed: {}",
+            String::from_utf8_lossy(&out.stderr)
+        );
+    }
+
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    // print() appends a newline that would otherwise end up in the last value.
+    let stdout = stdout.trim_matches(|c: char| c == '\n' || c == '\r');
+    let mut m = HashMap::new();
+
+    for entry in stdout.split(separator) {
+        let mut parts = entry.splitn(2, "=");
+        let key = parts.next().unwrap();
+        let value = parts.next().unwrap();
+        m.insert(String::from(key), String::from(value));
+    }
+
+    PythonConfig {
+        python: python,
+        config: m,
+    }
+}
+
+/// Reports whether the `Py_ENABLE_SHARED` config value equals "1".
+#[cfg(not(target_os = "windows"))]
+fn have_shared(config: &PythonConfig) -> bool {
+    let shared = config.config.get("Py_ENABLE_SHARED");
+
+    shared.map_or(false, |value| value == "1")
+}
+
+#[cfg(target_os = "windows")]
+fn have_shared(config: &PythonConfig) -> bool {
+    // python27.dll should exist next to python2.7.exe.
+    let mut dll = config.python.clone();
+    dll.pop();
+    dll.push("python27.dll");
+
+    return dll.exists();
+}
+
+fn main() {
+    let config = get_python_config();
+
+    println!("Using Python: {}", config.python.to_string_lossy());
+
+    let prefix = config.config.get("prefix").unwrap();
+
+    println!("Prefix: {}", prefix);
+
+    if !have_shared(&config) {
+        panic!("Detected Python lacks a shared library, which is required");
+    }
+
+    // If building standalone Mercurial, add an extra link path for
+    // native libraries.
+    if let Some(lib_path) = env::var_os("HG_STANDALONE_LINK_PATH") {
+        println!(
+            "cargo:rustc-link-search=native={}",
+            lib_path.to_str().unwrap()
+        );
+    }
+}
diff --git a/rust/hgcli/Cargo.toml b/rust/hgcli/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/hgcli/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "hgcli"
+version = "0.1.0"
+authors = ["Gregory Szorc <gregory.szorc at gmail.com>"]
+
+build = "build.rs"
+
+[[bin]]
+name = "hg"
+path = "src/main.rs"
+
+[dependencies]
+libc = "0.2.34"
+which = "1.0.3"
+
+[build-dependencies]
+which = "1.0.3"
+
+[dependencies.cpython]
+version = "0.1"
+default-features = false
+features = ["python27-sys"]
+git = "https://github.com/dgrunwald/rust-cpython.git"
+rev = "b35031e2670d7571f03c313cde8fd91105bd5322"
diff --git a/rust/README.rst b/rust/README.rst
new file mode 100644
--- /dev/null
+++ b/rust/README.rst
@@ -0,0 +1,32 @@
+===================
+Mercurial Rust Code
+===================
+
+This directory contains various Rust code for the Mercurial project.
+
+The top-level ``Cargo.toml`` file defines a workspace containing
+all primary Mercurial crates.
+
+Building
+========
+
+To build the Rust components::
+
+   $ cargo build
+
+If you prefer a non-debug / release configuration::
+
+   $ cargo build --release
+
+Running
+=======
+
+The ``hgcli`` crate produces an ``hg`` binary. You can run this binary
+via ``cargo run``::
+
+   $ cargo run --manifest-path hgcli/Cargo.toml
+
+Or directly::
+
+   $ target/debug/hg
+   $ target/release/hg
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/rust/Cargo.toml
@@ -0,0 +1,2 @@
+[workspace]
+members = ["hgcli"]
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
new file mode 100644
--- /dev/null
+++ b/rust/Cargo.lock
@@ -0,0 +1,136 @@
+[[package]]
+name = "aho-corasick"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "cpython"
+version = "0.1.0"
+source = "git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322#b35031e2670d7571f03c313cde8fd91105bd5322"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)",
+ "python27-sys 0.1.2 (git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322)",
+]
+
+[[package]]
+name = "hgcli"
+version = "0.1.0"
+dependencies = [
+ "cpython 0.1.0 (git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322)",
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "which 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.1.41"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "python27-sys"
+version = "0.1.2"
+source = "git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322#b35031e2670d7571f03c313cde8fd91105bd5322"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.1.80"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "thread-id"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread_local"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "utf8-ranges"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "which"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
+"checksum cpython 0.1.0 (git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322)" = "<none>"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum libc 0.2.34 (registry+https://github.com/rust-lang/crates.io-index)" = "36fbc8a8929c632868295d0178dd8f63fc423fd7537ad0738372bd010b3ac9b0"
+"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum num-traits 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "cacfcab5eb48250ee7d0c7896b51a2c5eec99c1feea5f32025635f5ae4b00070"
+"checksum python27-sys 0.1.2 (git+https://github.com/dgrunwald/rust-cpython.git?rev=b35031e2670d7571f03c313cde8fd91105bd5322)" = "<none>"
+"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
+"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
+"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum which 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "4be6cfa54dab45266e98b5d7be2f8ce959ddd49abd141a05d52dce4b07f803bb"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/rust/.hgignore b/rust/.hgignore
new file mode 100644
--- /dev/null
+++ b/rust/.hgignore
@@ -0,0 +1 @@
+target/
diff --git a/rust/.cargo/config b/rust/.cargo/config
new file mode 100644
--- /dev/null
+++ b/rust/.cargo/config
@@ -0,0 +1,7 @@
+# Rust builds with a modern MSVC and uses a newer CRT.
+# Python 2.7 has a shared library dependency on an older CRT (msvcr90.dll).
+# We statically link the modern CRT to avoid multiple msvcr*.dll libraries
+# being loaded and Python possibly picking up symbols from the newer runtime
+# (which would be loaded first).
+[target.'cfg(target_os = "windows")']
+rustflags = ["-Ctarget-feature=+crt-static"]
diff --git a/contrib/build-standalone.py b/contrib/build-standalone.py
new file mode 100755
--- /dev/null
+++ b/contrib/build-standalone.py
@@ -0,0 +1,390 @@
+#!/usr/bin/env python2.7
+# build-standalone.py - Create a standalone distribution of Mercurial.
+#
+# Copyright 2017 Gregory Szorc <gregory.szorc at gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Create a standalone Mercurial distribution.
+
+This script does the bulk of the work for creating a standalone Mercurial
+distribution.
+"""
+
+import errno
+import gzip
+import hashlib
+import io
+import multiprocessing
+import os
+import shutil
+import stat
+import subprocess
+import sys
+import tarfile
+import tempfile
+import urllib2
+
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+
+PYTHON_ARCHIVES = {
+    'version': '2.7.14',
+    'url': 'https://www.python.org/ftp/python/{version}/{prefix}-{version}.{suffix}',
+    'gz': {
+        'sha256': '304c9b202ea6fbd0a4a8e0ad3733715fbd4749f2204a9173a58ec53c32ea73e8',
+        'prefix': 'Python',
+        'suffix': 'tgz',
+        'tar_mode': 'r:gz',
+    },
+    'xz': {
+        'sha256': '71ffb26e09e78650e424929b2b457b9c912ac216576e6bd9e7d204ed03296a66',
+        'prefix': 'Python',
+        'suffix': 'xz',
+        'tar_mode': 'r:xz',
+    },
+    'msi32': {
+        'sha256': '450bde0540341d4f7a6ad2bb66639fd3fac1c53087e9844dc34ddf88057a17ca',
+        'prefix': 'python',
+        'suffix': 'msi',
+    },
+    'msi64': {
+        'sha256': 'af293df7728b861648162ba0cd4a067299385cb6a3f172569205ac0b33190693',
+        'prefix': 'python',
+        'suffix': 'amd64.msi',
+    }
+}
+
+
+def hash_file(fh):
+    hasher = hashlib.sha256()
+    while True:
+        chunk = fh.read(16384)
+        if not chunk:
+            break
+
+        hasher.update(chunk)
+
+    return hasher.hexdigest()
+
+
+def makedirs(path):
+    try:
+        os.makedirs(path)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+
+
+def _ensure_python_source(dest_dir):
+    """Ensure the Python source code is extracted to a path."""
+    makedirs(dest_dir)
+
+    if lzma:
+        archive = PYTHON_ARCHIVES['xz']
+    else:
+        archive = PYTHON_ARCHIVES['gz']
+
+    archive_path = os.path.join(dest_dir,
+                                'python-%s.%s' % (PYTHON_ARCHIVES['version'],
+                                                  archive['suffix']))
+
+    if os.path.exists(archive_path):
+        with open(archive_path, 'rb') as fh:
+            if hash_file(fh) != archive['sha256']:
+                print('%s has unexpected hash; removing' % archive_path)
+                os.unlink(archive_path)
+
+    if not os.path.exists(archive_path):
+        url = PYTHON_ARCHIVES['url'].format(
+            version=PYTHON_ARCHIVES['version'],
+            prefix=archive['prefix'],
+            suffix=archive['suffix'])
+
+        print('downloading %s' % url)
+
+        req = urllib2.urlopen(url)
+        if req.getcode() != 200:
+            raise Exception('non-200 HTTP response downloading Python: %d' % req.getcode())
+
+        buf = io.BytesIO()
+        while True:
+            chunk = req.read(16384)
+            if not chunk:
+                break
+            buf.write(chunk)
+
+        buf.seek(0)
+        if hash_file(buf) != archive['sha256']:
+            raise Exception('Python hash mismatch')
+
+        buf.seek(0)
+        with open(archive_path, 'wb') as fh:
+            fh.write(buf.getvalue())
+
+    # Assume if a single file from the archive is present that we don't need
+    # to re-extract.
+    if os.path.exists(os.path.join(dest_dir, 'configure')):
+        print('extracted python source code found; using without modifications')
+        return
+
+    print('extracting %s to %s' % (archive_path, dest_dir))
+    with tarfile.open(archive_path, archive['tar_mode']) as tf:
+        prefix = 'Python-%s' % PYTHON_ARCHIVES['version']
+        for ti in tf:
+            assert ti.name.startswith(prefix)
+            ti.name = ti.name[len(prefix):].lstrip('/')
+            tf.extract(ti, dest_dir)
+
+
+def _build_python(state):
+    source_dir = state['python_source_dir']
+    build_dir = state['python_build_dir']
+    _ensure_python_source(source_dir)
+
+    makedirs(build_dir)
+
+    # TODO use a more sensible filesystem layout for Python in cases
+    # where the files will be installed alongside other system files
+    # (e.g. when producing deb or rpm archives).
+    if not os.path.exists(os.path.join(build_dir, 'config.status')):
+        subprocess.check_call([
+            os.path.join(source_dir, 'configure'),
+            '--prefix', '/hgpython',
+            '--enable-shared',
+            '--enable-unicode=ucs4',
+            # TODO enable optimizations
+            # '--enable-optimizations',
+            # '--enable-lto',
+        ], cwd=build_dir)
+
+    subprocess.check_call([
+        'make', '-j%d' % multiprocessing.cpu_count(),
+    ], cwd=build_dir)
+
+
+def install_python(state):
+    """Installs Python in the standalone directory.
+
+    Python is installed to the `hgpython/` sub-directory. The layout of
+    this directory resembles a typical Python distribution. In fact, the
+    Python installation could be used on its own, just like any other
+    Python installation.
+    """
+    # TODO on Windows, obtain Python files from official, self-contained
+    # binary distribution (via an MSI).
+    _build_python(state)
+
+    build_dir = state['python_build_dir']
+    py_dir = state['python_install_dir']
+
+    if os.path.exists(os.path.join(py_dir, 'bin', 'python')):
+        print('python already installed in %s; skipping `make install`' %
+              py_dir)
+    else:
+        subprocess.check_call([
+            'make',
+            '-j%d' % multiprocessing.cpu_count(),
+            'install',
+            'DESTDIR=%s' % state['install_dir'],
+        ], cwd=build_dir)
+
+    # Update shared library references to be relative to binary.
+    # TODO compile Python in such a way that this isn't necessary.
+    if sys.platform.startswith('linux'):
+        subprocess.check_call([
+            'patchelf',
+            '--set-rpath',
+            '$ORIGIN/../lib',
+            state['python_bin'],
+        ])
+    elif sys.platform == 'darwin':
+        subprocess.check_call([
+            'install_name_tool', '-change',
+            '/hgpython/lib/libpython2.7.dylib',
+            '@loader_path/../lib/libpython2.7.dylib',
+            state['python_bin'],
+        ])
+
+
+def install_rust_components(state):
+    rust_dir = os.path.join(state['root_dir'], 'rust', 'hgcli')
+
+    env = dict(os.environ)
+
+    # Tell cpython's build.rs to use our Python binary.
+    env['PYTHON_SYS_EXECUTABLE'] = os.path.join(
+        state['python_install_dir'], 'bin', 'python2.7')
+
+    # Tell our build.rs where to find libpython.
+    env['HG_STANDALONE_LINK_PATH'] = os.path.join(
+        state['python_install_dir'], 'lib')
+
+    subprocess.check_call(['cargo', 'build', '--release', '-v'],
+                          cwd=rust_dir, env=env)
+
+    subprocess.check_call([
+        'cargo',
+        'install',
+        '--force',
+        '--root', state['install_dir'],
+    ], cwd=rust_dir, env=env)
+
+    # TODO figure out how to link properly via Cargo.
+    # Adjust rpath so libpython is loaded from a relative path.
+    if sys.platform.startswith('linux'):
+        subprocess.check_call([
+            'patchelf',
+            '--set-rpath',
+            '$ORIGIN/../hgpython/lib',
+            state['hg_bin'],
+        ])
+    elif sys.platform == 'darwin':
+        subprocess.check_call([
+            'install_name_tool', '-change',
+            '/System/Library/Frameworks/Python.framework/Versions/2.7/Python',
+            '@loader_path/../lib/libpython2.7.dylib',
+            state['hg_bin'],
+        ])
+
+def install_mercurial(state):
+    """Install Mercurial files into the distribution."""
+    install_dir = os.path.join(state['install_dir'])
+    python = os.path.join(state['python_install_dir'], 'bin', 'python')
+
+    temp_dir = tempfile.mkdtemp(dir=state['build_dir'])
+    try:
+        subprocess.check_call([
+            python, 'setup.py',
+            'build',
+            'install',
+                # These are the only files we care about.
+                '--install-lib', os.path.join(install_dir, 'mercurial'),
+
+                '--install-data', os.path.join(temp_dir, 'data'),
+                '--install-headers', os.path.join(temp_dir, 'headers'),
+                '--install-platlib', os.path.join(temp_dir, 'platlib'),
+                '--install-purelib', os.path.join(temp_dir, 'purelib'),
+                # `hg` is replaced by our binary version.
+                '--install-scripts', os.path.join(temp_dir, 'bin'),
+            ],
+            cwd=state['root_dir'])
+    finally:
+        temp_files = set()
+        for root, dirs, files in os.walk(temp_dir):
+            for f in files:
+                full = os.path.join(root, f)
+                temp_files.add(full[len(temp_dir)+1:])
+
+        shutil.rmtree(temp_dir)
+
+        expected = {
+            'bin/hg',
+        }
+        extra = temp_files - expected
+        if extra:
+            raise Exception('unknown extra files were installed: %s' %
+                            ', '.join(sorted(extra)))
+
+
+def _run_hg(args):
+    env = dict(os.environ)
+    env['HGPLAIN'] = '1'
+    env['HGRCPATH'] = ''
+
+    with open(os.devnull, 'wb') as devnull:
+        return subprocess.check_output([state['hg_bin']] + args,
+                                       env=env,
+                                       stderr=devnull)
+
+def verify_hg(state):
+    print('running `hg version`')
+    try:
+        print(_run_hg(['version']))
+    except subprocess.CalledProcessError as e:
+        print('error invoking `hg version`')
+        print(e.output)
+        sys.exit(1)
+
+
+def get_revision_info(state):
+    res = _run_hg(['-R', state['root_dir'], 'log', '-r', '.', '-T', '{node} {date}'])
+    node, date = res.split(' ')
+    return node, int(float(date))
+
+
+def _get_archive_files(state):
+    # Ideally we wouldn't have any ignores.
+    IGNORE = {
+        '.crates.toml',
+    }
+
+    for root, dirs, files in os.walk(state['install_dir']):
+        # sorts are here for determinism.
+        dirs.sort()
+        for f in sorted(files):
+            full = os.path.join(root, f)
+            rel = full[len(state['install_dir']) + 1:]
+
+            if rel in IGNORE:
+                continue
+
+            yield full, rel
+
+
+def create_tar(state, ts):
+    print('writing %s' % state['tar_path'])
+    with tarfile.TarFile(state['tar_path'], 'w') as tf:
+        for full, rel in _get_archive_files(state):
+            with open(full, 'rb') as fh:
+                ti = tf.gettarinfo(full, rel)
+
+                if ti.mode & (stat.S_ISUID | stat.S_ISGID):
+                    print('setuid or setgid bits set: %s' % full)
+
+                # Normalize mtime to commit time.
+                ti.mtime = ts
+                # Normalize uid/gid to root:root.
+                ti.uid = 0
+                ti.gid = 0
+                ti.uname = ''
+                ti.gname = ''
+
+                tf.addfile(ti, fh)
+
+    #gz = state['tar_path'] + '.gz'
+    #print('writing %s' % gz)
+    #with open(state['tar_path'], 'rb') as ifh, gzip.GzipFile(gz, 'wb') as ofh:
+    #    shutil.copyfileobj(ifh, ofh)
+
+
+if __name__ == '__main__':
+    root = os.path.normpath(os.path.join(os.path.dirname(__file__), '..'))
+    root = os.path.abspath(root)
+    build_dir = os.path.join(root, 'build')
+
+    python_install_dir = os.path.join(build_dir, 'standalone', 'hgpython')
+
+    state = {
+        'root_dir': root,
+        'build_dir': build_dir,
+        'install_dir': os.path.join(build_dir, 'standalone'),
+        'python_source_dir': os.path.join(build_dir, 'python-src'),
+        'python_build_dir': os.path.join(build_dir, 'python-build'),
+        'python_install_dir': python_install_dir,
+        'python_bin': os.path.join(python_install_dir, 'bin', 'python2.7'),
+        'hg_bin': os.path.join(build_dir, 'standalone', 'bin', 'hg'),
+        'tar_path': os.path.join(build_dir, 'standalone.tar'),
+    }
+
+    makedirs(state['install_dir'])
+    install_python(state)
+    install_rust_components(state)
+    install_mercurial(state)
+    verify_hg(state)
+    node, ts = get_revision_info(state)
+    create_tar(state, ts)
diff --git a/contrib/STANDALONE-MERCURIAL.rst b/contrib/STANDALONE-MERCURIAL.rst
new file mode 100644
--- /dev/null
+++ b/contrib/STANDALONE-MERCURIAL.rst
@@ -0,0 +1,70 @@
+====================
+Standalone Mercurial
+====================
+
+*Standalone Mercurial* is a generic term given to a distribution
+of Mercurial that is standalone and has minimal dependencies on
+the host (typically just the C runtime library). Instead, most of
+Mercurial's dependencies are included in the distribution. This
+includes a Python interpreter.
+
+Architecture
+============
+
+A standalone Mercurial distribution essentially consists of the
+following elements:
+
+* An `hg` binary executable
+* A Python interpreter shared library
+* The Python standard library
+* 3rd party Python packages to enhance the Mercurial experience
+* Mercurial's Python packages
+* Mercurial support files (help content, default config files, etc)
+* Any additional support files (e.g. shared library dependencies)
+
+At a high level, the `hg` binary has a shared library dependency
+on `libpython`. The binary is configured to load the `libpython`
+that ships with the Mercurial distribution. When started, the
+`hg` binary assesses its state, configures an embedded Python
+interpreter, and essentially invokes Mercurial's `main()` function.
+
+Build Requirements
+==================
+
+Universal
+---------
+
+* Python 2.7 (to run the build script)
+* A working Rust and Cargo installation
+
+Linux
+-----
+
+* Dependencies to build Python 2.7 from source (GNU make, autoconf,
+  various dependencies for extensions)
+* The `patchelf` tool
+
+MacOS
+-----
+
+* Xcode
+
+Windows
+-------
+
+* Microsoft Visual C++ Compiler for Python 2.7 (https://www.microsoft.com/en-us/download/details.aspx?id=44266)
+
+Building
+========
+
+To build standalone Mercurial, run the following::
+
+   $ python2.7 contrib/build-standalone.py
+
+This will:
+
+1. Obtain a Python distribution (either by compiling from source
+   or downloading a pre-built distribution)
+2. Build Mercurial Rust components
+3. Build Mercurial Python components
+4. Produce an *archive* suitable for distribution
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -66,3 +66,5 @@
 # hackable windows distribution additions
 ^hg-python
 ^hg.py$
+
+subinclude:rust/.hgignore



To: indygreg, #hg-reviewers
Cc: quark, durin42, dlax, mercurial-devel


More information about the Mercurial-devel mailing list