D6349: rust-dirstate: add rust-cpython bindings to the new parse/pack functions

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Mon May 6 21:04:05 UTC 2019


Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This allows Python code to call `parse_dirstate`/`pack_dirstate` transparently.
  
  These bindings are heavy given the relatively simple task, as they are bound
  to implementation details of both the C and Python code. They will be slimmed
  down in future patches and eventually completely removed once more of the
  dirstate code has been refactored/rewritten in Rust.
  
  Both functions emulate the mutate-on-loop style of the Python and C
  implementations by looping over changed items in the compatibility layer
  rather than in the core functions.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6349

AFFECTED FILES
  mercurial/cext/parsers.c
  rust/Cargo.lock
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/dirstate.rs
  rust/hg-cpython/src/lib.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs
--- a/rust/hg-cpython/src/lib.rs
+++ b/rust/hg-cpython/src/lib.rs
@@ -23,13 +23,15 @@
 extern crate cpython;
 extern crate hg;
 extern crate libc;
+extern crate python27_sys;
 
 pub mod ancestors;
 mod cindex;
 mod conversion;
 pub mod dagops;
 pub mod discovery;
 pub mod exceptions;
+pub mod dirstate;
 
 py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
     m.add(
@@ -42,6 +44,7 @@
     m.add(py, "ancestor", ancestors::init_module(py, &dotted_name)?)?;
     m.add(py, "dagop", dagops::init_module(py, &dotted_name)?)?;
     m.add(py, "discovery", discovery::init_module(py, &dotted_name)?)?;
+    m.add(py, "dirstate", dirstate::init_module(py, &dotted_name)?)?;
     m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
     Ok(())
 });
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -0,0 +1,203 @@
+// dirstate.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate` module provided by the
+//! `hg-core` package.
+//!
+//! From Python, this will be seen as `mercurial.rustext.dirstate`
+
+use cpython::{
+    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
+    PySequence, PyTuple, Python, ToPyObject,
+};
+use hg::{
+    pack_dirstate, parse_dirstate, DirstatePackError, DirstateParseError,
+    DirstateVec,
+};
+use std::collections::HashMap;
+use std::ffi::CStr;
+#[cfg(feature = "python27")]
+extern crate python27_sys as python_sys;
+#[cfg(feature = "python3")]
+extern crate python3_sys as python_sys;
+use self::python_sys::PyCapsule_Import;
+use libc::{c_char, c_int};
+use std::mem::transmute;
+
+/// Signature of the C `make_dirstate_tuple` function exported by
+/// `mercurial.cext.parsers` through a capsule. C code type-checks for its
+/// custom `dirstate_tuple` and raises if a plain tuple is passed, so entries
+/// must be built with it until that type is replaced by a regular tuple.
+/// NOTE(review): if the C side can return NULL, wrapping the result directly
+/// in `PyObject` is unsound; confirm or return a raw pointer instead.
+type MakeDirstateTupleFn = extern "C" fn(
+    state: c_char,
+    mode: c_int,
+    size: c_int,
+    mtime: c_int,
+) -> PyObject;
+
+/// Fetch the C `make_dirstate_tuple` function pointer from its capsule.
+/// Largely copied from cindex.rs, pending a `py_capsule_fn!` macro in
+/// rust-cpython: https://github.com/dgrunwald/rust-cpython/pull/169
+fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
+    const NAME: &[u8] = b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0";
+    // SAFETY: NAME is a literal whose only NUL byte is the final one.
+    let name = unsafe { CStr::from_bytes_with_nul_unchecked(NAME) };
+    // SAFETY: `name` is a valid C string and `py` proves the GIL is held.
+    let from_caps = unsafe { PyCapsule_Import(name.as_ptr(), 0) };
+    if from_caps.is_null() {
+        return Err(PyErr::fetch(py));
+    }
+    // SAFETY: relies on the capsule holding a `MakeDirstateTupleFn`.
+    Ok(unsafe { transmute(from_caps) })
+}
+
+/// Parse the dirstate bytes `st`, filling `dmap` with one `dirstate_tuple`
+/// per parsed entry and `copymap` with the parsed copies, and return the
+/// `(p1, p2)` parents. Parse failures are raised as `ValueError`.
+fn parse_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    st: PyBytes,
+) -> PyResult<PyTuple> {
+    let ((p1, p2), dirstate_vec, copies) =
+        parse_dirstate(st.data(py)).map_err(|e| {
+            let msg = match e {
+                DirstateParseError::TooLittleData => {
+                    "too little data for parents"
+                }
+                DirstateParseError::Overflow => "overflow in dirstate",
+            };
+            PyErr::new::<exc::ValueError, _>(py, msg)
+        })?;
+    // Resolve the capsule once instead of once per entry; skipped when there
+    // is nothing to build so an empty dirstate never needs the C extension.
+    if !dirstate_vec.is_empty() {
+        let make_tuple = decapsule_make_dirstate_tuple(py)?;
+        for (filename, (state, mode, size, mtime)) in dirstate_vec {
+            let entry = make_tuple(state, mode, size, mtime);
+            dmap.set_item(py, PyBytes::new(py, &filename[..]), entry)?;
+        }
+    }
+    for (filename, copy) in copies {
+        copymap.set_item(
+            py,
+            PyBytes::new(py, filename),
+            PyBytes::new(py, copy),
+        )?;
+    }
+    Ok((PyBytes::new(py, p1), PyBytes::new(py, p2)).to_py_object(py))
+}
+
+/// Pack `dmap`, `copymap` and the parents `pl` into dirstate bytes and write
+/// back into `dmap` the entries that `pack_dirstate` returned as updated.
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+    // Refuse a `now` that does not fit in 32 bits instead of truncating it.
+    let now_long = now.value(py);
+    let now = now_long as i32;
+    if i64::from(now) != now_long as i64 {
+        let msg = "now does not fit in a 32-bit integer";
+        return Err(PyErr::new::<exc::OverflowError, _>(py, msg));
+    }
+    let dirstate_vec: Result<DirstateVec, PyErr> = dmap
+        .items(py)
+        .iter()
+        .map(|(filename, stats)| {
+            let stats = stats.extract::<PySequence>(py)?;
+            let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
+            // `first()` so that an empty state is an error, not a panic.
+            let state = *state.data(py).first().ok_or_else(|| {
+                PyErr::new::<exc::ValueError, _>(py, "empty dirstate state")
+            })? as i8;
+            let mode = stats.get_item(py, 1)?.extract(py)?;
+            let size = stats.get_item(py, 2)?.extract(py)?;
+            let mtime = stats.get_item(py, 3)?.extract(py)?;
+            let name = filename.extract::<PyBytes>(py)?.data(py).to_owned();
+            Ok((name, (state, mode, size, mtime)))
+        })
+        .collect();
+    let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
+        .items(py)
+        .iter()
+        .map(|(key, value)| {
+            Ok((
+                key.extract::<PyBytes>(py)?.data(py).to_owned(),
+                value.extract::<PyBytes>(py)?.data(py).to_owned(),
+            ))
+        })
+        .collect();
+
+    let (packed, new_dirstate_vec) =
+        pack_dirstate(&dirstate_vec?, &copies?, (p1, p2), now).map_err(|e| {
+            let msg = match e {
+                DirstatePackError::CorruptedParent => {
+                    "expected a 20-byte hash".to_string()
+                }
+                DirstatePackError::CorruptedEntry(e) => e,
+                DirstatePackError::BadSize(expected, actual) => {
+                    format!("bad dirstate size: {} != {}", actual, expected)
+                }
+            };
+            PyErr::new::<exc::ValueError, _>(py, msg)
+        })?;
+    // Resolve the capsule only when needed, and once rather than per entry.
+    if !new_dirstate_vec.is_empty() {
+        let make_tuple = decapsule_make_dirstate_tuple(py)?;
+        for (filename, (state, mode, size, mtime)) in new_dirstate_vec {
+            let entry = make_tuple(state, mode, size, mtime);
+            dmap.set_item(py, PyBytes::new(py, &filename[..]), entry)?;
+        }
+    }
+    Ok(PyBytes::new(py, &packed))
+}
+
+/// Create the `dirstate` module, with `__package__` given from parent.
+///
+/// Both wrappers are exposed on it, and the module is stored in
+/// `sys.modules` under its dotted name before being returned.
+pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
+    let full_name = &format!("{}.dirstate", package);
+    let module = PyModule::new(py, full_name)?;
+    module.add(py, "__package__", package)?;
+    module.add(py, "__doc__", "Dirstate - Rust implementation")?;
+
+    // Python-callable entry points.
+    let parse_fn = py_fn!(
+        py,
+        parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
+    );
+    module.add(py, "parse_dirstate", parse_fn)?;
+    let pack_fn = py_fn!(
+        py,
+        pack_dirstate_wrapper(
+            dmap: PyDict,
+            copymap: PyDict,
+            pl: PyTuple,
+            now: PyInt
+        )
+    );
+    module.add(py, "pack_dirstate", pack_fn)?;
+
+    // Register the module under its dotted name.
+    let modules: PyDict =
+        PyModule::import(py, "sys")?.get(py, "modules")?.extract(py)?;
+    modules.set_item(py, full_name, &module)?;
+
+    Ok(module)
+}
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -8,16 +8,19 @@
 mod ancestors;
 pub mod dagops;
 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
-pub mod testing;  // unconditionally built, for use from integration tests
+mod dirstate;
 pub mod discovery;
+pub mod testing; // unconditionally built, for use from integration tests
+pub use dirstate::{
+    pack_dirstate, parse_dirstate, CopyMap, DirstateVec, DirStateParents,
+};
 
 /// Mercurial revision numbers
 ///
 /// As noted in revlog.c, revision numbers are actually encoded in
 /// 4 bytes, and are liberally converted to ints, whence the i32
 pub type Revision = i32;
 
-
 /// Marker expressing the absence of a parent
 ///
 /// Independently of the actual representation, `NULL_REVISION` is guaranteed
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -3,7 +3,7 @@
 version = "0.6.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -17,8 +17,8 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
-name = "cfg-if"
-version = "0.1.6"
+name = "byteorder"
+version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -49,6 +49,8 @@
 name = "hg-core"
 version = "0.1.0"
 dependencies = [
+ "byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_pcg 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -84,13 +86,8 @@
 
 [[package]]
 name = "memchr"
-version = "2.1.2"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-dependencies = [
- "cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
- "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
-]
 
 [[package]]
 name = "num-traits"
@@ -225,7 +222,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)",
- "memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -279,11 +276,6 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
-name = "version_check"
-version = "0.1.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-
-[[package]]
 name = "winapi"
 version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -306,13 +298,13 @@
 "checksum aho-corasick 0.6.9 (registry+https://github.com/rust-lang/crates.io-index)" = "1e9a933f4e58658d7b12defcf96dc5c720f20832deebe3e0a19efd3b6aaeeb9e"
 "checksum autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a6d640bee2da49f60a4068a7fae53acde8982514ab7bae8b8cea9e88cbcfd799"
 "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
-"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
+"checksum byteorder 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a019b10a2a7cdeb292db131fc8113e57ea2a908f6e7894b0c3c671893b65dbeb"
 "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
 "checksum cpython 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b489034e723e7f5109fecd19b719e664f89ef925be785885252469e9822fa940"
 "checksum fuchsia-cprng 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "81f7f8eb465745ea9b02e2704612a9946a59fa40572086c6fd49d6ddcf30bf31"
 "checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
 "checksum libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)" = "2d2857ec59fadc0773853c664d2d18e7198e83883e7060b63c924cb077bd5c74"
-"checksum memchr 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "db4c41318937f6e76648f42826b1d9ade5c09cafb5aef7e351240a70f39206e9"
+"checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
 "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
 "checksum python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "56114c37d4dca82526d74009df7782a28c871ac9d36b19d4cb9e67672258527e"
 "checksum python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "61e4aac43f833fd637e429506cb2ac9d7df672c4b68f2eaaa163649b7fdc0444"
@@ -335,7 +327,6 @@
 "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
 "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
 "checksum utf8-ranges 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
-"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"
 "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0"
 "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -671,6 +671,7 @@
 
 static void module_init(PyObject *mod)
 {
+	PyObject *capsule = NULL;
 	PyModule_AddIntConstant(mod, "version", version);
 
 	/* This module constant has two purposes.  First, it lets us unit test
@@ -687,6 +688,12 @@
 	manifest_module_init(mod);
 	revlog_module_init(mod);
 
+	capsule = PyCapsule_New(
+	    make_dirstate_tuple,
+	    "mercurial.cext.parsers.make_dirstate_tuple_CAPI", NULL);
+	if (capsule != NULL)
+		PyModule_AddObject(mod, "make_dirstate_tuple_CAPI", capsule);
+
 	if (PyType_Ready(&dirstateTupleType) < 0) {
 		return;
 	}



To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list