D6394: rust-dirstate: add "dirs" rust-cpython binding

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Fri May 17 10:10:40 UTC 2019


Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  There is an obvious performance and memory issue with those bindings on larger
  repos as it copies and allocates everything at once, round-trip. Like in the
  previous patch series, this is only temporary and will only get better once
  we don't have large data structures going to and from Python.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6394

AFFECTED FILES
  rust/hg-cpython/src/dirstate.rs

CHANGE DETAILS

diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -11,21 +11,25 @@
 //! From Python, this will be seen as `mercurial.rustext.dirstate`
 
 use cpython::{
-    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
-    PySequence, PyTuple, Python, ToPyObject,
+    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
+    PyResult, PySequence, PyTuple, Python, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
-    DirstatePackError, DirstateParents, DirstateParseError, DirstateVec,
+    pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
+    DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
+    DirstateParseError, DirstateVec,
 };
 use std::collections::HashMap;
 use std::ffi::CStr;
+
 #[cfg(feature = "python27")]
 extern crate python27_sys as python_sys;
 #[cfg(feature = "python3")]
 extern crate python3_sys as python_sys;
+
 use self::python_sys::PyCapsule_Import;
 use libc::{c_char, c_int};
+use std::cell::RefCell;
 use std::mem::transmute;
 
 /// C code uses a custom `dirstate_tuple` type, checks in multiple instances
@@ -102,20 +106,11 @@
     }
 }
 
-fn pack_dirstate_wrapper(
+fn extract_dirstate_vec(
     py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    pl: PyTuple,
-    now: PyInt,
-) -> PyResult<PyBytes> {
-    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
-    let p1: &[u8] = p1.data(py);
-    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
-    let p2: &[u8] = p2.data(py);
-
-    let dirstate_vec: Result<DirstateVec, PyErr> = dmap
-        .items(py)
+    dmap: &PyDict,
+) -> Result<DirstateVec, PyErr> {
+    dmap.items(py)
         .iter()
         .map(|(filename, stats)| {
             let stats = stats.extract::<PySequence>(py)?;
@@ -136,7 +131,22 @@
                 },
             ))
         })
-        .collect();
+        .collect()
+}
+
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+
+    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
 
     let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
         .items(py)
@@ -150,7 +160,7 @@
         .collect();
 
     match pack_dirstate(
-        &dirstate_vec?,
+        &dirstate_vec,
         &copies?,
         DirstateParents { p1, p2 },
         now.value(py) as i32,
@@ -191,10 +201,103 @@
     }
 }
 
+py_class!(pub class Dirs |py| {
+    data dirs_map: RefCell<DirsMultiset>;
+
+    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
+    // a `list`)
+    def __new__(
+        _cls,
+        map: PyObject,
+        skip: Option<PyObject> = None
+    ) -> PyResult<Self> {
+        let mut skip_state: Option<i8> = None;
+        if let Some(skip) = skip {
+            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+        }
+        let dirs_map;
+
+        if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate_vec = extract_dirstate_vec(py, &map)?;
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Dirstate(dirstate_vec),
+                skip_state,
+            )
+        } else {
+            let map: Result<Vec<Vec<u8>>, PyErr> = map
+                .iter(py)?
+                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+                .collect();
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Manifest(map?),
+                skip_state,
+            )
+        }
+
+        Self::create_instance(py, RefCell::new(dirs_map))
+    }
+
+    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().add_path(
+            path.extract::<PyBytes>(py)?.data(py).to_owned(),
+        );
+        Ok(py.None())
+    }
+
+    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().delete_path(
+            path.extract::<PyBytes>(py)?.data(py).to_owned(),
+        )
+            .and(Ok(py.None()))
+            .or_else(|e| {
+                match e {
+                    DirstateMapError::PathNotFound(_p) => {
+                        Err(PyErr::new::<exc::ValueError, _>(
+                            py,
+                            "expected a value, found none".to_string(),
+                        ))
+                    }
+                    DirstateMapError::EmptyPath => {
+                        Ok(py.None())
+                    }
+                }
+            })
+    }
+
+    // This is really inefficient on top of being ugly, but it's an easy way
+    // of having it work to continue working on the rest of the module
+    // hopefully bypassing Python entirely pretty soon.
+    def __iter__(&self) -> PyResult<PyObject> {
+        let dict = PyDict::new(py);
+
+        for (key, value) in self.dirs_map(py).borrow().iter() {
+            dict.set_item(
+                py,
+                PyBytes::new(py, &key[..]),
+                value.to_py_object(py),
+            )?;
+        }
+
+        let locals = PyDict::new(py);
+        locals.set_item(py, "obj", dict)?;
+
+        py.eval("iter(obj)", None, Some(&locals))
+    }
+
+    def __contains__(&self, item: PyObject) -> PyResult<bool> {
+        Ok(self
+            .dirs_map(py)
+            .borrow()
+            .get(&item.extract::<PyBytes>(py)?.data(py).to_owned())
+            .is_some())
+    }
+});
+
 /// Create the module, with `__package__` given from parent
 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
     let dotted_name = &format!("{}.dirstate", package);
     let m = PyModule::new(py, dotted_name)?;
+
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "Dirstate - Rust implementation")?;
     m.add(
@@ -219,6 +322,8 @@
         ),
     )?;
 
+    m.add_class::<Dirs>(py)?;
+
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
     sys_modules.set_item(py, dotted_name, &m)?;



To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list