D6394: rust-dirstate: add "dirs" rust-cpython binding
Alphare (Raphaël Gomès)
phabricator at mercurial-scm.org
Fri May 17 10:10:40 UTC 2019
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
There is an obvious performance and memory issue with those bindings on larger
repos as it copies and allocates everything at once, round-trip. Like in the
previous patch series, this is only temporary and will only get better once
we don't have large data structures going to and from Python.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D6394
AFFECTED FILES
rust/hg-cpython/src/dirstate.rs
CHANGE DETAILS
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -11,21 +11,25 @@
//! From Python, this will be seen as `mercurial.rustext.dirstate`
use cpython::{
- exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
- PySequence, PyTuple, Python, ToPyObject,
+ exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
+ PyResult, PySequence, PyTuple, Python, ToPyObject,
};
use hg::{
- pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
- DirstatePackError, DirstateParents, DirstateParseError, DirstateVec,
+ pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
+ DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
+ DirstateParseError, DirstateVec,
};
use std::collections::HashMap;
use std::ffi::CStr;
+
#[cfg(feature = "python27")]
extern crate python27_sys as python_sys;
#[cfg(feature = "python3")]
extern crate python3_sys as python_sys;
+
use self::python_sys::PyCapsule_Import;
use libc::{c_char, c_int};
+use std::cell::RefCell;
use std::mem::transmute;
/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
@@ -102,20 +106,11 @@
}
}
-fn pack_dirstate_wrapper(
+fn extract_dirstate_vec(
py: Python,
- dmap: PyDict,
- copymap: PyDict,
- pl: PyTuple,
- now: PyInt,
-) -> PyResult<PyBytes> {
- let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
- let p1: &[u8] = p1.data(py);
- let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
- let p2: &[u8] = p2.data(py);
-
- let dirstate_vec: Result<DirstateVec, PyErr> = dmap
- .items(py)
+ dmap: &PyDict,
+) -> Result<DirstateVec, PyErr> {
+ dmap.items(py)
.iter()
.map(|(filename, stats)| {
let stats = stats.extract::<PySequence>(py)?;
@@ -136,7 +131,22 @@
},
))
})
- .collect();
+ .collect()
+}
+
+fn pack_dirstate_wrapper(
+ py: Python,
+ dmap: PyDict,
+ copymap: PyDict,
+ pl: PyTuple,
+ now: PyInt,
+) -> PyResult<PyBytes> {
+ let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+ let p1: &[u8] = p1.data(py);
+ let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+ let p2: &[u8] = p2.data(py);
+
+ let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
.items(py)
@@ -150,7 +160,7 @@
.collect();
match pack_dirstate(
- &dirstate_vec?,
+ &dirstate_vec,
&copies?,
DirstateParents { p1, p2 },
now.value(py) as i32,
@@ -191,10 +201,103 @@
}
}
+py_class!(pub class Dirs |py| {
+ data dirs_map: RefCell<DirsMultiset>;
+
+ // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
+ // a `list`)
+ def __new__(
+ _cls,
+ map: PyObject,
+ skip: Option<PyObject> = None
+ ) -> PyResult<Self> {
+ let mut skip_state: Option<i8> = None;
+ if let Some(skip) = skip {
+ skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+ }
+ let dirs_map;
+
+ if let Ok(map) = map.cast_as::<PyDict>(py) {
+ let dirstate_vec = extract_dirstate_vec(py, &map)?;
+ dirs_map = DirsMultiset::new(
+ DirsIterable::Dirstate(dirstate_vec),
+ skip_state,
+ )
+ } else {
+ let map: Result<Vec<Vec<u8>>, PyErr> = map
+ .iter(py)?
+ .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+ .collect();
+ dirs_map = DirsMultiset::new(
+ DirsIterable::Manifest(map?),
+ skip_state,
+ )
+ }
+
+ Self::create_instance(py, RefCell::new(dirs_map))
+ }
+
+ def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+ self.dirs_map(py).borrow_mut().add_path(
+ path.extract::<PyBytes>(py)?.data(py).to_owned(),
+ );
+ Ok(py.None())
+ }
+
+ def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+ self.dirs_map(py).borrow_mut().delete_path(
+ path.extract::<PyBytes>(py)?.data(py).to_owned(),
+ )
+ .and(Ok(py.None()))
+ .or_else(|e| {
+ match e {
+ DirstateMapError::PathNotFound(_p) => {
+ Err(PyErr::new::<exc::ValueError, _>(
+ py,
+ "expected a value, found none".to_string(),
+ ))
+ }
+ DirstateMapError::EmptyPath => {
+ Ok(py.None())
+ }
+ }
+ })
+ }
+
+ // This is really inefficient on top of being ugly, but it's an easy way
+ // of having it work to continue working on the rest of the module
+ // hopefully bypassing Python entirely pretty soon.
+ def __iter__(&self) -> PyResult<PyObject> {
+ let dict = PyDict::new(py);
+
+ for (key, value) in self.dirs_map(py).borrow().iter() {
+ dict.set_item(
+ py,
+ PyBytes::new(py, &key[..]),
+ value.to_py_object(py),
+ )?;
+ }
+
+ let locals = PyDict::new(py);
+ locals.set_item(py, "obj", dict)?;
+
+ py.eval("iter(obj)", None, Some(&locals))
+ }
+
+ def __contains__(&self, item: PyObject) -> PyResult<bool> {
+ Ok(self
+ .dirs_map(py)
+ .borrow()
+ .get(&item.extract::<PyBytes>(py)?.data(py).to_owned())
+ .is_some())
+ }
+});
+
/// Create the module, with `__package__` given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let dotted_name = &format!("{}.dirstate", package);
let m = PyModule::new(py, dotted_name)?;
+
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "Dirstate - Rust implementation")?;
m.add(
@@ -219,6 +322,8 @@
),
)?;
+ m.add_class::<Dirs>(py)?;
+
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
sys_modules.set_item(py, dotted_name, &m)?;
To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list