D6594: RFC dirstatemap

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Thu Jul 4 11:00:19 EDT 2019


Alphare created this revision.
Herald added subscribers: mercurial-devel, mjpieters, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
Alphare added a comment.


  If that helps, I just wrote an article on sharing references between Rust and Python: https://raphaelgomes.dev/blog/articles/2019-07-01-sharing-references-between-python-and-rust.html.

REVISION SUMMARY
  This is a Rust implementation of the `dirstatemap` class.
  
  The Python implementation uses propertycache for lazy initialization, some of
  which have side effects, all for performance reasons. While meant for
  encapsulation, callers from different parts of the code break said
  encapsulation. Lastly, most of its inner datastructures are used as iterators
  from the rest of the code base.
  All of the above proved to be a real challenge and the reason why this is an
  RFC patch. While the code works (read: the tests pass), it is slower and
  harder to maintain than it should be.
  
  I will direct your attention to two files to start:
  
  - rust/hg-cpython/src/dirstate/dirstate_map.rs
  - rust/hg-cpython/src/dirstate/macros.rs
  
  These files contain documentation and TODO information that contextualize the
  related code. I hope there is enough information to help you get an idea of
  the issue.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6594

AFFECTED FILES
  hgext/largefiles/overrides.py
  mercurial/dirstate.py
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-core/src/utils/files.rs
  rust/hg-core/src/utils/mod.rs
  rust/hg-cpython/src/dirstate.rs
  rust/hg-cpython/src/dirstate/copymap.rs
  rust/hg-cpython/src/dirstate/dirs_multiset.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/macros.rs
  rust/hg-cpython/src/dirstate/mod.rs
  rust/hg-cpython/src/exceptions.rs
  rust/hg-cpython/src/lib.rs
  tests/fakedirstatewritetime.py

CHANGE DETAILS

diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -16,12 +16,6 @@
 )
 from mercurial.utils import dateutil
 
-try:
-    from mercurial import rustext
-    rustext.__name__  # force actual import (see hgdemandimport)
-except ImportError:
-    rustext = None
-
 configtable = {}
 configitem = registrar.configitem(configtable)
 
@@ -30,6 +24,7 @@
 )
 
 parsers = policy.importmod(r'parsers')
+rustmod = policy.importrust(r'parsers')
 
 def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
     # execute what original parsers.pack_dirstate should do actually
@@ -57,12 +52,17 @@
     # 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy
     fakenow = dateutil.parsedate(fakenow, [b'%Y%m%d%H%M'])[0]
 
-    if rustext is not None:
-        orig_module = rustext.dirstate
-        orig_pack_dirstate = rustext.dirstate.pack_dirstate
-    else:
-        orig_module = parsers
-        orig_pack_dirstate = parsers.pack_dirstate
+    if rustmod is not None:
+        # The Rust implementation does not use public parse/pack dirstate
+        # to prevent conversion round-trips
+        orig_dirstatemap_write = dirstate.dirstatemap.write
+        wrapper = lambda self, st, now: orig_dirstatemap_write(self,
+                                                               st,
+                                                               fakenow)
+        dirstate.dirstatemap.write = wrapper
+
+    orig_module = parsers
+    orig_pack_dirstate = parsers.pack_dirstate
 
     orig_dirstate_getfsnow = dirstate._getfsnow
     wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
@@ -74,6 +74,8 @@
     finally:
         orig_module.pack_dirstate = orig_pack_dirstate
         dirstate._getfsnow = orig_dirstate_getfsnow
+        if rustmod is not None:
+            dirstate.dirstatemap.write = orig_dirstatemap_write
 
 def _poststatusfixup(orig, workingctx, status, fixup):
     ui = workingctx.repo().ui
diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs
--- a/rust/hg-cpython/src/lib.rs
+++ b/rust/hg-cpython/src/lib.rs
@@ -50,6 +50,11 @@
         "filepatterns",
         filepatterns::init_module(py, &dotted_name)?,
     )?;
+    m.add(
+        py,
+        "parsers",
+        dirstate::init_parsers_module(py, &dotted_name)?,
+    )?;
     m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
     m.add(
         py,
diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs
--- a/rust/hg-cpython/src/exceptions.rs
+++ b/rust/hg-cpython/src/exceptions.rs
@@ -65,3 +65,5 @@
         }
     }
 }
+
+py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
diff --git a/rust/hg-cpython/src/dirstate/mod.rs b/rust/hg-cpython/src/dirstate/mod.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/mod.rs
@@ -0,0 +1,277 @@
+// dirstate module
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate` module provided by the
+//! `hg-core` package.
+//!
+//! From Python, this will be seen as `mercurial.rustext.dirstate`
+use cpython::{
+    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
+    PySequence, PyTuple, Python, ToPyObject,
+};
+use hg::{
+    DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
+    StateMap,
+};
+use std::ffi::CStr;
+
+#[cfg(feature = "python27")]
+extern crate python27_sys as python_sys;
+#[cfg(feature = "python3")]
+extern crate python3_sys as python_sys;
+
+use self::python_sys::PyCapsule_Import;
+use hg::parsers::{pack_dirstate, parse_dirstate};
+use libc::{c_char, c_int};
+use std::collections::HashMap;
+use std::mem::transmute;
+use std::time::Duration;
+
+#[macro_use]
+mod macros;
+mod copymap;
+mod dirs_multiset;
+mod dirstate_map;
+use dirstate::dirs_multiset::Dirs;
+use dirstate::dirstate_map::DirstateMap;
+use exceptions::AlreadyBorrowed;
+
+/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
+/// for this type, and raises a Python `Exception` if the check does not pass.
+/// Because this type differs only in name from the regular Python tuple, it
+/// would be a good idea in the near future to remove it entirely to allow
+/// for a pure Python tuple of the same effective structure to be used,
+/// rendering this type and the capsule below useless.
+type MakeDirstateTupleFn = extern "C" fn(
+    state: c_char,
+    mode: c_int,
+    size: c_int,
+    mtime: c_int,
+) -> PyObject;
+
+/// This is largely a copy/paste from cindex.rs, pending the merge of a
+/// `py_capsule_fn!` macro in the rust-cpython project:
+/// https://github.com/dgrunwald/rust-cpython/pull/169
+fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
+    unsafe {
+        let caps_name = CStr::from_bytes_with_nul_unchecked(
+            b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0",
+        );
+        let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
+        if from_caps.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+        Ok(transmute(from_caps))
+    }
+}
+
+fn extract_dirstate(py: Python, dmap: &PyDict) -> Result<StateMap, PyErr> {
+    dmap.items(py)
+        .iter()
+        .map(|(filename, stats)| {
+            let stats = stats.extract::<PySequence>(py)?;
+            let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
+            let state = state.data(py)[0];
+            let mode = stats.get_item(py, 1)?.extract(py)?;
+            let size = stats.get_item(py, 2)?.extract(py)?;
+            let mtime = stats.get_item(py, 3)?.extract(py)?;
+            let filename = filename.extract::<PyBytes>(py)?;
+            let filename = filename.data(py);
+            Ok((
+                filename.to_owned(),
+                DirstateEntry {
+                    state,
+                    mode,
+                    size,
+                    mtime,
+                },
+            ))
+        })
+        .collect()
+}
+
+/// Create the module, with `__package__` given from parent
+pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
+    let dotted_name = &format!("{}.dirstate", package);
+    let m = PyModule::new(py, dotted_name)?;
+
+    m.add(py, "__package__", package)?;
+    m.add(py, "__doc__", "Dirstate - Rust implementation")?;
+
+    m.add_class::<Dirs>(py)?;
+    m.add_class::<DirstateMap>(py)?;
+    m.add(py, "AlreadyBorrowed", py.get_type::<AlreadyBorrowed>())?;
+
+    let sys = PyModule::import(py, "sys")?;
+    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
+    sys_modules.set_item(py, dotted_name, &m)?;
+
+    Ok(m)
+}
+
+fn parse_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    st: PyBytes,
+) -> PyResult<PyTuple> {
+    let mut dirstate_map = HashMap::new();
+    let mut copies = HashMap::new();
+
+    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
+        Ok(parents) => {
+            for (filename, entry) in dirstate_map {
+                dmap.set_item(
+                    py,
+                    PyBytes::new(py, &filename),
+                    decapsule_make_dirstate_tuple(py)?(
+                        entry.state as c_char,
+                        entry.mode,
+                        entry.size,
+                        entry.mtime,
+                    ),
+                )?;
+            }
+            for (path, copy_path) in copies {
+                copymap.set_item(
+                    py,
+                    PyBytes::new(py, &path),
+                    PyBytes::new(py, &copy_path),
+                )?;
+            }
+            Ok(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py),
+            )
+        }
+        Err(e) => Err(PyErr::new::<exc::ValueError, _>(
+            py,
+            match e {
+                DirstateParseError::TooLittleData => {
+                    "too little data for parents".to_string()
+                }
+                DirstateParseError::Overflow => {
+                    "overflow in dirstate".to_string()
+                }
+                DirstateParseError::CorruptedEntry(e) => e,
+                DirstateParseError::Damaged => {
+                    "dirstate appears to be damaged".to_string()
+                }
+            },
+        )),
+    }
+}
+
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+
+    let mut dirstate_map = extract_dirstate(py, &dmap)?;
+
+    let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
+        .items(py)
+        .iter()
+        .map(|(key, value)| {
+            Ok((
+                key.extract::<PyBytes>(py)?.data(py).to_owned(),
+                value.extract::<PyBytes>(py)?.data(py).to_owned(),
+            ))
+        })
+        .collect();
+
+    match pack_dirstate(
+        &mut dirstate_map,
+        &copies?,
+        DirstateParents {
+            p1: p1.to_owned(),
+            p2: p2.to_owned(),
+        },
+        Duration::from_secs(now.value(py) as u64),
+    ) {
+        Ok(packed) => {
+            for (
+                filename,
+                DirstateEntry {
+                    state,
+                    mode,
+                    size,
+                    mtime,
+                },
+            ) in dirstate_map
+            {
+                dmap.set_item(
+                    py,
+                    PyBytes::new(py, &filename[..]),
+                    decapsule_make_dirstate_tuple(py)?(
+                        state as c_char,
+                        mode,
+                        size,
+                        mtime,
+                    ),
+                )?;
+            }
+            Ok(PyBytes::new(py, &packed))
+        }
+        Err(error) => Err(PyErr::new::<exc::ValueError, _>(
+            py,
+            match error {
+                DirstatePackError::CorruptedParent => {
+                    "expected a 20-byte hash".to_string()
+                }
+                DirstatePackError::CorruptedEntry(e) => e,
+                DirstatePackError::BadSize(expected, actual) => {
+                    format!("bad dirstate size: {} != {}", actual, expected)
+                }
+            },
+        )),
+    }
+}
+
+/// Create the module, with `__package__` given from parent
+pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
+    let dotted_name = &format!("{}.parsers", package);
+    let m = PyModule::new(py, dotted_name)?;
+
+    m.add(py, "__package__", package)?;
+    m.add(py, "__doc__", "Parsers - Rust implementation")?;
+
+    m.add(
+        py,
+        "parse_dirstate",
+        py_fn!(
+            py,
+            parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
+        ),
+    )?;
+    m.add(
+        py,
+        "pack_dirstate",
+        py_fn!(
+            py,
+            pack_dirstate_wrapper(
+                dmap: PyDict,
+                copymap: PyDict,
+                pl: PyTuple,
+                now: PyInt
+            )
+        ),
+    )?;
+
+    let sys = PyModule::import(py, "sys")?;
+    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
+    sys_modules.set_item(py, dotted_name, &m)?;
+
+    Ok(m)
+}
diff --git a/rust/hg-cpython/src/dirstate/macros.rs b/rust/hg-cpython/src/dirstate/macros.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/macros.rs
@@ -0,0 +1,254 @@
+// macros.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Macros for use in the `hg-cpython` bridge library.
+
+/// Allows a `py_class!` generated struct to share references to one of its
+/// data members with Python.
+///
+/// # Warning
+///
+/// The targeted `py_class!` needs to have the
+/// `data leak_count: RefCell<usize>;` data attribute to compile.
+/// A better, more complicated macro is needed to automatically insert the
+/// leak count, which is not yet really battle tested (what happens when
+/// multiple references are needed?). See the example below.
+///
+/// TODO allow Python container types: for now, integration with the garbage
+///     collector does not extend to Rust structs holding references to Python
+///     objects. Should the need surface, `__traverse__` and `__clear__` will
+///     need to be written as per the `rust-cpython` docs on GC integration.
+///
+/// # Parameters
+///
+/// * `$name` is the same identifier used in the `py_class!` macro call.
+/// * `$inner_struct` is the identifier of the underlying Rust struct
+/// * `$data_member` is the identifier of the data member of `$inner_struct`
+/// that will be shared.
+/// * `$leaked` is the identifier to give to the struct that will manage
+/// references to `$name`, to be used for example in other macros like
+/// `py_shared_mapping_iterator`.
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: Vec<u32>,
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data leak_count: RefCell<usize>;
+/// });
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+/// ```
+macro_rules! py_shared_ref {
+    (
+        $name: ident,
+        $inner_struct: ident,
+        $data_member: ident,
+        $leaked: ident
+    ) => {
+        impl $name {
+            fn leak_immutable(&self, py: Python) -> &'static $inner_struct {
+                let ptr = self.$data_member(py).as_ptr();
+                *self.leak_count(py).borrow_mut() += 1;
+                unsafe { &*ptr }
+            }
+
+            fn borrow_mut<'a>(
+                &'a self,
+                py: Python<'a>,
+            ) -> PyResult<RefMut<$inner_struct>> {
+                match *self.leak_count(py).borrow() {
+                    0 => Ok(self.$data_member(py).borrow_mut()),
+                    // TODO
+                    // For now, this works differently than Python references
+                    // in the case of iterators.
+                    // Python does not complain when the data an iterator
+                    // points to is modified if the iterator is never used
+                    // afterwards.
+                    // Here, we are stricter than this by refusing to give a
+                    // mutable reference if it is already borrowed.
+                    // While the additional safety might be argued for, it
+                    // breaks valid programming patterns in Python and we need
+                    // to fix this issue down the line.
+                    _ => Err(AlreadyBorrowed::new(
+                        py,
+                        "Cannot borrow mutably while there are \
+                         immutable references in Python objects",
+                    )),
+                }
+            }
+
+            fn decrease_leak_count(&self, py: Python) {
+                *self.leak_count(py).borrow_mut() -= 1;
+            }
+        }
+
+        /// Manage immutable references to `$name` leaked into Python
+        /// iterators.
+        ///
+        /// In truth, this does not represent leaked references themselves;
+        /// it is instead useful alongside them to manage them.
+        pub struct $leaked {
+            inner: $name,
+        }
+
+        impl $leaked {
+            fn new(py: Python, inner: &$name) -> Self {
+                Self {
+                    inner: inner.clone_ref(py),
+                }
+            }
+        }
+
+        impl Drop for $leaked {
+            fn drop(&mut self) {
+                let gil = Python::acquire_gil();
+                let py = gil.python();
+                self.inner.decrease_leak_count(py);
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python iterator over a Rust iterator.
+macro_rules! py_shared_iterator_impl {
+    (
+        $name: ident,
+        $leaked: ident,
+        $iterator_type: ty,
+        $success_func: expr,
+        $success_type: ty
+    ) => {
+        py_class!(pub class $name |py| {
+            data inner: RefCell<Option<$leaked>>;
+            data it: RefCell<$iterator_type>;
+
+            def __next__(&self) -> PyResult<$success_type> {
+                let mut inner_opt = self.inner(py).borrow_mut();
+                if inner_opt.is_some() {
+                    match self.it(py).borrow_mut().next() {
+                        None => {
+                            // replace Some(inner) by None, drop $leaked
+                            inner_opt.take();
+                            Ok(None)
+                        }
+                        Some(res) => {
+                            $success_func(py, res)
+                        }
+                    }
+                } else {
+                    Ok(None)
+                }
+            }
+
+            def __iter__(&self) -> PyResult<Self> {
+                Ok(self.clone_ref(py))
+            }
+        });
+
+        impl $name {
+            pub fn from_inner(
+                py: Python,
+                leaked: Option<$leaked>,
+                it: $iterator_type
+            ) -> PyResult<Self> {
+                Self::create_instance(
+                    py,
+                    RefCell::new(leaked),
+                    RefCell::new(it)
+                )
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python mapping iterator over a Rust
+/// iterator.
+///
+/// TODO: this is a bit awkward to use, and a better (more complicated)
+///     procedural macro would simplify the interface a lot.
+///
+/// # Parameters
+///
+/// * `$name` is the identifier to give to the resulting Rust struct.
+/// * `$leaked` corresponds to `$leaked` in the matching `py_shared_ref!` call.
+/// * `$iterator_type` is the iterator type
+/// (like `std::collections::hash_map::Iter`).
+/// * `$key_type` is the type of the key in the mapping
+/// * `$value_type` is the type of the value in the mapping
+/// * `$success_func` is a function for processing the Rust `(key, value)`
+/// tuple on iteration success, turning it into something Python understands.
+/// * `$success_type` is the return type of `$success_func`
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: HashMap<Vec<u8>, Vec<u8>>,
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data leak_count: RefCell<usize>;
+///
+///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
+///         MyTypeItemsIterator::create_instance(
+///             py,
+///             RefCell::new(Some(MyTypeLeakedRef::new(py, &self))),
+///             RefCell::new(self.leak_immutable(py).iter()),
+///         )
+///     }
+/// });
+///
+/// impl MyType {
+///     fn translate_key_value(
+///         py: Python,
+///         res: (&Vec<u8>, &Vec<u8>),
+///     ) -> PyResult<Option<(PyBytes, PyBytes)>> {
+///         let (f, entry) = res;
+///         Ok(Some((
+///             PyBytes::new(py, f),
+///             PyBytes::new(py, entry),
+///         )))
+///     }
+/// }
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+///
+/// py_shared_mapping_iterator!(
+///     MyTypeItemsIterator,
+///     MyTypeLeakedRef,
+///     std::collections::hash_map::Iter,
+///     Vec<u8>,
+///     Vec<u8>,
+///     MyType::translate_key_value,
+///     Option<(PyBytes, PyBytes)>
+/// );
+/// ```
+macro_rules! py_shared_mapping_iterator {
+    (
+        $name:ident,
+        $leaked:ident,
+        $iterator_type: ident,
+        $key_type: ty,
+        $value_type: ty,
+        $success_func: path,
+        $success_type: ty
+    ) => {
+        py_shared_iterator_impl!(
+            $name,
+            $leaked,
+            $iterator_type<'static, $key_type, $value_type>,
+            $success_func,
+            $success_type
+        );
+    };
+}
\ No newline at end of file
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -0,0 +1,475 @@
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate::dirstate_map` file provided by the
+//! `hg-core` package.
+
+use std::cell::{RefCell, RefMut};
+use std::collections::hash_map::Iter;
+use std::time::Duration;
+
+use cpython::{
+    exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use libc::c_char;
+
+use dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator};
+use dirstate::decapsule_make_dirstate_tuple;
+use dirstate::dirs_multiset::Dirs;
+use exceptions::AlreadyBorrowed;
+use hg::{
+    DirsIterable, DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
+    DirstateParents,
+};
+
+/// TODO there are a couple of issues that need fixing and I'm not certain
+///     of how to go about them:
+///     This object needs to share references to multiple members of its Rust
+///     inner struct, namely `copy_map`, `dirs` and `all_dirs`.
+///     Right now `CopyMap` is done, but it needs to have an explicit reference
+///     to `RustDirstateMap` which itself needs to have an encapsulation for
+///     every method in `CopyMap` (copymapcopy, etc.).
+///     This is ugly and hard to maintain.
+///     The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
+///     `py_class!` is already implemented and does not mention
+///     `RustDirstateMap`, rightfully so.
+///     All attributes also have to have a separate refcount data attribute for
+///     leaks, with all methods that go along for reference sharing.
+py_class!(pub class DirstateMap |py| {
+    data inner: RefCell<RustDirstateMap>;
+    data leak_count: RefCell<usize>;
+
+    def __new__(_cls, _root: PyObject) -> PyResult<Self> {
+        let inner = RustDirstateMap::default();
+        Self::create_instance(py, RefCell::new(inner), RefCell::new(0))
+    }
+
+    def clear(&self) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.clear();
+        Ok(py.None())
+    }
+
+    def get(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        match self.inner(py).borrow().get(key.data(py)) {
+            Some(entry) => Ok(Some(decapsule_make_dirstate_tuple(py)?(
+                        entry.state as c_char,
+                        entry.mode,
+                        entry.size,
+                        entry.mtime,
+                    ))),
+            None => Ok(default)
+        }
+    }
+
+    def addfile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject,
+        state: PyObject,
+        mode: PyObject,
+        size: PyObject,
+        mtime: PyObject
+    ) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.add_file(
+            f.extract::<PyBytes>(py)?.data(py),
+            oldstate.extract::<PyBytes>(py)?.data(py)[0],
+            DirstateEntry {
+                state: state.extract::<PyBytes>(py)?.data(py)[0],
+                mode: mode.extract(py)?,
+                size: size.extract(py)?,
+                mtime: mtime.extract(py)?,
+            },
+        );
+        Ok(py.None())
+    }
+
+    def removefile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject,
+        size: PyObject
+    ) -> PyResult<PyObject> {
+        self.borrow_mut(py)?
+            .remove_file(
+                f.extract::<PyBytes>(py)?.data(py),
+                oldstate.extract::<PyBytes>(py)?.data(py)[0],
+                size.extract(py)?,
+            )
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })?;
+        Ok(py.None())
+    }
+
+    def dropfile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject
+    ) -> PyResult<PyBool> {
+        self.borrow_mut(py)?
+            .drop_file(
+                f.extract::<PyBytes>(py)?.data(py),
+                oldstate.extract::<PyBytes>(py)?.data(py)[0],
+            )
+            .and_then(|b| Ok(b.to_py_object(py)))
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })
+    }
+
+    def clearambiguoustimes(
+        &self,
+        files: PyObject,
+        now: PyObject
+    ) -> PyResult<PyObject> {
+        let files: PyResult<Vec<Vec<u8>>> = files
+            .iter(py)?
+            .map(|filename| {
+                Ok(filename?.extract::<PyBytes>(py)?.data(py).to_owned())
+            })
+            .collect();
+        self.inner(py)
+            .borrow_mut()
+            .clear_ambiguous_times(files?, now.extract(py)?);
+        Ok(py.None())
+    }
+
+    // TODO share the reference
+    def nonnormalentries(&self) -> PyResult<PyObject> {
+        let (non_normal, other_parent) =
+            self.inner(py).borrow().non_normal_other_parent_entries();
+
+        let locals = PyDict::new(py);
+        locals.set_item(
+            py,
+            "non_normal",
+            non_normal
+                .iter()
+                .map(|v| PyBytes::new(py, &v))
+                .collect::<Vec<PyBytes>>()
+                .to_py_object(py),
+        )?;
+        locals.set_item(
+            py,
+            "other_parent",
+            other_parent
+                .iter()
+                .map(|v| PyBytes::new(py, &v))
+                .collect::<Vec<PyBytes>>()
+                .to_py_object(py),
+        )?;
+
+        py.eval("set(non_normal), set(other_parent)", None, Some(&locals))
+    }
+
+    def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
+        let d = d.extract::<PyBytes>(py)?;
+        Ok(self
+            .inner(py)
+            .borrow_mut()
+            .has_tracked_dir(d.data(py))
+            .to_py_object(py))
+    }
+
+    def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
+        let d = d.extract::<PyBytes>(py)?;
+        Ok(self
+            .inner(py)
+            .borrow_mut()
+            .has_dir(d.data(py))
+            .to_py_object(py))
+    }
+
+    def parents(&self, st: PyObject) -> PyResult<PyTuple> {
+        self.inner(py)
+            .borrow_mut()
+            .parents(st.extract::<PyBytes>(py)?.data(py))
+            .and_then(|d| {
+                Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
+                    .to_py_object(py))
+            })
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })
+    }
+
+    def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
+        let p1 = p1.extract::<PyBytes>(py)?.data(py).to_vec();
+        let p2 = p2.extract::<PyBytes>(py)?.data(py).to_vec();
+
+        self.inner(py)
+            .borrow_mut()
+            .set_parents(DirstateParents { p1, p2 });
+        Ok(py.None())
+    }
+
+    def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
+        match self
+            .inner(py)
+            .borrow_mut()
+            .read(st.extract::<PyBytes>(py)?.data(py))
+        {
+            Ok(Some(parents)) => Ok(Some(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py)
+                    .into_object(),
+            )),
+            Ok(None) => Ok(Some(py.None())),
+            Err(_) => Err(PyErr::new::<exc::OSError, _>(
+                py,
+                "Dirstate error".to_string(),
+            )),
+        }
+    }
+    def write(
+        &self,
+        p1: PyObject,
+        p2: PyObject,
+        now: PyObject
+    ) -> PyResult<PyBytes> {
+        let now = Duration::new(now.extract(py)?, 0);
+        let parents = DirstateParents {
+            p1: p1.extract::<PyBytes>(py)?.data(py).to_owned(),
+            p2: p2.extract::<PyBytes>(py)?.data(py).to_owned(),
+        };
+
+        match self.borrow_mut(py)?.pack(parents, now) {
+            Ok(packed) => Ok(PyBytes::new(py, &packed)),
+            Err(_) => Err(PyErr::new::<exc::OSError, _>(
+                py,
+                "Dirstate error".to_string(),
+            )),
+        }
+    }
+
+    def filefoldmapasdict(&self) -> PyResult<PyDict> {
+        let dict = PyDict::new(py);
+        for (key, value) in
+            self.borrow_mut(py)?.property_file_fold_map().iter()
+        {
+            dict.set_item(py, key, value)?;
+        }
+        Ok(dict)
+    }
+
+    def __len__(&self) -> PyResult<usize> {
+        Ok(self.inner(py).borrow().len())
+    }
+
+    // Membership test for the `bytes` path `key`; fails if `key` is not
+    // `bytes`.
+    def __contains__(&self, key: PyObject) -> PyResult<bool> {
+        let path = key.extract::<PyBytes>(py)?;
+        let found = self.inner(py).borrow().contains_key(path.data(py));
+        Ok(found)
+    }
+
+    // Look up the entry stored for the `bytes` path `key` and return it as
+    // the object produced by the `make_dirstate_tuple` capsule function.
+    // Raises `KeyError` (with the lossily decoded path) when absent.
+    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let key = key.extract::<PyBytes>(py)?;
+        let path = key.data(py);
+        let map = self.inner(py).borrow();
+        let entry = map.get(path).ok_or_else(|| {
+            PyErr::new::<exc::KeyError, _>(py, String::from_utf8_lossy(path))
+        })?;
+        let make_dirstate_tuple = decapsule_make_dirstate_tuple(py)?;
+        Ok(make_dirstate_tuple(
+            entry.state as c_char,
+            entry.mode,
+            entry.size,
+            entry.mtime,
+        ))
+    }
+
+    // Return a Python iterator over the keys of the map. The iterator is
+    // built from a leaked immutable reference to the inner map plus the
+    // `DirstateMapLeakedRef` that accompanies it.
+    def keys(&self) -> PyResult<DirstateMapKeysIterator> {
+        let leaked_ref = DirstateMapLeakedRef::new(py, &self);
+        let entries = self.leak_immutable(py).iter();
+        DirstateMapKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(leaked_ref)),
+            RefCell::new(entries),
+        )
+    }
+
+    // Return a Python iterator over the `(key, entry)` pairs of the map,
+    // built from a leaked immutable reference to the inner map plus the
+    // `DirstateMapLeakedRef` that accompanies it.
+    def items(&self) -> PyResult<DirstateMapItemsIterator> {
+        let leaked_ref = DirstateMapLeakedRef::new(py, &self);
+        let entries = self.leak_immutable(py).iter();
+        DirstateMapItemsIterator::create_instance(
+            py,
+            RefCell::new(Some(leaked_ref)),
+            RefCell::new(entries),
+        )
+    }
+
+    // Iterating the map yields its keys; this builds the same kind of
+    // iterator as `keys()`.
+    def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
+        let leaked_ref = DirstateMapLeakedRef::new(py, &self);
+        let entries = self.leak_immutable(py).iter();
+        DirstateMapKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(leaked_ref)),
+            RefCell::new(entries),
+        )
+    }
+
+    // Call `set_dirs()` on the inner map, then return a new `Dirs` object
+    // built from the current entries with `b'r'` as the skip state.
+    def getdirs(&self) -> PyResult<Dirs> {
+        // TODO don't copy, share the reference
+        self.inner(py).borrow_mut().set_dirs();
+        let map = self.inner(py).borrow();
+        let dirs = DirsMultiset::new(
+            DirsIterable::Dirstate(&map),
+            Some(b'r'),
+        );
+        Dirs::from_inner(py, dirs)
+    }
+    // Call `set_all_dirs()` on the inner map, then return a new `Dirs`
+    // object built from the current entries without any skip state.
+    def getalldirs(&self) -> PyResult<Dirs> {
+        // TODO don't copy, share the reference
+        self.inner(py).borrow_mut().set_all_dirs();
+        let map = self.inner(py).borrow();
+        let dirs = DirsMultiset::new(
+            DirsIterable::Dirstate(&map),
+            None,
+        );
+        Dirs::from_inner(py, dirs)
+    }
+
+    // TODO all copymap* methods, see docstring above
+    // Return a new Python `dict` holding a `bytes -> bytes` copy of every
+    // entry of the copy map.
+    def copymapcopy(&self) -> PyResult<PyDict> {
+        let copies = PyDict::new(py);
+        let map = self.inner(py).borrow();
+        for (path, source) in map.copy_map.iter() {
+            let path = PyBytes::new(py, path);
+            let source = PyBytes::new(py, source);
+            copies.set_item(py, path, source)?;
+        }
+        Ok(copies)
+    }
+
+    // Return the copy map value stored for the `bytes` path `key`.
+    // Raises `KeyError` (with the lossily decoded path) when absent.
+    def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
+        let key = key.extract::<PyBytes>(py)?;
+        let path = key.data(py);
+        let map = self.inner(py).borrow();
+        let copy = map.copy_map.get(path).ok_or_else(|| {
+            PyErr::new::<exc::KeyError, _>(py, String::from_utf8_lossy(path))
+        })?;
+        Ok(PyBytes::new(py, copy))
+    }
+    // Return a `CopyMap` Python object wrapping a new reference to this
+    // dirstate map.
+    def copymap(&self) -> PyResult<CopyMap> {
+        let dirstate_map = self.clone_ref(py);
+        CopyMap::from_inner(py, dirstate_map)
+    }
+
+    // Number of entries in the copy map.
+    def copymaplen(&self) -> PyResult<usize> {
+        let map = self.inner(py).borrow();
+        Ok(map.copy_map.len())
+    }
+    // Membership test for the `bytes` path `key` in the copy map; fails if
+    // `key` is not `bytes`.
+    def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
+        let path = key.extract::<PyBytes>(py)?;
+        let map = self.inner(py).borrow();
+        Ok(map.copy_map.contains_key(path.data(py)))
+    }
+    // Return the copy map value stored for the `bytes` path `key` as a new
+    // `bytes` object, or `default` when the path has no entry.
+    def copymapget(
+        &self,
+        key: PyObject,
+        default: Option<PyObject>
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        let map = self.inner(py).borrow();
+        let found = map
+            .copy_map
+            .get(key.data(py))
+            .map(|copy| PyBytes::new(py, copy).into_object());
+        Ok(found.or(default))
+    }
+    // Record `value` as the copy map entry of `key` (both Python `bytes`),
+    // replacing any previous entry. Returns `None`.
+    def copymapsetitem(
+        &self,
+        key: PyObject,
+        value: PyObject
+    ) -> PyResult<PyObject> {
+        let key = key.extract::<PyBytes>(py)?;
+        let value = value.extract::<PyBytes>(py)?;
+        // Use the checked `borrow_mut` accessor (the one `write` uses) instead
+        // of borrowing the `RefCell` directly: inserting while a Python
+        // iterator still holds a leaked reference could leave that iterator
+        // pointing at stale storage.
+        // NOTE(review): assumes `borrow_mut` refuses to hand out a mutable
+        // borrow while leaked references exist -- confirm in `py_shared_ref!`.
+        self.borrow_mut(py)?
+            .copy_map
+            .insert(key.data(py).to_vec(), value.data(py).to_vec());
+        Ok(py.None())
+    }
+    // Remove the copy map entry of the `bytes` path `key` and return the
+    // removed value as `bytes`, mirroring `dict.pop(key, default)`.
+    // Returns `default` when the path has no entry.
+    def copymappop(
+        &self,
+        key: PyObject,
+        default: Option<PyObject>
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        // Use the checked `borrow_mut` accessor (the one `write` uses) instead
+        // of borrowing the `RefCell` directly, so entries are not removed
+        // under a Python iterator that still holds a leaked reference.
+        // NOTE(review): assumes `borrow_mut` refuses to hand out a mutable
+        // borrow while leaked references exist -- confirm in `py_shared_ref!`.
+        match self.borrow_mut(py)?.copy_map.remove(key.data(py)) {
+            // Hand the popped value back instead of discarding it.
+            Some(copy) => Ok(Some(PyBytes::new(py, &copy).into_object())),
+            None => Ok(default),
+        }
+    }
+
+    // Return a Python iterator over the keys of the copy map, built from a
+    // leaked immutable reference to the inner map plus the
+    // `DirstateMapLeakedRef` that accompanies it.
+    def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
+        let leaked_ref = DirstateMapLeakedRef::new(py, &self);
+        let copies = self.leak_immutable(py).copy_map.iter();
+        CopyMapKeysIterator::from_inner(py, Some(leaked_ref), copies)
+    }
+
+    // Return a Python iterator over the `(key, value)` pairs of the copy
+    // map, built from a leaked immutable reference to the inner map plus the
+    // `DirstateMapLeakedRef` that accompanies it.
+    def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
+        let leaked_ref = DirstateMapLeakedRef::new(py, &self);
+        let copies = self.leak_immutable(py).copy_map.iter();
+        CopyMapItemsIterator::from_inner(py, Some(leaked_ref), copies)
+    }
+
+});
+
+impl DirstateMap {
+    /// Convert a `(path, entry)` pair into the Python `bytes` key yielded by
+    /// `DirstateMapKeysIterator`. Always produces `Ok(Some(..))`.
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &DirstateEntry),
+    ) -> PyResult<Option<PyBytes>> {
+        let (path, _entry) = res;
+        Ok(Some(PyBytes::new(py, path)))
+    }
+    /// Convert a `(path, entry)` pair into the `(bytes, tuple)` pair yielded
+    /// by `DirstateMapItemsIterator`. The second element is built by the
+    /// `make_dirstate_tuple` capsule function; failing to obtain that
+    /// function is the only error path.
+    fn translate_key_value(
+        py: Python,
+        res: (&Vec<u8>, &DirstateEntry),
+    ) -> PyResult<Option<(PyBytes, PyObject)>> {
+        let (path, entry) = res;
+        let make_dirstate_tuple = decapsule_make_dirstate_tuple(py)?;
+        let key = PyBytes::new(py, path);
+        let value = make_dirstate_tuple(
+            entry.state as c_char,
+            entry.mode,
+            entry.size,
+            entry.mtime,
+        );
+        Ok(Some((key, value)))
+    }
+}
+
+// Generate the reference-sharing helpers for `DirstateMap`, whose `inner`
+// data member holds a `RustDirstateMap`, together with the
+// `DirstateMapLeakedRef` type.
+// NOTE(review): presumably this is where `borrow_mut` and `leak_immutable`
+// come from -- confirm against the macro definition.
+py_shared_ref!(DirstateMap, RustDirstateMap, inner, DirstateMapLeakedRef);
+
+// Python iterator over the keys of the map; each `(Vec<u8>, DirstateEntry)`
+// pair of the underlying `Iter` goes through `DirstateMap::translate_key`.
+py_shared_mapping_iterator!(
+    DirstateMapKeysIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    DirstateEntry,
+    DirstateMap::translate_key,
+    Option<PyBytes>
+);
+
+// Python iterator over the `(key, entry)` pairs of the map; each pair of the
+// underlying `Iter` goes through `DirstateMap::translate_key_value`.
+py_shared_mapping_iterator!(
+    DirstateMapItemsIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    DirstateEntry,
+    DirstateMap::translate_key_value,
+    Option<(PyBytes, PyObject)>
+);
diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,124 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
+//! `hg-core` package.
+
+use std::cell::{RefCell, RefMut};
+use std::collections::hash_map::Iter;
+
+use cpython::{
+    exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
+    Python,
+};
+
+use dirstate::extract_dirstate;
+use exceptions::AlreadyBorrowed;
+use hg::{DirsIterable, DirsMultiset, DirstateMapError};
+
+py_class!(pub class Dirs |py| {
+    // Rust multiset of directories backing this Python object.
+    data inner: RefCell<DirsMultiset>;
+    // Counter stored next to `inner`; initialised to 0 on creation.
+    // NOTE(review): presumably counts the references leaked to Python
+    // iterators by `py_shared_ref!` -- confirm against the macro.
+    data leak_count: RefCell<usize>;
+
+    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
+    // a `list`)
+    // `skip`, when given, must be a non-empty `bytes`; its first byte is
+    // passed to `DirsMultiset::new` as the state to skip. An empty `skip`
+    // raises `ValueError`.
+    def __new__(
+        _cls,
+        map: PyObject,
+        skip: Option<PyObject> = None
+    ) -> PyResult<Self> {
+        let skip_state: Option<u8> = match skip {
+            Some(skip) => {
+                let skip = skip.extract::<PyBytes>(py)?;
+                // Indexing with `[0]` would panic on `b''`; report the
+                // problem to Python as a regular exception instead.
+                let state = skip.data(py).first().cloned().ok_or_else(|| {
+                    PyErr::new::<exc::ValueError, _>(
+                        py,
+                        "skip state must not be empty".to_string(),
+                    )
+                })?;
+                Some(state)
+            }
+            None => None,
+        };
+        let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
+            // A `dict` is treated as a dirstate mapping.
+            let dirstate = extract_dirstate(py, &map)?;
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&dirstate),
+                skip_state,
+            )
+        } else {
+            // Anything else is iterated and must yield `bytes` paths.
+            let map: Result<Vec<Vec<u8>>, PyErr> = map
+                .iter(py)?
+                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+                .collect();
+            DirsMultiset::new(
+                DirsIterable::Manifest(&map?),
+                skip_state,
+            )
+        };
+
+        Self::create_instance(py, RefCell::new(inner), RefCell::new(0))
+    }
+
+    // Add the `bytes` path `path` to the multiset. Returns `None`.
+    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.add_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        );
+        Ok(py.None())
+    }
+
+    // Remove the `bytes` path `path` from the multiset. Returns `None`.
+    // A path that is not found raises `ValueError`; an empty path is
+    // silently accepted.
+    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.delete_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        )
+            .and(Ok(py.None()))
+            .or_else(|e| {
+                match e {
+                    DirstateMapError::PathNotFound(_p) => {
+                        Err(PyErr::new::<exc::ValueError, _>(
+                            py,
+                            "expected a value, found none".to_string(),
+                        ))
+                    }
+                    DirstateMapError::EmptyPath => {
+                        Ok(py.None())
+                    }
+                }
+            })
+    }
+
+    // Return a Python iterator over the keys of the multiset, built from a
+    // leaked immutable reference to the inner multiset plus the
+    // `DirsMultisetLeakedRef` that accompanies it.
+    def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
+        DirsMultisetKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(DirsMultisetLeakedRef::new(py, &self))),
+            RefCell::new(self.leak_immutable(py).iter()),
+        )
+    }
+
+    // Membership test for the `bytes` path `item`; fails if `item` is not
+    // `bytes`.
+    def __contains__(&self, item: PyObject) -> PyResult<bool> {
+        Ok(self
+            .inner(py)
+            .borrow()
+            .contains_key(item.extract::<PyBytes>(py)?.data(py).as_ref()))
+    }
+});
+
+// Generate the reference-sharing helpers for `Dirs`, whose `inner` data
+// member holds a `DirsMultiset`, together with the `DirsMultisetLeakedRef`
+// type.
+// NOTE(review): presumably this is where `borrow_mut` and `leak_immutable`
+// come from -- confirm against the macro definition.
+py_shared_ref!(Dirs, DirsMultiset, inner, DirsMultisetLeakedRef);
+
+impl Dirs {
+    /// Wrap an already built `DirsMultiset` in a new Python `Dirs` object
+    /// whose leak counter starts at zero.
+    pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
+        let inner = RefCell::new(d);
+        let leak_count = RefCell::new(0);
+        Self::create_instance(py, inner, leak_count)
+    }
+
+    /// Convert a `(path, count)` pair into the Python `bytes` key yielded by
+    /// `DirsMultisetKeysIterator`. Always produces `Ok(Some(..))`.
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &u32),
+    ) -> PyResult<Option<PyBytes>> {
+        let (path, _count) = res;
+        Ok(Some(PyBytes::new(py, path)))
+    }
+}
+
+// Python iterator over the keys of a `Dirs` object; each `(Vec<u8>, u32)`
+// pair of the underlying `Iter` goes through `Dirs::translate_key`.
+py_shared_mapping_iterator!(
+    DirsMultisetKeysIterator,
+    DirsMultisetLeakedRef,
+    Iter,
+    Vec<u8>,
+    u32,
+    Dirs::translate_key,
+    Option<PyBytes>
+);
diff --git a/rust/hg-cpython/src/dirstate/copymap.rs b/rust/hg-cpython/src/dirstate/copymap.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/copymap.rs
@@ -0,0 +1,119 @@
+// copymap.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
+//! `hg-core` package.
+
+use cpython::{PyBytes, PyClone, PyDict, PyObject, PyResult, Python};
+use std::cell::RefCell;
+use std::collections::hash_map::Iter;
+
+use dirstate::dirstate_map::{DirstateMap, DirstateMapLeakedRef};
+
+// Mapping-like Python view over the copy map of a `DirstateMap`. Every
+// method simply delegates to the matching `copymap*` method of the wrapped
+// dirstate map.
+py_class!(pub class CopyMap |py| {
+    // The dirstate map whose copy map this object exposes.
+    data dirstate_map: DirstateMap;
+
+    // `self[key]`: delegates to `copymapgetitem`.
+    def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
+        self.dirstate_map(py).copymapgetitem(py, key)
+    }
+
+    // `len(self)`: delegates to `copymaplen`.
+    def __len__(&self) -> PyResult<usize> {
+        self.dirstate_map(py).copymaplen(py)
+    }
+
+    // `key in self`: delegates to `copymapcontains`.
+    def __contains__(&self, key: PyObject) -> PyResult<bool> {
+        self.dirstate_map(py).copymapcontains(py, key)
+    }
+
+    // Delegates to `copymapget`; `default` is `None` when omitted.
+    def get(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        self.dirstate_map(py).copymapget(py, key, default)
+    }
+
+    // Delegates to `copymappop`; `default` is `None` when omitted.
+    def pop(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        self.dirstate_map(py).copymappop(py, key, default)
+    }
+
+    // Iterating the copy map yields its keys (delegates to `copymapiter`).
+    def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
+        self.dirstate_map(py).copymapiter(py)
+    }
+
+    // Python's `dict()` builtin works with either a subclass of dict
+    // or an abstract mapping. Said mapping needs to implement `__getitem__`
+    // and `keys`.
+    def keys(&self) -> PyResult<CopyMapKeysIterator> {
+        self.dirstate_map(py).copymapiter(py)
+    }
+
+    // Iterator over `(key, value)` pairs (delegates to `copymapitemsiter`).
+    def items(&self) -> PyResult<CopyMapItemsIterator> {
+        self.dirstate_map(py).copymapitemsiter(py)
+    }
+
+    // Same iterator as `items()`, under the Python 2 style name.
+    def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
+        self.dirstate_map(py).copymapitemsiter(py)
+    }
+
+    // `self[key] = item`: delegates to `copymapsetitem` and discards the
+    // `None` object it returns.
+    def __setitem__(
+        &self,
+        key: PyObject,
+        item: PyObject
+    ) -> PyResult<()> {
+        self.dirstate_map(py).copymapsetitem(py, key, item)?;
+        Ok(())
+    }
+
+    // Return a plain Python `dict` copy (delegates to `copymapcopy`).
+    def copy(&self) -> PyResult<PyDict> {
+        self.dirstate_map(py).copymapcopy(py)
+    }
+
+});
+
+impl CopyMap {
+    /// Create the Python `CopyMap` object wrapping the dirstate map `dm`.
+    pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
+        Self::create_instance(py, dm)
+    }
+    /// Convert a `(path, copy source)` pair into the Python `bytes` key
+    /// yielded by `CopyMapKeysIterator`. Always produces `Ok(Some(..))`.
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &Vec<u8>),
+    ) -> PyResult<Option<PyBytes>> {
+        let (path, _source) = res;
+        Ok(Some(PyBytes::new(py, path)))
+    }
+    /// Convert a `(path, copy source)` pair into the pair of Python `bytes`
+    /// yielded by `CopyMapItemsIterator`. Always produces `Ok(Some(..))`.
+    fn translate_key_value(
+        py: Python,
+        res: (&Vec<u8>, &Vec<u8>),
+    ) -> PyResult<Option<(PyBytes, PyBytes)>> {
+        let key = PyBytes::new(py, res.0);
+        let value = PyBytes::new(py, res.1);
+        Ok(Some((key, value)))
+    }
+}
+
+// Python iterator over the keys of the copy map; each `(Vec<u8>, Vec<u8>)`
+// pair of the underlying `Iter` goes through `CopyMap::translate_key`.
+// It shares `DirstateMapLeakedRef` with the dirstate map iterators.
+py_shared_mapping_iterator!(
+    CopyMapKeysIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    Vec<u8>,
+    CopyMap::translate_key,
+    Option<PyBytes>
+);
+
+// Python iterator over the `(key, value)` pairs of the copy map; each pair
+// of the underlying `Iter` goes through `CopyMap::translate_key_value`.
+py_shared_mapping_iterator!(
+    CopyMapItemsIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    Vec<u8>,
+    CopyMap::translate_key_value,
+    Option<(PyBytes, PyBytes)>
+);
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
deleted file mode 100644
--- a/rust/hg-cpython/src/dirstate.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-// dirstate.rs
-//
-// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
-//
-// This software may be used and distributed according to the terms of the
-// GNU General Public License version 2 or any later version.
-
-//! Bindings for the `hg::dirstate` module provided by the
-//! `hg-core` package.
-//!
-//! From Python, this will be seen as `mercurial.rustext.dirstate`
-
-use cpython::{
-    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
-    PyResult, PySequence, PyTuple, Python, PythonObject, ToPyObject,
-};
-use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
-    DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
-};
-use std::collections::HashMap;
-use std::ffi::CStr;
-
-#[cfg(feature = "python27")]
-extern crate python27_sys as python_sys;
-#[cfg(feature = "python3")]
-extern crate python3_sys as python_sys;
-
-use self::python_sys::PyCapsule_Import;
-use libc::{c_char, c_int};
-use std::cell::RefCell;
-use std::mem::transmute;
-
-/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
-/// for this type, and raises a Python `Exception` if the check does not pass.
-/// Because this type differs only in name from the regular Python tuple, it
-/// would be a good idea in the near future to remove it entirely to allow
-/// for a pure Python tuple of the same effective structure to be used,
-/// rendering this type and the capsule below useless.
-type MakeDirstateTupleFn = extern "C" fn(
-    state: c_char,
-    mode: c_int,
-    size: c_int,
-    mtime: c_int,
-) -> PyObject;
-
-/// This is largely a copy/paste from cindex.rs, pending the merge of a
-/// `py_capsule_fn!` macro in the rust-cpython project:
-/// https://github.com/dgrunwald/rust-cpython/pull/169
-fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
-    unsafe {
-        let caps_name = CStr::from_bytes_with_nul_unchecked(
-            b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0",
-        );
-        let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
-        if from_caps.is_null() {
-            return Err(PyErr::fetch(py));
-        }
-        Ok(transmute(from_caps))
-    }
-}
-
-fn parse_dirstate_wrapper(
-    py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    st: PyBytes,
-) -> PyResult<PyTuple> {
-    match parse_dirstate(st.data(py)) {
-        Ok((parents, dirstate_vec, copies)) => {
-            for (filename, entry) in dirstate_vec {
-                dmap.set_item(
-                    py,
-                    PyBytes::new(py, &filename[..]),
-                    decapsule_make_dirstate_tuple(py)?(
-                        entry.state as c_char,
-                        entry.mode,
-                        entry.size,
-                        entry.mtime,
-                    ),
-                )?;
-            }
-            for CopyVecEntry { path, copy_path } in copies {
-                copymap.set_item(
-                    py,
-                    PyBytes::new(py, path),
-                    PyBytes::new(py, copy_path),
-                )?;
-            }
-            Ok((PyBytes::new(py, parents.p1), PyBytes::new(py, parents.p2))
-                .to_py_object(py))
-        }
-        Err(e) => Err(PyErr::new::<exc::ValueError, _>(
-            py,
-            match e {
-                DirstateParseError::TooLittleData => {
-                    "too little data for parents".to_string()
-                }
-                DirstateParseError::Overflow => {
-                    "overflow in dirstate".to_string()
-                }
-                DirstateParseError::CorruptedEntry(e) => e,
-            },
-        )),
-    }
-}
-
-fn extract_dirstate_vec(
-    py: Python,
-    dmap: &PyDict,
-) -> Result<DirstateVec, PyErr> {
-    dmap.items(py)
-        .iter()
-        .map(|(filename, stats)| {
-            let stats = stats.extract::<PySequence>(py)?;
-            let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
-            let state = state.data(py)[0] as i8;
-            let mode = stats.get_item(py, 1)?.extract(py)?;
-            let size = stats.get_item(py, 2)?.extract(py)?;
-            let mtime = stats.get_item(py, 3)?.extract(py)?;
-            let filename = filename.extract::<PyBytes>(py)?;
-            let filename = filename.data(py);
-            Ok((
-                filename.to_owned(),
-                DirstateEntry {
-                    state,
-                    mode,
-                    size,
-                    mtime,
-                },
-            ))
-        })
-        .collect()
-}
-
-fn pack_dirstate_wrapper(
-    py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    pl: PyTuple,
-    now: PyInt,
-) -> PyResult<PyBytes> {
-    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
-    let p1: &[u8] = p1.data(py);
-    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
-    let p2: &[u8] = p2.data(py);
-
-    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
-
-    let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
-        .items(py)
-        .iter()
-        .map(|(key, value)| {
-            Ok((
-                key.extract::<PyBytes>(py)?.data(py).to_owned(),
-                value.extract::<PyBytes>(py)?.data(py).to_owned(),
-            ))
-        })
-        .collect();
-
-    match pack_dirstate(
-        &dirstate_vec,
-        &copies?,
-        DirstateParents { p1, p2 },
-        now.as_object().extract::<i32>(py)?,
-    ) {
-        Ok((packed, new_dirstate_vec)) => {
-            for (
-                filename,
-                DirstateEntry {
-                    state,
-                    mode,
-                    size,
-                    mtime,
-                },
-            ) in new_dirstate_vec
-            {
-                dmap.set_item(
-                    py,
-                    PyBytes::new(py, &filename[..]),
-                    decapsule_make_dirstate_tuple(py)?(
-                        state as c_char,
-                        mode,
-                        size,
-                        mtime,
-                    ),
-                )?;
-            }
-            Ok(PyBytes::new(py, &packed))
-        }
-        Err(error) => Err(PyErr::new::<exc::ValueError, _>(
-            py,
-            match error {
-                DirstatePackError::CorruptedParent => {
-                    "expected a 20-byte hash".to_string()
-                }
-                DirstatePackError::CorruptedEntry(e) => e,
-                DirstatePackError::BadSize(expected, actual) => {
-                    format!("bad dirstate size: {} != {}", actual, expected)
-                }
-            },
-        )),
-    }
-}
-
-py_class!(pub class Dirs |py| {
-    data dirs_map: RefCell<DirsMultiset>;
-
-    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
-    // a `list`)
-    def __new__(
-        _cls,
-        map: PyObject,
-        skip: Option<PyObject> = None
-    ) -> PyResult<Self> {
-        let mut skip_state: Option<i8> = None;
-        if let Some(skip) = skip {
-            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
-        }
-        let dirs_map;
-
-        if let Ok(map) = map.cast_as::<PyDict>(py) {
-            let dirstate_vec = extract_dirstate_vec(py, &map)?;
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Dirstate(dirstate_vec),
-                skip_state,
-            )
-        } else {
-            let map: Result<Vec<Vec<u8>>, PyErr> = map
-                .iter(py)?
-                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
-                .collect();
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Manifest(map?),
-                skip_state,
-            )
-        }
-
-        Self::create_instance(py, RefCell::new(dirs_map))
-    }
-
-    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().add_path(
-            path.extract::<PyBytes>(py)?.data(py),
-        );
-        Ok(py.None())
-    }
-
-    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().delete_path(
-            path.extract::<PyBytes>(py)?.data(py),
-        )
-            .and(Ok(py.None()))
-            .or_else(|e| {
-                match e {
-                    DirstateMapError::PathNotFound(_p) => {
-                        Err(PyErr::new::<exc::ValueError, _>(
-                            py,
-                            "expected a value, found none".to_string(),
-                        ))
-                    }
-                    DirstateMapError::EmptyPath => {
-                        Ok(py.None())
-                    }
-                }
-            })
-    }
-
-    // This is really inefficient on top of being ugly, but it's an easy way
-    // of having it work to continue working on the rest of the module
-    // hopefully bypassing Python entirely pretty soon.
-    def __iter__(&self) -> PyResult<PyObject> {
-        let dict = PyDict::new(py);
-
-        for (key, value) in self.dirs_map(py).borrow().iter() {
-            dict.set_item(
-                py,
-                PyBytes::new(py, &key[..]),
-                value.to_py_object(py),
-            )?;
-        }
-
-        let locals = PyDict::new(py);
-        locals.set_item(py, "obj", dict)?;
-
-        py.eval("iter(obj)", None, Some(&locals))
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        Ok(self
-            .dirs_map(py)
-            .borrow()
-            .contains_key(item.extract::<PyBytes>(py)?.data(py).as_ref()))
-    }
-});
-
-/// Create the module, with `__package__` given from parent
-pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
-    let dotted_name = &format!("{}.dirstate", package);
-    let m = PyModule::new(py, dotted_name)?;
-
-    m.add(py, "__package__", package)?;
-    m.add(py, "__doc__", "Dirstate - Rust implementation")?;
-    m.add(
-        py,
-        "parse_dirstate",
-        py_fn!(
-            py,
-            parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
-        ),
-    )?;
-    m.add(
-        py,
-        "pack_dirstate",
-        py_fn!(
-            py,
-            pack_dirstate_wrapper(
-                dmap: PyDict,
-                copymap: PyDict,
-                pl: PyTuple,
-                now: PyInt
-            )
-        ),
-    )?;
-
-    m.add_class::<Dirs>(py)?;
-
-    let sys = PyModule::import(py, "sys")?;
-    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
-    sys_modules.set_item(py, dotted_name, &m)?;
-
-    Ok(m)
-}
diff --git a/rust/hg-core/src/utils/mod.rs b/rust/hg-core/src/utils/mod.rs
--- a/rust/hg-core/src/utils/mod.rs
+++ b/rust/hg-core/src/utils/mod.rs
@@ -1,3 +1,12 @@
+// utils module
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Contains useful functions, traits, structs, etc. for use in core.
+
 pub mod files;
 
 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -1,3 +1,12 @@
+// files.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Functions for fiddling with files.
+
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -6,22 +6,22 @@
 extern crate memchr;
 #[macro_use]
 extern crate lazy_static;
+extern crate core;
 extern crate regex;
 
 mod ancestors;
 pub mod dagops;
+pub mod utils;
 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
 mod dirstate;
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
-    parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
-    DirstateVec,
+    dirstate_map::{CopyMap, DirstateMap, StateMap},
+    parsers, DirsIterable, DirstateEntry, DirstateParents,
 };
 mod filepatterns;
-mod utils;
 
 pub use filepatterns::{
     build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
@@ -66,6 +66,13 @@
     TooLittleData,
     Overflow,
     CorruptedEntry(String),
+    Damaged,
+}
+
+impl From<std::io::Error> for DirstateParseError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateParseError::CorruptedEntry(e.to_string())
+    }
 }
 
 #[derive(Debug, PartialEq)]
@@ -75,21 +82,34 @@
     BadSize(usize, usize),
 }
 
+impl From<std::io::Error> for DirstatePackError {
+    fn from(e: std::io::Error) -> Self {
+        DirstatePackError::CorruptedEntry(e.to_string())
+    }
+}
 #[derive(Debug, PartialEq)]
 pub enum DirstateMapError {
     PathNotFound(Vec<u8>),
     EmptyPath,
 }
 
-impl From<std::io::Error> for DirstatePackError {
-    fn from(e: std::io::Error) -> Self {
-        DirstatePackError::CorruptedEntry(e.to_string())
+pub enum DirstateError {
+    Parse(DirstateParseError),
+    Pack(DirstatePackError),
+    Map(DirstateMapError),
+    IO(std::io::Error),
+    ParallelAccessSuspected,
+}
+
+impl From<DirstateParseError> for DirstateError {
+    fn from(e: DirstateParseError) -> Self {
+        DirstateError::Parse(e)
     }
 }
 
-impl From<std::io::Error> for DirstateParseError {
-    fn from(e: std::io::Error) -> Self {
-        DirstateParseError::CorruptedEntry(e.to_string())
+impl From<DirstatePackError> for DirstateError {
+    fn from(e: DirstatePackError) -> Self {
+        DirstateError::Pack(e)
     }
 }
 
@@ -109,3 +129,15 @@
         PatternFileError::IO(e)
     }
 }
+
+impl From<DirstateMapError> for DirstateError {
+    fn from(e: DirstateMapError) -> Self {
+        DirstateError::Map(e)
+    }
+}
+
+impl From<std::io::Error> for DirstateError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateError::IO(e)
+    }
+}
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -1,3 +1,12 @@
+// filepatterns.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Mercurial-specific patterns handling.
+
 use crate::{LineNumber, PatternError, PatternFileError};
 use regex::bytes::Regex;
 use std::collections::HashMap;
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -4,31 +4,31 @@
 // GNU General Public License version 2 or any later version.
 
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use std::collections::HashMap;
+use dirstate::dirstate_map::{CopyMap, StateMap};
 use std::io::Cursor;
-use {
-    CopyVec, CopyVecEntry, DirstateEntry, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
-};
+use std::time::Duration;
+use {DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError};
 
 /// Parents are stored in the dirstate as byte hashes.
-const PARENT_SIZE: usize = 20;
+pub const PARENT_SIZE: usize = 20;
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
+// TODO parse/pack: is mutate-on-loop better for performance?
+
 pub fn parse_dirstate(
+    state_map: &mut StateMap,
+    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> {
+) -> Result<DirstateParents, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
 
-    let mut dirstate_vec = vec![];
-    let mut copies = vec![];
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
-        p1: &contents[..PARENT_SIZE],
-        p2: &contents[PARENT_SIZE..curr_pos],
+        p1: contents[..PARENT_SIZE].to_vec(),
+        p2: contents[PARENT_SIZE..curr_pos].to_vec(),
     };
 
     while curr_pos < contents.len() {
@@ -38,7 +38,7 @@
         let entry_bytes = &contents[curr_pos..];
 
         let mut cursor = Cursor::new(entry_bytes);
-        let state = cursor.read_i8()?;
+        let state = cursor.read_u8()?;
         let mode = cursor.read_i32::<BigEndian>()?;
         let size = cursor.read_i32::<BigEndian>()?;
         let mtime = cursor.read_i32::<BigEndian>()?;
@@ -57,9 +57,9 @@
         };
 
         if let Some(copy_path) = copy {
-            copies.push(CopyVecEntry { path, copy_path });
+            copy_map.insert(path.to_owned(), copy_path.to_owned());
         };
-        dirstate_vec.push((
+        state_map.insert(
             path.to_owned(),
             DirstateEntry {
                 state,
@@ -67,28 +67,30 @@
                 size,
                 mtime,
             },
-        ));
+        );
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok((parents, dirstate_vec, copies))
+    Ok(parents)
 }
 
 pub fn pack_dirstate(
-    dirstate_vec: &DirstateVec,
-    copymap: &HashMap<Vec<u8>, Vec<u8>>,
+    state_map: &mut StateMap,
+    copy_map: &CopyMap,
     parents: DirstateParents,
-    now: i32,
-) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
+    now: Duration,
+) -> Result<Vec<u8>, DirstatePackError> {
     if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE {
         return Err(DirstatePackError::CorruptedParent);
     }
 
-    let expected_size: usize = dirstate_vec
+    let now = now.as_secs() as i32;
+
+    let expected_size: usize = state_map
         .iter()
-        .map(|(ref filename, _)| {
+        .map(|(filename, _)| {
             let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(ref copy) = copymap.get(filename) {
+            if let Some(ref copy) = copy_map.get(filename) {
                 length += copy.len() + 1;
             }
             length
@@ -97,15 +99,15 @@
     let expected_size = expected_size + PARENT_SIZE * 2;
 
     let mut packed = Vec::with_capacity(expected_size);
-    let mut new_dirstate_vec = vec![];
+    let mut new_state_map = vec![];
 
     packed.extend(parents.p1);
     packed.extend(parents.p2);
 
-    for (ref filename, entry) in dirstate_vec {
-        let mut new_filename: Vec<u8> = filename.to_owned();
+    for (ref filename, entry) in state_map.iter() {
+        let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
-        if entry.state == 'n' as i8 && entry.mtime == now.into() {
+        if entry.state == b'n' && entry.mtime == now {
             // The file was last modified "simultaneously" with the current
             // write to dirstate (i.e. within the same second for file-
             // systems with a granularity of 1 sec). This commonly happens
@@ -116,8 +118,8 @@
             // contents of the file if the size is the same. This prevents
             // mistakenly treating such files as clean.
             new_mtime = -1;
-            new_dirstate_vec.push((
-                filename.to_owned(),
+            new_state_map.push((
+                filename.to_owned().to_vec(),
                 DirstateEntry {
                     mtime: new_mtime,
                     ..*entry
@@ -125,12 +127,12 @@
             ));
         }
 
-        if let Some(copy) = copymap.get(filename) {
+        if let Some(copy) = copy_map.get(*filename) {
             new_filename.push('\0' as u8);
             new_filename.extend(copy);
         }
 
-        packed.write_i8(entry.state)?;
+        packed.write_u8(entry.state)?;
         packed.write_i32::<BigEndian>(entry.mode)?;
         packed.write_i32::<BigEndian>(entry.size)?;
         packed.write_i32::<BigEndian>(new_mtime)?;
@@ -142,143 +144,155 @@
         return Err(DirstatePackError::BadSize(expected_size, packed.len()));
     }
 
-    Ok((packed, new_dirstate_vec))
+    state_map.extend(new_state_map);
+
+    Ok(packed)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_pack_dirstate_empty() {
-        let dirstate_vec: DirstateVec = vec![];
+        let mut state_map: StateMap = HashMap::new();
         let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected =
-            (b"1234567891011121314100000000000000000000".to_vec(), vec![]);
+        let now = Duration::new(15000000, 0);
+        let expected = b"1234567891011121314100000000000000000000".to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+
+        assert!(state_map.is_empty())
     }
     #[test]
     fn test_pack_dirstate_one_entry() {
-        let dirstate_vec: DirstateVec = vec![(
-            vec!['f' as u8, '1' as u8],
-            DirstateEntry {
-                state: 'n' as i8,
-                mode: 0o644,
-                size: 0,
-                mtime: 791231220,
-            },
-        )];
-        let copymap = HashMap::new();
-        let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
-        };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49,
-            ]
-            .to_vec(),
-            vec![],
-        );
-
-        assert_eq!(
-            expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
-        );
-    }
-    #[test]
-    fn test_pack_dirstate_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: b'n',
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
+
+        let copymap = HashMap::new();
+        let parents = DirstateParents {
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
+        };
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 2, 102, 49,
+        ]
+        .to_vec();
+
+        assert_eq!(
+            expected,
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
+        );
+
+        assert_eq!(expected_state_map, state_map);
+    }
+    #[test]
+    fn test_pack_dirstate_one_entry_with_copy() {
+        let expected_state_map: StateMap = [(
+            b"f1".to_vec(),
+            DirstateEntry {
+                state: b'n',
+                mode: 0o644,
+                size: 0,
+                mtime: 791231220,
+            },
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111,
-                112, 121, 110, 97, 109, 101,
-            ]
-            .to_vec(),
-            vec![],
-        );
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
+            109, 101,
+        ]
+        .to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+        assert_eq!(expected_state_map, state_map);
     }
 
     #[test]
     fn test_parse_pack_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: b'n',
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (
-                parents,
-                dirstate_vec,
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
-            ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     fn test_parse_pack_multiple_entries_with_copy() {
-        let dirstate_vec: DirstateVec = vec![
+        let mut state_map: StateMap = [
             (
                 b"f1".to_vec(),
                 DirstateEntry {
-                    state: 'n' as i8,
+                    state: b'n',
                     mode: 0o644,
                     size: 0,
                     mtime: 791231220,
@@ -287,7 +301,7 @@
             (
                 b"f2".to_vec(),
                 DirstateEntry {
-                    state: 'm' as i8,
+                    state: b'm',
                     mode: 0o777,
                     size: 1000,
                     mtime: 791231220,
@@ -296,7 +310,7 @@
             (
                 b"f3".to_vec(),
                 DirstateEntry {
-                    state: 'r' as i8,
+                    state: b'r',
                     mode: 0o644,
                     size: 234553,
                     mtime: 791231220,
@@ -305,84 +319,95 @@
             (
                 b"f4\xF6".to_vec(),
                 DirstateEntry {
-                    state: 'a' as i8,
+                    state: b'a',
                     mode: 0o644,
                     size: -1,
                     mtime: -1,
                 },
             ),
-        ];
+        ]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (parents, dirstate_vec, copymap),
-            parse_dirstate(result.0.as_slice())
-                .and_then(|(p, dvec, cvec)| Ok((
-                    p,
-                    dvec,
-                    cvec.iter()
-                        .map(|entry| (
-                            entry.path.to_vec(),
-                            entry.copy_path.to_vec()
-                        ))
-                        .collect()
-                )))
-                .unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
     fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: b'n',
                 mode: 0o644,
                 size: 0,
                 mtime: 15000000,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
 
         assert_eq!(
             (
                 parents,
-                vec![(
+                [(
                     b"f1".to_vec(),
                     DirstateEntry {
-                        state: 'n' as i8,
+                        state: b'n',
                         mode: 0o644,
                         size: 0,
                         mtime: -1
                     }
-                )],
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
+                )]
+                .iter()
+                .cloned()
+                .collect::<StateMap>(),
+                copymap,
             ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 }
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,36 +1,29 @@
+use std::collections::HashMap;
+
 pub mod dirs_multiset;
+pub mod dirstate_map;
 pub mod parsers;
 
-#[derive(Debug, PartialEq, Copy, Clone)]
-pub struct DirstateParents<'a> {
-    pub p1: &'a [u8],
-    pub p2: &'a [u8],
+#[derive(Debug, PartialEq, Clone)]
+pub struct DirstateParents {
+    pub p1: Vec<u8>,
+    pub p2: Vec<u8>,
 }
 
 /// The C implementation uses all signed types. This will be an issue
 /// either when 4GB+ source files are commonplace or in 2038, whichever
 /// comes first.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
-    pub state: i8,
+    pub state: u8,
     pub mode: i32,
     pub mtime: i32,
     pub size: i32,
 }
 
-pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
-
-#[derive(Debug, PartialEq)]
-pub struct CopyVecEntry<'a> {
-    pub path: &'a [u8],
-    pub copy_path: &'a [u8],
-}
-
-pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
-
 /// The Python implementation passes either a mapping (dirstate) or a flat
 /// iterable (manifest)
-pub enum DirsIterable {
-    Dirstate(DirstateVec),
-    Manifest(Vec<Vec<u8>>),
+pub enum DirsIterable<'a> {
+    Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
+    Manifest(&'a Vec<Vec<u8>>),
 }
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -0,0 +1,441 @@
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use core::borrow::Borrow;
+use dirstate::parsers::PARENT_SIZE;
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+use std::ops::Deref;
+use std::time::Duration;
+use {
+    parsers::pack_dirstate, parsers::parse_dirstate, DirsIterable,
+    DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
+    DirstateParents, DirstateParseError,
+};
+
+pub type StateMap = HashMap<Vec<u8>, DirstateEntry>;
+pub type CopyMap = HashMap<Vec<u8>, Vec<u8>>;
+pub type FileFoldMap = HashMap<Vec<u8>, Vec<u8>>;
+
+static NULL_REVISION: &[u8] = b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+/// In-memory representation of the dirstate: the per-file entries together
+/// with the copy map, the parents and caches derived from the entries.
+pub struct DirstateMap {
+    /// Dirstate entry of every recorded file, keyed by filename.
+    state_map: StateMap,
+    /// Maps a filename to the copy path recorded for it.
+    pub copy_map: CopyMap,
+    /// Cache mapping ASCII-uppercased filenames to filenames; `None` until
+    /// `property_file_fold_map` builds it.
+    file_fold_map: Option<FileFoldMap>,
+    /// Multiset built from the entries whose state is not `b'r'`; `None`
+    /// until `set_dirs` builds it.
+    pub dirs: Option<DirsMultiset>,
+    /// Multiset built from all the entries; `None` until `set_all_dirs`
+    /// builds it.
+    pub all_dirs: Option<DirsMultiset>,
+    /// Filenames whose entry has a state other than `b'n'` or an mtime of -1.
+    non_normal_set: HashSet<Vec<u8>>,
+    /// Filenames recorded with a size of -2.
+    other_parent_set: HashSet<Vec<u8>>,
+    /// Cached parents; `None` until they are read or set.
+    parents: Option<DirstateParents>,
+    /// Set by `set_parents`, reset by `pack`.
+    dirty_parents: bool,
+}
+
+/// Should only really be used in python interface code, for clarity
+impl Deref for DirstateMap {
+    type Target = StateMap;
+
+    /// Gives shared (read-only) access to the underlying `StateMap`.
+    fn deref(&self) -> &Self::Target {
+        &self.state_map
+    }
+}
+
+impl Default for DirstateMap {
+    /// Creates an empty map: no entries, no copies, no cached parents, and
+    /// none of the lazily built structures (`file_fold_map`, `dirs`,
+    /// `all_dirs`) initialized.
+    fn default() -> Self {
+        Self {
+            state_map: StateMap::new(),
+            copy_map: CopyMap::new(),
+            file_fold_map: None,
+            dirs: None,
+            all_dirs: None,
+            non_normal_set: HashSet::new(),
+            other_parent_set: HashSet::new(),
+            parents: None,
+            dirty_parents: false,
+        }
+    }
+}
+
+impl FromIterator<(Vec<u8>, DirstateEntry)> for DirstateMap {
+    /// Builds a map whose entries are the `(filename, entry)` pairs yielded
+    /// by `iter`; every other field keeps its `Default` value.
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = (Vec<u8>, DirstateEntry)>,
+    {
+        let state_map: StateMap = iter.into_iter().collect();
+        Self {
+            state_map,
+            ..Self::default()
+        }
+    }
+}
+
+impl DirstateMap {
+    /// Creates an empty `DirstateMap`; equivalent to `Self::default()`.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Empties the map and resets the parents to the null revision.
+    ///
+    /// Every structure derived from the entries is reset as well, so that
+    /// nothing describing the previous contents survives the call. Since the
+    /// parents go through `set_parents`, they are flagged as dirty.
+    pub fn clear(&mut self) {
+        self.state_map.clear();
+        self.copy_map.clear();
+        self.file_fold_map = None;
+        // `dirs` and `all_dirs` are only rebuilt while they are `None`
+        // (see `set_dirs` and `set_all_dirs`), so drop them here; otherwise
+        // they would keep counting the directories of the entries that
+        // were just removed.
+        self.dirs = None;
+        self.all_dirs = None;
+        self.non_normal_set.clear();
+        self.other_parent_set.clear();
+        self.set_parents(DirstateParents {
+            p1: NULL_REVISION.to_vec(),
+            p2: NULL_REVISION.to_vec(),
+        })
+    }
+
+    /// Records `entry` for `filename`, replacing any previous entry.
+    ///
+    /// `old_state` is the state the file had before the call. It decides
+    /// which of the already built directory multisets must count the path:
+    /// both for `b'?'`, only `dirs` for `b'r'`, none otherwise.
+    pub fn add_file(
+        &mut self,
+        filename: &[u8],
+        old_state: u8,
+        entry: DirstateEntry,
+    ) {
+        match old_state {
+            b'?' => {
+                if let Some(dirs) = self.dirs.as_mut() {
+                    dirs.add_path(filename);
+                }
+                if let Some(all_dirs) = self.all_dirs.as_mut() {
+                    all_dirs.add_path(filename);
+                }
+            }
+            b'r' => {
+                if let Some(dirs) = self.dirs.as_mut() {
+                    dirs.add_path(filename);
+                }
+            }
+            _ => {}
+        }
+
+        // `DirstateEntry` is `Copy`: it can be stored and still inspected.
+        self.state_map.insert(filename.to_vec(), entry);
+
+        let is_normal = entry.state == b'n' && entry.mtime != -1;
+        if !is_normal {
+            self.non_normal_set.insert(filename.to_vec());
+        }
+
+        if entry.size == -2 {
+            self.other_parent_set.insert(filename.to_vec());
+        }
+    }
+
+    /// Records `filename` as removed (state `b'r'`).
+    ///
+    /// `size` is stored as-is in the new entry; it is used to carry sentinel
+    /// values that indicate the file's previous state. In the future, this
+    /// should be refactored to be more explicit about what that state is.
+    ///
+    /// `old_state` is the state the file had before the call. An error from
+    /// deleting the path from `dirs` is returned before anything else is
+    /// modified.
+    pub fn remove_file(
+        &mut self,
+        filename: &[u8],
+        old_state: u8,
+        size: i32,
+    ) -> Result<(), DirstateMapError> {
+        match old_state {
+            b'?' => {
+                if let Some(all_dirs) = self.all_dirs.as_mut() {
+                    all_dirs.add_path(filename);
+                }
+            }
+            b'r' => {}
+            _ => {
+                if let Some(dirs) = self.dirs.as_mut() {
+                    dirs.delete_path(filename)?;
+                }
+            }
+        }
+
+        if let Some(fold_map) = self.file_fold_map.as_mut() {
+            fold_map.remove(&filename.to_ascii_uppercase());
+        }
+
+        let removed_entry = DirstateEntry {
+            state: b'r',
+            mode: 0,
+            size,
+            mtime: 0,
+        };
+        self.state_map.insert(filename.to_vec(), removed_entry);
+        self.non_normal_set.insert(filename.to_vec());
+        Ok(())
+    }
+
+    /// Forgets `filename` entirely.
+    ///
+    /// Returns `Ok(true)` if an entry was recorded for the file. The entry
+    /// is removed before the directory multisets are updated, so it stays
+    /// removed even when one of those updates fails and the error is
+    /// returned.
+    pub fn drop_file(
+        &mut self,
+        filename: &[u8],
+        old_state: u8,
+    ) -> Result<bool, DirstateMapError> {
+        let was_recorded = self.state_map.remove(filename).is_some();
+
+        if was_recorded {
+            if old_state != b'r' {
+                if let Some(dirs) = self.dirs.as_mut() {
+                    dirs.delete_path(filename)?;
+                }
+            }
+            if let Some(all_dirs) = self.all_dirs.as_mut() {
+                all_dirs.delete_path(filename)?;
+            }
+        }
+
+        if let Some(fold_map) = self.file_fold_map.as_mut() {
+            fold_map.remove(&filename.to_ascii_uppercase());
+        }
+        self.non_normal_set.remove(filename);
+
+        Ok(was_recorded)
+    }
+
+    /// Resets to -1 the mtime of every file of `filenames` whose entry is in
+    /// state `b'n'` with an mtime equal to `now`, and adds those files to
+    /// the non-normal set. Files without an entry are ignored.
+    pub fn clear_ambiguous_times(
+        &mut self,
+        filenames: Vec<Vec<u8>>,
+        now: i32,
+    ) {
+        for filename in filenames {
+            let mut reset = false;
+            if let Some(entry) = self.state_map.get_mut(&filename) {
+                if entry.state == b'n' && entry.mtime == now {
+                    entry.mtime = -1;
+                    reset = true;
+                }
+            }
+            if reset {
+                self.non_normal_set.insert(filename);
+            }
+        }
+    }
+
+    /// Scans every entry and returns `(non_normal, other_parent)`:
+    ///
+    /// - `non_normal` holds the files whose state is not `b'n'` or whose
+    ///   mtime is -1;
+    /// - `other_parent` holds the files in state `b'n'` whose size is -2.
+    pub fn non_normal_other_parent_entries(
+        &self,
+    ) -> (HashSet<Vec<u8>>, HashSet<Vec<u8>>) {
+        let mut non_normal = HashSet::new();
+        let mut other_parent = HashSet::new();
+
+        for (filename, entry) in &self.state_map {
+            let in_state_n = entry.state == b'n';
+
+            if !in_state_n || entry.mtime == -1 {
+                non_normal.insert(filename.clone());
+            }
+            if in_state_n && entry.size == -2 {
+                other_parent.insert(filename.clone());
+            }
+        }
+
+        (non_normal, other_parent)
+    }
+    /// Builds `all_dirs` from all the current entries, unless it has already
+    /// been built.
+    pub fn set_all_dirs(&mut self) {
+        if self.all_dirs.is_some() {
+            return;
+        }
+        let multiset =
+            DirsMultiset::new(DirsIterable::Dirstate(&self.state_map), None);
+        self.all_dirs = Some(multiset);
+    }
+    /// Builds `dirs` from the current entries, leaving out those in state
+    /// `b'r'`, unless it has already been built.
+    pub fn set_dirs(&mut self) {
+        if self.dirs.is_some() {
+            return;
+        }
+        let multiset = DirsMultiset::new(
+            DirsIterable::Dirstate(&self.state_map),
+            Some(b'r'),
+        );
+        self.dirs = Some(multiset);
+    }
+
+    /// Tells whether `directory` is a key of `dirs`, building the multiset
+    /// first if needed.
+    pub fn has_tracked_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_dirs();
+        // Cannot fail: `set_dirs` has just made sure `dirs` is populated.
+        let dirs = self.dirs.as_ref().unwrap();
+        dirs.contains_key(directory.as_ref())
+    }
+
+    /// Tells whether `directory` is a key of `all_dirs`, building the
+    /// multiset first if needed.
+    pub fn has_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_all_dirs();
+        // Cannot fail: `set_all_dirs` has just made sure `all_dirs` is
+        // populated.
+        let all_dirs = self.all_dirs.as_ref().unwrap();
+        all_dirs.contains_key(directory.as_ref())
+    }
+
+    /// Returns the parents, reading them from `file_contents` the first
+    /// time.
+    ///
+    /// If parents are already cached, they are returned and `file_contents`
+    /// is not looked at. Otherwise `file_contents` must either be empty, in
+    /// which case both parents are the null revision, or be exactly
+    /// `PARENT_SIZE * 2` bytes long: the first parent followed by the
+    /// second. The result is cached without flagging the parents as dirty.
+    ///
+    /// Returns `DirstateParseError::Damaged` (wrapped in
+    /// `DirstateError::Parse`) for any other length.
+    pub fn parents(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<DirstateParents, DirstateError> {
+        if let Some(ref parents) = self.parents {
+            return Ok(parents.clone());
+        }
+
+        // Derive the expected length from `PARENT_SIZE` so that it cannot
+        // drift away from the slicing below.
+        let parents = if file_contents.len() == PARENT_SIZE * 2 {
+            DirstateParents {
+                p1: file_contents[..PARENT_SIZE].to_vec(),
+                p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2].to_vec(),
+            }
+        } else if file_contents.is_empty() {
+            DirstateParents {
+                p1: NULL_REVISION.to_vec(),
+                p2: NULL_REVISION.to_vec(),
+            }
+        } else {
+            return Err(DirstateError::Parse(DirstateParseError::Damaged));
+        };
+
+        self.parents = Some(parents.clone());
+        Ok(parents)
+    }
+
+    /// Replaces the cached parents with `parents` and flags them as dirty.
+    pub fn set_parents(&mut self, parents: DirstateParents) {
+        self.dirty_parents = true;
+        self.parents = Some(parents);
+    }
+
+    /// Parses `file_contents` into the entries and the copy map.
+    ///
+    /// Returns `Ok(None)` without touching anything when `file_contents` is
+    /// empty, and the parents returned by `parse_dirstate` otherwise.
+    /// Parsing errors are propagated to the caller.
+    pub fn read(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<Option<DirstateParents>, DirstateError> {
+        if file_contents.is_empty() {
+            return Ok(None);
+        }
+
+        let parents = parse_dirstate(
+            &mut self.state_map,
+            &mut self.copy_map,
+            file_contents,
+        )?;
+
+        // Parents that were set through `set_parents` take precedence over
+        // the parsed ones. Note that adopting the parsed parents goes through
+        // `set_parents` too, which sets `dirty_parents`.
+        if !self.dirty_parents {
+            self.set_parents(parents.to_owned());
+        }
+
+        Ok(Some(parents))
+    }
+
+    /// Serializes the entries, the copy map and `parents` through
+    /// `pack_dirstate` and returns the resulting bytes.
+    ///
+    /// On success, the parents are no longer flagged as dirty, and the
+    /// non-normal and other-parent sets are recomputed from the entries.
+    /// On failure, the error is returned before any of that happens.
+    pub fn pack(
+        &mut self,
+        parents: DirstateParents,
+        now: Duration,
+    ) -> Result<Vec<u8>, DirstateError> {
+        let packed =
+            pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
+
+        self.dirty_parents = false;
+
+        // `pack_dirstate` is handed the entries mutably and may have reset
+        // some mtimes to -1, so the derived sets have to be rebuilt.
+        let (non_normal, other_parent) =
+            self.non_normal_other_parent_entries();
+        self.non_normal_set = non_normal;
+        self.other_parent_set = other_parent;
+        Ok(packed)
+    }
+}
+
+/// Holds all property-like functions to make Python happy in the short term
+impl DirstateMap {
+    /// Returns a copy of the map from ASCII-uppercased filenames to
+    /// filenames, building and caching it on first use.
+    ///
+    /// Only files that are not in state `b'r'` are part of the map. This is
+    /// what `remove_file` and `drop_file` rely on when they evict a file
+    /// from the cached map.
+    pub fn property_file_fold_map(&mut self) -> FileFoldMap {
+        if let Some(ref file_fold_map) = self.file_fold_map {
+            return file_fold_map.clone();
+        }
+        let mut new_file_fold_map = FileFoldMap::new();
+        for (filename, entry) in &self.state_map {
+            // Removed files must not be resolvable through the fold map.
+            if entry.state != b'r' {
+                new_file_fold_map
+                    .insert(filename.to_ascii_uppercase(), filename.clone());
+            }
+        }
+        self.file_fold_map = Some(new_file_fold_map.clone());
+        new_file_fold_map
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `dirs` and `all_dirs` start out unset and are each built by the
+    /// first query that needs them, independently of one another.
+    #[test]
+    fn test_dirs_multiset() {
+        let mut map = DirstateMap::new();
+        assert!(map.dirs.is_none());
+        assert!(map.all_dirs.is_none());
+
+        // `has_dir` only builds `all_dirs`
+        assert_eq!(false, map.has_dir(b"nope"));
+        assert!(map.all_dirs.is_some());
+        assert!(map.dirs.is_none());
+
+        // `has_tracked_dir` builds `dirs`
+        assert_eq!(false, map.has_tracked_dir(b"nope"));
+        assert!(map.dirs.is_some());
+    }
+
+    /// Adding an entry in state `b'n'` with an mtime other than -1 and a
+    /// size other than -2 leaves both derived sets empty.
+    #[test]
+    fn test_add_file() {
+        let mut map = DirstateMap::new();
+
+        assert_eq!(0, map.len());
+
+        map.add_file(
+            b"meh",
+            b'n',
+            DirstateEntry {
+                state: b'n',
+                mode: 1337,
+                mtime: 1337,
+                size: 1337,
+            },
+        );
+
+        assert_eq!(1, map.len());
+        assert_eq!(0, map.non_normal_set.len());
+        assert_eq!(0, map.other_parent_set.len());
+    }
+
+    /// Checks the classification of entries over combinations of state,
+    /// size and mtime.
+    #[test]
+    fn test_non_normal_other_parent_entries() {
+        // Each tuple is (state, mode, size, mtime)
+        let map: DirstateMap = [
+            (b"f1", ('r', 1337, 1337, 1337)),
+            (b"f2", ('n', 1337, 1337, -1)),
+            (b"f3", ('n', 1337, 1337, 1337)),
+            (b"f4", ('n', 1337, -2, 1337)),
+            (b"f5", ('a', 1337, 1337, 1337)),
+            (b"f6", ('a', 1337, 1337, -1)),
+            (b"f7", ('m', 1337, 1337, -1)),
+            (b"f8", ('m', 1337, 1337, 1337)),
+            (b"f9", ('m', 1337, -2, 1337)),
+            (b"fa", ('a', 1337, -2, 1337)),
+            (b"fb", ('r', 1337, -2, 1337)),
+        ]
+        .iter()
+        .map(|(fname, (state, mode, size, mtime))| {
+            (
+                fname.to_vec(),
+                DirstateEntry {
+                    state: *state as u8,
+                    mode: *mode,
+                    size: *size,
+                    mtime: *mtime,
+                },
+            )
+        })
+        .collect();
+
+        // Everything except `f3` and `f4`, the only entries in state 'n'
+        // with an mtime other than -1
+        let non_normal = [
+            b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
+        ]
+        .iter()
+        .map(|x| x.to_vec())
+        .collect();
+
+        // `f4` is the only entry in state 'n' with a size of -2
+        let mut other_parent = HashSet::new();
+        other_parent.insert(b"f4".to_vec());
+
+        assert_eq!(
+            (non_normal, other_parent),
+            map.non_normal_other_parent_entries()
+        );
+    }
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -21,17 +21,17 @@
     /// Initializes the multiset from a dirstate or a manifest.
     ///
     /// If `skip_state` is provided, skips dirstate entries with equal state.
-    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+    pub fn new(iterable: DirsIterable, skip_state: Option<u8>) -> Self {
         let mut multiset = DirsMultiset {
             inner: HashMap::new(),
         };
 
         match iterable {
             DirsIterable::Dirstate(vec) => {
-                for (ref filename, DirstateEntry { state, .. }) in vec {
+                for (filename, DirstateEntry { state, .. }) in vec {
                     // This `if` is optimized out of the loop
                     if let Some(skip) = skip_state {
-                        if skip != state {
+                        if skip != *state {
                             multiset.add_path(filename);
                         }
                     } else {
@@ -40,7 +40,7 @@
                 }
             }
             DirsIterable::Manifest(vec) => {
-                for ref filename in vec {
+                for filename in vec {
                     multiset.add_path(filename);
                 }
             }
@@ -136,10 +136,11 @@
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_delete_path_path_not_found() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"doesnotexist/";
         assert_eq!(
             Err(DirstateMapError::PathNotFound(path.to_vec())),
@@ -150,7 +151,7 @@
     #[test]
     fn test_delete_path_empty_path() {
         let mut map =
-            DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
+            DirsMultiset::new(DirsIterable::Manifest(&vec![vec![]]), None);
         let path = b"";
         assert_eq!(Ok(()), map.delete_path(path));
         assert_eq!(
@@ -190,7 +191,7 @@
 
     #[test]
     fn test_add_path_empty_path() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"";
         map.add_path(path);
 
@@ -199,7 +200,7 @@
 
     #[test]
     fn test_add_path_successful() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
 
         map.add_path(b"a/");
         assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
@@ -246,13 +247,13 @@
     fn test_dirsmultiset_new_empty() {
         use DirsIterable::{Dirstate, Manifest};
 
-        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let new = DirsMultiset::new(Manifest(&vec![]), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
         assert_eq!(expected, new);
 
-        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let new = DirsMultiset::new(Dirstate(&HashMap::new()), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -272,7 +273,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let new = DirsMultiset::new(Manifest(&input_vec), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -297,7 +298,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let new = DirsMultiset::new(Dirstate(&input_map), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -317,7 +318,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let new = DirsMultiset::new(Manifest(&input_vec), Some(b'n'));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -331,7 +332,7 @@
                     (
                         f.as_bytes().to_vec(),
                         DirstateEntry {
-                            state: *state as i8,
+                            state: *state as u8,
                             mode: 0,
                             mtime: 0,
                             size: 0,
@@ -346,7 +347,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let new = DirsMultiset::new(Dirstate(&input_map), Some(b'n'));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -13,6 +13,6 @@
 
 [dependencies]
 byteorder = "1.3.1"
+memchr = "2.2.0"
 lazy_static = "1.3.0"
-memchr = "2.2.0"
-regex = "^1.1"
+regex = "^1.1"
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -36,7 +36,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -65,7 +65,7 @@
 dependencies = [
  "cpython 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "hg-core 0.1.0",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -75,7 +75,7 @@
 version = "0.1.0"
 dependencies = [
  "hg-core 0.1.0",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -85,7 +85,7 @@
 
 [[package]]
 name = "libc"
-version = "0.2.45"
+version = "0.2.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -103,7 +103,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -112,7 +112,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -122,7 +122,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -177,7 +177,7 @@
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -189,7 +189,7 @@
 dependencies = [
  "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "fuchsia-cprng 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -307,7 +307,7 @@
 "checksum cpython 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b489034e723e7f5109fecd19b719e664f89ef925be785885252469e9822fa940"
 "checksum fuchsia-cprng 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "81f7f8eb465745ea9b02e2704612a9946a59fa40572086c6fd49d6ddcf30bf31"
 "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
-"checksum libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)" = "2d2857ec59fadc0773853c664d2d18e7198e83883e7060b63c924cb077bd5c74"
+"checksum libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)" = "42914d39aad277d9e176efbdad68acb1d5443ab65afe0e0e4f0d49352a950880"
 "checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
 "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
 "checksum python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "56114c37d4dca82526d74009df7782a28c871ac9d36b19d4cb9e67672258527e"
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -28,7 +28,7 @@
 )
 
 parsers = policy.importmod(r'parsers')
-dirstatemod = policy.importrust(r'dirstate', default=parsers)
+rustmod = policy.importrust(r'dirstate')
 
 propertycache = util.propertycache
 filecache = scmutil.filecache
@@ -652,7 +652,8 @@
         delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
         if delaywrite > 0:
             # do we have any files to delay for?
-            for f, e in self._map.iteritems():
+            items = self._map.iteritems()
+            for f, e in items:
                 if e[0] == 'n' and e[3] == now:
                     import time # to avoid useless import
                     # rather than sleep n seconds, sleep until the next
@@ -663,6 +664,12 @@
                     time.sleep(end - clock)
                     now = end # trust our estimate that the end is near now
                     break
+            # The loop above may break before the iterator is exhausted,
+            # so delete the iterator explicitly to release the reference
+            # for the Rust implementation.
+            # TODO: make the Rust implementation behave like Python,
+            # since this would not work with a non-refcounting GC.
+            del items
 
         self._map.write(st, now)
         self._lastnormaltime = 0
@@ -1475,7 +1482,9 @@
         # parsing the dirstate.
         #
         # (we cannot decorate the function directly since it is in a C module)
-        parse_dirstate = util.nogc(dirstatemod.parse_dirstate)
+        parse_dirstate = parsers.parse_dirstate
+
+        parse_dirstate = util.nogc(parse_dirstate)
         p = parse_dirstate(self._map, self.copymap, st)
         if not self._dirtyparents:
             self.setparents(*p)
@@ -1486,8 +1495,10 @@
         self.get = self._map.get
 
     def write(self, st, now):
-        st.write(dirstatemod.pack_dirstate(self._map, self.copymap,
-                                           self.parents(), now))
+        pack_dirstate = parsers.pack_dirstate
+
+        st.write(pack_dirstate(self._map, self.copymap,
+                                       self.parents(), now))
         st.close()
         self._dirtyparents = False
         self.nonnormalset, self.otherparentset = self.nonnormalentries()
@@ -1516,3 +1527,187 @@
         for name in self._dirs:
             f[normcase(name)] = name
         return f
+
+
+if rustmod is not None:
+    class dirstatemap(object):
+        def __init__(self, ui, opener, root):
+            self._ui = ui
+            self._opener = opener
+            self._root = root
+            self._filename = 'dirstate'
+            self._parents = None
+            self._dirtyparents = False
+
+            # for consistent view between _pl() and _read() invocations
+            self._pendingmode = None
+
+
+        def addfile(self, *args, **kwargs):
+            return self._rustmap.addfile(*args, **kwargs)
+
+        def removefile(self, *args, **kwargs):
+            return self._rustmap.removefile(*args, **kwargs)
+
+        def dropfile(self, *args, **kwargs):
+            return self._rustmap.dropfile(*args, **kwargs)
+
+        def clearambiguoustimes(self, *args, **kwargs):
+            return self._rustmap.clearambiguoustimes(*args, **kwargs)
+
+        def nonnormalentries(self):
+            return self._rustmap.nonnormalentries()
+
+        def get(self, *args, **kwargs):
+            return self._rustmap.get(*args, **kwargs)
+
+        @propertycache
+        def _rustmap(self):
+            self._rustmap = rustmod.DirstateMap(self._root)
+            self.read()
+            return self._rustmap
+
+        @property
+        def copymap(self):
+            return self._rustmap.copymap()
+
+        def preload(self):
+            self._rustmap
+
+        def clear(self):
+            self._rustmap.clear()
+            self.setparents(nullid, nullid)
+            util.clearcachedproperty(self, "_dirs")
+            util.clearcachedproperty(self, "_alldirs")
+            util.clearcachedproperty(self, "dirfoldmap")
+
+        def items(self):
+            return self._rustmap.items()
+
+        def keys(self):
+            return iter(self._rustmap)
+
+        def __contains__(self, key):
+            return key in self._rustmap
+
+        def __getitem__(self, item):
+            return self._rustmap[item]
+
+        def __len__(self):
+            return len(self._rustmap)
+
+        def __iter__(self):
+            return iter(self._rustmap)
+
+        # alias kept for Python 2/3 compatibility
+        iteritems = items
+
+        def _opendirstatefile(self):
+            fp, mode = txnutil.trypending(self._root, self._opener,
+                                          self._filename)
+            if self._pendingmode is not None and self._pendingmode != mode:
+                fp.close()
+                raise error.Abort(_('working directory state may be '
+                                    'changed parallelly'))
+            self._pendingmode = mode
+            return fp
+
+        def setparents(self, p1, p2):
+            self._rustmap.setparents(p1, p2)
+            self._parents = (p1, p2)
+            self._dirtyparents = True
+
+        def parents(self):
+            if not self._parents:
+                try:
+                    fp = self._opendirstatefile()
+                    st = fp.read(40)
+                    fp.close()
+                except IOError as err:
+                    if err.errno != errno.ENOENT:
+                        raise
+                    # File doesn't exist, so the current state is empty
+                    st = ''
+
+                try:
+                    self._parents = self._rustmap.parents(st)
+                except ValueError:
+                    raise error.Abort(_('working directory state appears '
+                                        'damaged!'))
+
+            return self._parents
+
+        def read(self):
+            # ignore HG_PENDING because identity is used only for writing
+            self.identity = util.filestat.frompath(
+                self._opener.join(self._filename))
+
+            try:
+                fp = self._opendirstatefile()
+                try:
+                    st = fp.read()
+                finally:
+                    fp.close()
+            except IOError as err:
+                if err.errno != errno.ENOENT:
+                    raise
+                return
+            if not st:
+                return
+
+            parse_dirstate = util.nogc(self._rustmap.read)
+            parents = parse_dirstate(st)
+            if parents and not self._dirtyparents:
+                self.setparents(*parents)
+
+        def write(self, st, now):
+            parents = self.parents()
+            st.write(self._rustmap.write(parents[0], parents[1], now))
+            st.close()
+            self._dirtyparents = False
+
+        @propertycache
+        def filefoldmap(self):
+            """Returns a dictionary mapping normalized case paths to their
+            non-normalized versions.
+            """
+            return self._rustmap.filefoldmapasdict()
+
+        def hastrackeddir(self, d):
+            self._dirs # Trigger Python's propertycache
+            return self._rustmap.hastrackeddir(d)
+
+        def hasdir(self, d):
+            self._dirs # Trigger Python's propertycache
+            return self._rustmap.hasdir(d)
+
+        @propertycache
+        def _dirs(self):
+            return self._rustmap.getdirs()
+
+        @propertycache
+        def _alldirs(self):
+            return self._rustmap.getalldirs()
+
+        @propertycache
+        def identity(self):
+            self._rustmap
+            return self.identity
+
+        @property
+        def nonnormalset(self):
+            nonnorm, otherparents = self._rustmap.nonnormalentries()
+            return nonnorm
+
+        @property
+        def otherparentset(self):
+            nonnorm, otherparents = self._rustmap.nonnormalentries()
+            return otherparents
+
+        @propertycache
+        def dirfoldmap(self):
+            f = {}
+            normcase = util.normcase
+            for name in self._dirs:
+                f[normcase(name)] = name
+            return f
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -459,7 +459,7 @@
     lfiles = set()
     for f in actions:
         splitstandin = lfutil.splitstandin(f)
-        if splitstandin in p1:
+        if splitstandin is not None and splitstandin in p1:
             lfiles.add(splitstandin)
         elif lfutil.standin(f) in p1:
             lfiles.add(f)



To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mjpieters, mercurial-devel


More information about the Mercurial-devel mailing list