D6594: RFC dirstatemap

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Mon Jul 8 09:55:32 EDT 2019


Alphare updated this revision to Diff 15791.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D6594?vs=15730&id=15791

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6594/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6594

AFFECTED FILES
  hgext/largefiles/overrides.py
  mercurial/dirstate.py
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/lib.rs
  rust/hg-core/src/utils/files.rs
  rust/hg-core/src/utils/mod.rs
  rust/hg-cpython/src/dirstate.rs
  rust/hg-cpython/src/dirstate/copymap.rs
  rust/hg-cpython/src/dirstate/dirs_multiset.rs
  rust/hg-cpython/src/dirstate/dirstate_map.rs
  rust/hg-cpython/src/dirstate/macros.rs
  rust/hg-cpython/src/dirstate/mod.rs
  rust/hg-cpython/src/exceptions.rs
  rust/hg-cpython/src/lib.rs
  tests/fakedirstatewritetime.py

CHANGE DETAILS

diff --git a/tests/fakedirstatewritetime.py b/tests/fakedirstatewritetime.py
--- a/tests/fakedirstatewritetime.py
+++ b/tests/fakedirstatewritetime.py
@@ -16,12 +16,6 @@
 )
 from mercurial.utils import dateutil
 
-try:
-    from mercurial import rustext
-    rustext.__name__  # force actual import (see hgdemandimport)
-except ImportError:
-    rustext = None
-
 configtable = {}
 configitem = registrar.configitem(configtable)
 
@@ -30,6 +24,7 @@
 )
 
 parsers = policy.importmod(r'parsers')
+rustmod = policy.importrust(r'parsers')
 
 def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
     # execute what original parsers.pack_dirstate should do actually
@@ -57,12 +52,17 @@
     # 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy
     fakenow = dateutil.parsedate(fakenow, [b'%Y%m%d%H%M'])[0]
 
-    if rustext is not None:
-        orig_module = rustext.dirstate
-        orig_pack_dirstate = rustext.dirstate.pack_dirstate
-    else:
-        orig_module = parsers
-        orig_pack_dirstate = parsers.pack_dirstate
+    if rustmod is not None:
+        # The Rust implementation does not use public parse/pack dirstate
+        # to prevent conversion round-trips
+        orig_dirstatemap_write = dirstate.dirstatemap.write
+        wrapper = lambda self, st, now: orig_dirstatemap_write(self,
+                                                               st,
+                                                               fakenow)
+        dirstate.dirstatemap.write = wrapper
+
+    orig_module = parsers
+    orig_pack_dirstate = parsers.pack_dirstate
 
     orig_dirstate_getfsnow = dirstate._getfsnow
     wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
@@ -74,6 +74,8 @@
     finally:
         orig_module.pack_dirstate = orig_pack_dirstate
         dirstate._getfsnow = orig_dirstate_getfsnow
+        if rustmod is not None:
+            dirstate.dirstatemap.write = orig_dirstatemap_write
 
 def _poststatusfixup(orig, workingctx, status, fixup):
     ui = workingctx.repo().ui
diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs
--- a/rust/hg-cpython/src/lib.rs
+++ b/rust/hg-cpython/src/lib.rs
@@ -50,6 +50,11 @@
         "filepatterns",
         filepatterns::init_module(py, &dotted_name)?,
     )?;
+    m.add(
+        py,
+        "parsers",
+        dirstate::init_parsers_module(py, &dotted_name)?,
+    )?;
     m.add(py, "GraphError", py.get_type::<exceptions::GraphError>())?;
     m.add(
         py,
diff --git a/rust/hg-cpython/src/exceptions.rs b/rust/hg-cpython/src/exceptions.rs
--- a/rust/hg-cpython/src/exceptions.rs
+++ b/rust/hg-cpython/src/exceptions.rs
@@ -65,3 +65,5 @@
         }
     }
 }
+
+py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
diff --git a/rust/hg-cpython/src/dirstate/mod.rs b/rust/hg-cpython/src/dirstate/mod.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/mod.rs
@@ -0,0 +1,281 @@
+// dirstate module
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate` module provided by the
+//! `hg-core` package.
+//!
+//! From Python, this will be seen as `mercurial.rustext.dirstate`
+use cpython::{
+    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
+    PySequence, PyTuple, Python, ToPyObject,
+};
+use hg::{
+    DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
+    EntryState, StateMap,
+};
+use std::ffi::CStr;
+
+#[cfg(feature = "python27")]
+extern crate python27_sys as python_sys;
+#[cfg(feature = "python3")]
+extern crate python3_sys as python_sys;
+
+use self::python_sys::PyCapsule_Import;
+use hg::parsers::{pack_dirstate, parse_dirstate};
+use libc::{c_char, c_int};
+use std::collections::HashMap;
+use std::mem::transmute;
+use std::time::Duration;
+
+#[macro_use]
+mod macros;
+mod copymap;
+mod dirs_multiset;
+mod dirstate_map;
+use dirstate::dirs_multiset::Dirs;
+use dirstate::dirstate_map::DirstateMap;
+use exceptions::AlreadyBorrowed;
+use std::convert::TryInto;
+
+/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
+/// for this type, and raises a Python `Exception` if the check does not pass.
+/// Because this type differs only in name from the regular Python tuple, it
+/// would be a good idea in the near future to remove it entirely to allow
+/// for a pure Python tuple of the same effective structure to be used,
+/// rendering this type and the capsule below useless.
+type MakeDirstateTupleFn = extern "C" fn(
+    state: c_char,
+    mode: c_int,
+    size: c_int,
+    mtime: c_int,
+) -> PyObject;
+
+/// This is largely a copy/paste from cindex.rs, pending the merge of a
+/// `py_capsule_fn!` macro in the rust-cpython project:
+/// https://github.com/dgrunwald/rust-cpython/pull/169
+fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
+    unsafe {
+        let caps_name = CStr::from_bytes_with_nul_unchecked(
+            b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0",
+        );
+        let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
+        if from_caps.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+        Ok(transmute(from_caps))
+    }
+}
+
+fn extract_dirstate(py: Python, dmap: &PyDict) -> Result<StateMap, PyErr> {
+    dmap.items(py)
+        .iter()
+        .map(|(filename, stats)| {
+            let stats = stats.extract::<PySequence>(py)?;
+            let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
+            let state: EntryState =
+                state.data(py)[0].try_into().map_err(|e: DirstateParseError| {
+                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                })?;
+            let mode = stats.get_item(py, 1)?.extract(py)?;
+            let size = stats.get_item(py, 2)?.extract(py)?;
+            let mtime = stats.get_item(py, 3)?.extract(py)?;
+            let filename = filename.extract::<PyBytes>(py)?;
+            let filename = filename.data(py);
+            Ok((
+                filename.to_owned(),
+                DirstateEntry {
+                    state,
+                    mode,
+                    size,
+                    mtime,
+                },
+            ))
+        })
+        .collect()
+}
+
+/// Create the module, with `__package__` given from parent
+pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
+    let dotted_name = &format!("{}.dirstate", package);
+    let m = PyModule::new(py, dotted_name)?;
+
+    m.add(py, "__package__", package)?;
+    m.add(py, "__doc__", "Dirstate - Rust implementation")?;
+
+    m.add_class::<Dirs>(py)?;
+    m.add_class::<DirstateMap>(py)?;
+    m.add(py, "AlreadyBorrowed", py.get_type::<AlreadyBorrowed>())?;
+
+    let sys = PyModule::import(py, "sys")?;
+    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
+    sys_modules.set_item(py, dotted_name, &m)?;
+
+    Ok(m)
+}
+
+fn parse_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    st: PyBytes,
+) -> PyResult<PyTuple> {
+    let mut dirstate_map = HashMap::new();
+    let mut copies = HashMap::new();
+
+    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
+        Ok(parents) => {
+            for (filename, entry) in dirstate_map {
+                dmap.set_item(
+                    py,
+                    PyBytes::new(py, &filename),
+                    decapsule_make_dirstate_tuple(py)?(
+                        entry.state as c_char,
+                        entry.mode,
+                        entry.size,
+                        entry.mtime,
+                    ),
+                )?;
+            }
+            for (path, copy_path) in copies {
+                copymap.set_item(
+                    py,
+                    PyBytes::new(py, &path),
+                    PyBytes::new(py, &copy_path),
+                )?;
+            }
+            Ok(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py),
+            )
+        }
+        Err(e) => Err(PyErr::new::<exc::ValueError, _>(
+            py,
+            match e {
+                DirstateParseError::TooLittleData => {
+                    "too little data for parents".to_string()
+                }
+                DirstateParseError::Overflow => {
+                    "overflow in dirstate".to_string()
+                }
+                DirstateParseError::CorruptedEntry(e) => e,
+                DirstateParseError::Damaged => {
+                    "dirstate appears to be damaged".to_string()
+                }
+            },
+        )),
+    }
+}
+
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+
+    let mut dirstate_map = extract_dirstate(py, &dmap)?;
+
+    let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
+        .items(py)
+        .iter()
+        .map(|(key, value)| {
+            Ok((
+                key.extract::<PyBytes>(py)?.data(py).to_owned(),
+                value.extract::<PyBytes>(py)?.data(py).to_owned(),
+            ))
+        })
+        .collect();
+
+    match pack_dirstate(
+        &mut dirstate_map,
+        &copies?,
+        DirstateParents {
+            p1: p1.to_owned(),
+            p2: p2.to_owned(),
+        },
+        Duration::from_secs(now.value(py) as u64),
+    ) {
+        Ok(packed) => {
+            for (
+                filename,
+                DirstateEntry {
+                    state,
+                    mode,
+                    size,
+                    mtime,
+                },
+            ) in dirstate_map
+            {
+                dmap.set_item(
+                    py,
+                    PyBytes::new(py, &filename[..]),
+                    decapsule_make_dirstate_tuple(py)?(
+                        state as c_char,
+                        mode,
+                        size,
+                        mtime,
+                    ),
+                )?;
+            }
+            Ok(PyBytes::new(py, &packed))
+        }
+        Err(error) => Err(PyErr::new::<exc::ValueError, _>(
+            py,
+            match error {
+                DirstatePackError::CorruptedParent => {
+                    "expected a 20-byte hash".to_string()
+                }
+                DirstatePackError::CorruptedEntry(e) => e,
+                DirstatePackError::BadSize(expected, actual) => {
+                    format!("bad dirstate size: {} != {}", actual, expected)
+                }
+            },
+        )),
+    }
+}
+
+/// Create the module, with `__package__` given from parent
+pub fn init_parsers_module(py: Python, package: &str) -> PyResult<PyModule> {
+    let dotted_name = &format!("{}.parsers", package);
+    let m = PyModule::new(py, dotted_name)?;
+
+    m.add(py, "__package__", package)?;
+    m.add(py, "__doc__", "Parsers - Rust implementation")?;
+
+    m.add(
+        py,
+        "parse_dirstate",
+        py_fn!(
+            py,
+            parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
+        ),
+    )?;
+    m.add(
+        py,
+        "pack_dirstate",
+        py_fn!(
+            py,
+            pack_dirstate_wrapper(
+                dmap: PyDict,
+                copymap: PyDict,
+                pl: PyTuple,
+                now: PyInt
+            )
+        ),
+    )?;
+
+    let sys = PyModule::import(py, "sys")?;
+    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
+    sys_modules.set_item(py, dotted_name, &m)?;
+
+    Ok(m)
+}
diff --git a/rust/hg-cpython/src/dirstate/macros.rs b/rust/hg-cpython/src/dirstate/macros.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/macros.rs
@@ -0,0 +1,254 @@
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Macros for use in the `hg-cpython` bridge library.
+
+/// Allows a `py_class!` generated struct to share references to one of its
+/// data members with Python.
+///
+/// # Warning
+///
+/// The targeted `py_class!` needs to have the
+/// `data leak_count: RefCell<usize>;` data attribute to compile.
+/// A better, more complicated macro is needed to automatically insert the
+/// leak count, which is not yet really battle tested (what happens when
+/// multiple references are needed?). See the example below.
+///
+/// TODO allow Python container types: for now, integration with the garbage
+///     collector does not extend to Rust structs holding references to Python
+///     objects. Should the need surface, `__traverse__` and `__clear__` will
+///     need to be written as per the `rust-cpython` docs on GC integration.
+///
+/// # Parameters
+///
+/// * `$name` is the same identifier used in for `py_class!` macro call.
+/// * `$inner_struct` is the identifier of the underlying Rust struct
+/// * `$data_member` is the identifier of the data member of `$inner_struct`
+/// that will be shared.
+/// * `$leaked` is the identifier to give to the struct that will manage
+/// references to `$name`, to be used for example in other macros like
+/// `py_shared_mapping_iterator`.
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: Vec<u32>;
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data leak_count: RefCell<usize>;
+/// });
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+/// ```
+macro_rules! py_shared_ref {
+    (
+        $name: ident,
+        $inner_struct: ident,
+        $data_member: ident,
+        $leaked: ident
+    ) => {
+        impl $name {
+            fn leak_immutable(&self, py: Python) -> &'static $inner_struct {
+                let ptr = self.$data_member(py).as_ptr();
+                *self.leak_count(py).borrow_mut() += 1;
+                unsafe { &*ptr }
+            }
+
+            fn borrow_mut<'a>(
+                &'a self,
+                py: Python<'a>,
+            ) -> PyResult<RefMut<$inner_struct>> {
+                match *self.leak_count(py).borrow() {
+                    0 => Ok(self.$data_member(py).borrow_mut()),
+                    // TODO
+                    // For now, this works differently than Python references
+                    // in the case of iterators.
+                    // Python does not complain when the data an iterator
+                    // points to is modified if the iterator is never used
+                    // afterwards.
+                    // Here, we are stricter than this by refusing to give a
+                    // mutable reference if it is already borrowed.
+                    // While the additional safety might be argued for, it
+                    // breaks valid programming patterns in Python and we need
+                    // to fix this issue down the line.
+                    _ => Err(AlreadyBorrowed::new(
+                        py,
+                        "Cannot borrow mutably while there are \
+                         immutable references in Python objects",
+                    )),
+                }
+            }
+
+            fn decrease_leak_count(&self, py: Python) {
+                *self.leak_count(py).borrow_mut() -= 1;
+            }
+        }
+
+        /// Manage immutable references to `$name` leaked into Python
+        /// iterators.
+        ///
+        /// In truth, this does not represent leaked references themselves;
+        /// it is instead useful alongside them to manage them.
+        pub struct $leaked {
+            inner: $name,
+        }
+
+        impl $leaked {
+            fn new(py: Python, inner: &$name) -> Self {
+                Self {
+                    inner: inner.clone_ref(py),
+                }
+            }
+        }
+
+        impl Drop for $leaked {
+            fn drop(&mut self) {
+                let gil = Python::acquire_gil();
+                let py = gil.python();
+                self.inner.decrease_leak_count(py);
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python iterator over a Rust iterator.
+macro_rules! py_shared_iterator_impl {
+    (
+        $name: ident,
+        $leaked: ident,
+        $iterator_type: ty,
+        $success_func: expr,
+        $success_type: ty
+    ) => {
+        py_class!(pub class $name |py| {
+            data inner: RefCell<Option<$leaked>>;
+            data it: RefCell<$iterator_type>;
+
+            def __next__(&self) -> PyResult<$success_type> {
+                let mut inner_opt = self.inner(py).borrow_mut();
+                if inner_opt.is_some() {
+                    match self.it(py).borrow_mut().next() {
+                        None => {
+                            // replace Some(inner) by None, drop $leaked
+                            inner_opt.take();
+                            Ok(None)
+                        }
+                        Some(res) => {
+                            $success_func(py, res)
+                        }
+                    }
+                } else {
+                    Ok(None)
+                }
+            }
+
+            def __iter__(&self) -> PyResult<Self> {
+                Ok(self.clone_ref(py))
+            }
+        });
+
+        impl $name {
+            pub fn from_inner(
+                py: Python,
+                leaked: Option<$leaked>,
+                it: $iterator_type
+            ) -> PyResult<Self> {
+                Self::create_instance(
+                    py,
+                    RefCell::new(leaked),
+                    RefCell::new(it)
+                )
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python mapping iterator over a Rust
+/// iterator.
+///
+/// TODO: this is a bit awkward to use, and a better (more complicated)
+///     procedural macro would simplify the interface a lot.
+///
+/// # Parameters
+///
+/// * `$name` is the identifier to give to the resulting Rust struct.
+/// * `$leaked` corresponds to `$leaked` in the matching `py_shared_ref!` call.
+/// * `$iterator_type` is the iterator type
+/// (like `std::collections::hash_map::Iter`).
+/// * `$key_type` is the type of the key in the mapping
+/// * `$value_type` is the type of the value in the mapping
+/// * `$success_func` is a function for processing the Rust `(key, value)`
+/// tuple on iteration success, turning it into something Python understands.
+/// * `$success_func` is the return type of `$success_func`
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: HashMap<Vec<u8>, Vec<u8>>;
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data leak_count: RefCell<usize>;
+///
+///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
+///         MyTypeItemsIterator::create_instance(
+///             py,
+///             RefCell::new(Some(MyTypeLeakedRef::new(py, &self))),
+///             RefCell::new(self.leak_immutable(py).iter()),
+///         )
+///     }
+/// });
+///
+/// impl MyType {
+///     fn translate_key_value(
+///         py: Python,
+///         res: (&Vec<u8>, &Vec<u8>),
+///     ) -> PyResult<Option<(PyBytes, PyBytes)>> {
+///         let (f, entry) = res;
+///         Ok(Some((
+///             PyBytes::new(py, f),
+///             PyBytes::new(py, entry),
+///         )))
+///     }
+/// }
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+///
+/// py_shared_mapping_iterator!(
+///     MyTypeItemsIterator,
+///     MyTypeLeakedRef,
+///     std::collections::hash_map::Iter,
+///     Vec<u8>,
+///     Vec<u8>,
+///     MyType::translate_key_value,
+///     Option<(PyBytes, PyBytes)>
+/// );
+/// ```
+macro_rules! py_shared_mapping_iterator {
+    (
+        $name:ident,
+        $leaked:ident,
+        $iterator_type: ident,
+        $key_type: ty,
+        $value_type: ty,
+        $success_func: path,
+        $success_type: ty
+    ) => {
+        py_shared_iterator_impl!(
+            $name,
+            $leaked,
+            $iterator_type<'static, $key_type, $value_type>,
+            $success_func,
+            $success_type
+        );
+    };
+}
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -0,0 +1,492 @@
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate::dirstate_map` file provided by the
+//! `hg-core` package.
+
+use std::cell::{RefCell, RefMut};
+use std::collections::hash_map::Iter;
+use std::convert::TryInto;
+use std::time::Duration;
+
+use cpython::{
+    exc, ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use libc::c_char;
+
+use dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator};
+use dirstate::decapsule_make_dirstate_tuple;
+use dirstate::dirs_multiset::Dirs;
+use exceptions::AlreadyBorrowed;
+use hg::{
+    DirsIterable, DirsMultiset, DirstateEntry, DirstateMap as RustDirstateMap,
+    DirstateParents, DirstateParseError, EntryState,
+};
+
+/// TODO there are a couple of issues that needs fixing and I'm not certain
+///     of how to go about them:
+///     This object needs to share references to multiple members of its Rust
+///     inner struct, namely `copy_map`, `dirs` and `all_dirs`.
+///     Right now `CopyMap` is done, but it needs to have an explicit reference
+///     to `RustDirstateMap` which itself needs to have an encapsulation for
+///     every method in `CopyMap` (copymapcopy, etc.).
+///     This is ugly and hard to maintain.
+///     The same logic applies to `dirs` and `all_dirs`, however the `Dirs`
+///     `py_class!` is already implemented and does not mention
+///     `RustDirstateMap`, rightfully so.
+///     All attributes also have to have a separate refcount data attribute for
+///     leaks, with all methods that go along for reference sharing.
+py_class!(pub class DirstateMap |py| {
+    data inner: RefCell<RustDirstateMap>;
+    data leak_count: RefCell<usize>;
+
+    def __new__(_cls, _root: PyObject) -> PyResult<Self> {
+        let inner = RustDirstateMap::default();
+        Self::create_instance(py, RefCell::new(inner), RefCell::new(0))
+    }
+
+    def clear(&self) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.clear();
+        Ok(py.None())
+    }
+
+    def get(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        match self.inner(py).borrow().get(key.data(py)) {
+            Some(entry) => Ok(Some(decapsule_make_dirstate_tuple(py)?(
+                        entry.state as c_char,
+                        entry.mode,
+                        entry.size,
+                        entry.mtime,
+                    ))),
+            None => Ok(default)
+        }
+    }
+
+    def addfile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject,
+        state: PyObject,
+        mode: PyObject,
+        size: PyObject,
+        mtime: PyObject
+    ) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.add_file(
+            f.extract::<PyBytes>(py)?.data(py),
+            oldstate.extract::<PyBytes>(py)?.data(py)[0]
+                .try_into()
+                .map_err(|e: DirstateParseError| {
+                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                })?,
+            DirstateEntry {
+                state: state.extract::<PyBytes>(py)?.data(py)[0]
+                    .try_into()
+                    .map_err(|e: DirstateParseError| {
+                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                    })?,
+                mode: mode.extract(py)?,
+                size: size.extract(py)?,
+                mtime: mtime.extract(py)?,
+            },
+        );
+        Ok(py.None())
+    }
+
+    def removefile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject,
+        size: PyObject
+    ) -> PyResult<PyObject> {
+        self.borrow_mut(py)?
+            .remove_file(
+                f.extract::<PyBytes>(py)?.data(py),
+                oldstate.extract::<PyBytes>(py)?.data(py)[0]
+                    .try_into()
+                    .map_err(|e: DirstateParseError| {
+                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                    })?,
+                size.extract(py)?,
+            )
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })?;
+        Ok(py.None())
+    }
+
+    def dropfile(
+        &self,
+        f: PyObject,
+        oldstate: PyObject
+    ) -> PyResult<PyBool> {
+        self.borrow_mut(py)?
+            .drop_file(
+                f.extract::<PyBytes>(py)?.data(py),
+                oldstate.extract::<PyBytes>(py)?.data(py)[0]
+                    .try_into()
+                    .map_err(|e: DirstateParseError| {
+                        PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                    })?,
+            )
+            .and_then(|b| Ok(b.to_py_object(py)))
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })
+    }
+
+    def clearambiguoustimes(
+        &self,
+        files: PyObject,
+        now: PyObject
+    ) -> PyResult<PyObject> {
+        let files: PyResult<Vec<Vec<u8>>> = files
+            .iter(py)?
+            .map(|filename| {
+                Ok(filename?.extract::<PyBytes>(py)?.data(py).to_owned())
+            })
+            .collect();
+        self.inner(py)
+            .borrow_mut()
+            .clear_ambiguous_times(files?, now.extract(py)?);
+        Ok(py.None())
+    }
+
+    // TODO share the reference
+    def nonnormalentries(&self) -> PyResult<PyObject> {
+        let (non_normal, other_parent) =
+            self.inner(py).borrow().non_normal_other_parent_entries();
+
+        let locals = PyDict::new(py);
+        locals.set_item(
+            py,
+            "non_normal",
+            non_normal
+                .iter()
+                .map(|v| PyBytes::new(py, &v))
+                .collect::<Vec<PyBytes>>()
+                .to_py_object(py),
+        )?;
+        locals.set_item(
+            py,
+            "other_parent",
+            other_parent
+                .iter()
+                .map(|v| PyBytes::new(py, &v))
+                .collect::<Vec<PyBytes>>()
+                .to_py_object(py),
+        )?;
+
+        py.eval("set(non_normal), set(other_parent)", None, Some(&locals))
+    }
+
+    def hastrackeddir(&self, d: PyObject) -> PyResult<PyBool> {
+        let d = d.extract::<PyBytes>(py)?;
+        Ok(self
+            .inner(py)
+            .borrow_mut()
+            .has_tracked_dir(d.data(py))
+            .to_py_object(py))
+    }
+
+    def hasdir(&self, d: PyObject) -> PyResult<PyBool> {
+        let d = d.extract::<PyBytes>(py)?;
+        Ok(self
+            .inner(py)
+            .borrow_mut()
+            .has_dir(d.data(py))
+            .to_py_object(py))
+    }
+
+    def parents(&self, st: PyObject) -> PyResult<PyTuple> {
+        self.inner(py)
+            .borrow_mut()
+            .parents(st.extract::<PyBytes>(py)?.data(py))
+            .and_then(|d| {
+                Ok((PyBytes::new(py, &d.p1), PyBytes::new(py, &d.p2))
+                    .to_py_object(py))
+            })
+            .or_else(|_| {
+                Err(PyErr::new::<exc::OSError, _>(
+                    py,
+                    "Dirstate error".to_string(),
+                ))
+            })
+    }
+
+    def setparents(&self, p1: PyObject, p2: PyObject) -> PyResult<PyObject> {
+        let p1 = p1.extract::<PyBytes>(py)?.data(py).to_vec();
+        let p2 = p2.extract::<PyBytes>(py)?.data(py).to_vec();
+
+        self.inner(py)
+            .borrow_mut()
+            .set_parents(DirstateParents { p1, p2 });
+        Ok(py.None())
+    }
+
+    def read(&self, st: PyObject) -> PyResult<Option<PyObject>> {
+        match self
+            .inner(py)
+            .borrow_mut()
+            .read(st.extract::<PyBytes>(py)?.data(py))
+        {
+            Ok(Some(parents)) => Ok(Some(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py)
+                    .into_object(),
+            )),
+            Ok(None) => Ok(Some(py.None())),
+            Err(_) => Err(PyErr::new::<exc::OSError, _>(
+                py,
+                "Dirstate error".to_string(),
+            )),
+        }
+    }
+    def write(
+        &self,
+        p1: PyObject,
+        p2: PyObject,
+        now: PyObject
+    ) -> PyResult<PyBytes> {
+        let now = Duration::new(now.extract(py)?, 0);
+        let parents = DirstateParents {
+            p1: p1.extract::<PyBytes>(py)?.data(py).to_owned(),
+            p2: p2.extract::<PyBytes>(py)?.data(py).to_owned(),
+        };
+
+        match self.borrow_mut(py)?.pack(parents, now) {
+            Ok(packed) => Ok(PyBytes::new(py, &packed)),
+            Err(_) => Err(PyErr::new::<exc::OSError, _>(
+                py,
+                "Dirstate error".to_string(),
+            )),
+        }
+    }
+
+    def filefoldmapasdict(&self) -> PyResult<PyDict> {
+        let dict = PyDict::new(py);
+        for (key, value) in
+            self.borrow_mut(py)?.property_file_fold_map().iter()
+        {
+            dict.set_item(py, key, value)?;
+        }
+        Ok(dict)
+    }
+
+    def __len__(&self) -> PyResult<usize> {
+        Ok(self.inner(py).borrow().len())
+    }
+
+    def __contains__(&self, key: PyObject) -> PyResult<bool> {
+        let key = key.extract::<PyBytes>(py)?;
+        Ok(self.inner(py).borrow().contains_key(key.data(py)))
+    }
+
+    def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let key = key.extract::<PyBytes>(py)?;
+        let key = key.data(py);
+        match self.inner(py).borrow().get(key) {
+            Some(entry) => Ok(decapsule_make_dirstate_tuple(py)?(
+                entry.state as c_char,
+                entry.mode,
+                entry.size,
+                entry.mtime,
+            )),
+            None => Err(PyErr::new::<exc::KeyError, _>(
+                py,
+                String::from_utf8_lossy(key),
+            )),
+        }
+    }
+
+    def keys(&self) -> PyResult<DirstateMapKeysIterator> {
+        DirstateMapKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(DirstateMapLeakedRef::new(py, &self))),
+            RefCell::new(self.leak_immutable(py).iter()),
+        )
+    }
+
+    def items(&self) -> PyResult<DirstateMapItemsIterator> {
+        DirstateMapItemsIterator::create_instance(
+            py,
+            RefCell::new(Some(DirstateMapLeakedRef::new(py, &self))),
+            RefCell::new(self.leak_immutable(py).iter()),
+        )
+    }
+
+    def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
+        DirstateMapKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(DirstateMapLeakedRef::new(py, &self))),
+            RefCell::new(self.leak_immutable(py).iter()),
+        )
+    }
+
+    def getdirs(&self) -> PyResult<Dirs> {
+        // TODO don't copy, share the reference
+        self.inner(py).borrow_mut().set_dirs();
+        Dirs::from_inner(
+            py,
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&self.inner(py).borrow()),
+                Some(EntryState::Removed),
+            ),
+        )
+    }
+    def getalldirs(&self) -> PyResult<Dirs> {
+        // TODO don't copy, share the reference
+        self.inner(py).borrow_mut().set_all_dirs();
+        Dirs::from_inner(
+            py,
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&self.inner(py).borrow()),
+                None,
+            ),
+        )
+    }
+
+    // TODO all copymap* methods, see docstring above
+    def copymapcopy(&self) -> PyResult<PyDict> {
+        let dict = PyDict::new(py);
+        for (key, value) in self.inner(py).borrow().copy_map.iter() {
+            dict.set_item(py, PyBytes::new(py, key), PyBytes::new(py, value))?;
+        }
+        Ok(dict)
+    }
+
+    def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> {
+        let key = key.extract::<PyBytes>(py)?;
+        match self.inner(py).borrow().copy_map.get(key.data(py)) {
+            Some(copy) => Ok(PyBytes::new(py, copy)),
+            None => Err(PyErr::new::<exc::KeyError, _>(
+                py,
+                String::from_utf8_lossy(key.data(py)),
+            )),
+        }
+    }
+    def copymap(&self) -> PyResult<CopyMap> {
+        CopyMap::from_inner(py, self.clone_ref(py))
+    }
+
+    def copymaplen(&self) -> PyResult<usize> {
+        Ok(self.inner(py).borrow().copy_map.len())
+    }
+    def copymapcontains(&self, key: PyObject) -> PyResult<bool> {
+        let key = key.extract::<PyBytes>(py)?;
+        Ok(self.inner(py).borrow().copy_map.contains_key(key.data(py)))
+    }
+    def copymapget(
+        &self,
+        key: PyObject,
+        default: Option<PyObject>
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        match self.inner(py).borrow().copy_map.get(key.data(py)) {
+            Some(copy) => Ok(Some(PyBytes::new(py, copy).into_object())),
+            None => Ok(default),
+        }
+    }
+    def copymapsetitem(
+        &self,
+        key: PyObject,
+        value: PyObject
+    ) -> PyResult<PyObject> {
+        let key = key.extract::<PyBytes>(py)?;
+        let value = value.extract::<PyBytes>(py)?;
+        self.inner(py)
+            .borrow_mut()
+            .copy_map
+            .insert(key.data(py).to_vec(), value.data(py).to_vec());
+        Ok(py.None())
+    }
+    def copymappop(
+        &self,
+        key: PyObject,
+        default: Option<PyObject>
+    ) -> PyResult<Option<PyObject>> {
+        let key = key.extract::<PyBytes>(py)?;
+        match self.inner(py).borrow_mut().copy_map.remove(key.data(py)) {
+            Some(_) => Ok(None),
+            None => Ok(default),
+        }
+    }
+
+    def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
+        CopyMapKeysIterator::from_inner(
+            py,
+            Some(DirstateMapLeakedRef::new(py, &self)),
+            self.leak_immutable(py).copy_map.iter(),
+        )
+    }
+
+    def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
+        CopyMapItemsIterator::from_inner(
+            py,
+            Some(DirstateMapLeakedRef::new(py, &self)),
+            self.leak_immutable(py).copy_map.iter(),
+        )
+    }
+
+});
+
+impl DirstateMap {
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &DirstateEntry),
+    ) -> PyResult<Option<PyBytes>> {
+        Ok(Some(PyBytes::new(py, res.0)))
+    }
+    fn translate_key_value(
+        py: Python,
+        res: (&Vec<u8>, &DirstateEntry),
+    ) -> PyResult<Option<(PyBytes, PyObject)>> {
+        let (f, entry) = res;
+        Ok(Some((
+            PyBytes::new(py, f),
+            decapsule_make_dirstate_tuple(py)?(
+                entry.state as c_char,
+                entry.mode,
+                entry.size,
+                entry.mtime,
+            ),
+        )))
+    }
+}
+
+py_shared_ref!(DirstateMap, RustDirstateMap, inner, DirstateMapLeakedRef);
+
+py_shared_mapping_iterator!(
+    DirstateMapKeysIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    DirstateEntry,
+    DirstateMap::translate_key,
+    Option<PyBytes>
+);
+
+py_shared_mapping_iterator!(
+    DirstateMapItemsIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    DirstateEntry,
+    DirstateMap::translate_key_value,
+    Option<(PyBytes, PyObject)>
+);
diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,127 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for the `hg::dirstate::dirs_multiset` file provided by the
+//! `hg-core` package.
+
+use std::cell::{RefCell, RefMut};
+use std::collections::hash_map::Iter;
+use std::convert::TryInto;
+
+use cpython::{
+    exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
+    Python,
+};
+
+use dirstate::extract_dirstate;
+use exceptions::AlreadyBorrowed;
+use hg::{DirsIterable, DirsMultiset, DirstateMapError, DirstateParseError, EntryState};
+
+py_class!(pub class Dirs |py| {
+    data inner: RefCell<DirsMultiset>;
+    data leak_count: RefCell<usize>;
+
+    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
+    // a `list`)
+    def __new__(
+        _cls,
+        map: PyObject,
+        skip: Option<PyObject> = None
+    ) -> PyResult<Self> {
+        let mut skip_state: Option<EntryState> = None;
+        if let Some(skip) = skip {
+            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0].try_into().map_err(|e: DirstateParseError| {
+                    PyErr::new::<exc::ValueError, _>(py, e.to_string())
+                })?);
+        }
+        let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate = extract_dirstate(py, &map)?;
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&dirstate),
+                skip_state,
+            )
+        } else {
+            let map: Result<Vec<Vec<u8>>, PyErr> = map
+                .iter(py)?
+                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+                .collect();
+            DirsMultiset::new(
+                DirsIterable::Manifest(&map?),
+                skip_state,
+            )
+        };
+
+        Self::create_instance(py, RefCell::new(inner), RefCell::new(0))
+    }
+
+    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.add_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        );
+        Ok(py.None())
+    }
+
+    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.borrow_mut(py)?.delete_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        )
+            .and(Ok(py.None()))
+            .or_else(|e| {
+                match e {
+                    DirstateMapError::PathNotFound(_p) => {
+                        Err(PyErr::new::<exc::ValueError, _>(
+                            py,
+                            "expected a value, found none".to_string(),
+                        ))
+                    }
+                    DirstateMapError::EmptyPath => {
+                        Ok(py.None())
+                    }
+                }
+            })
+    }
+
+    def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
+        DirsMultisetKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(DirsMultisetLeakedRef::new(py, &self))),
+            RefCell::new(self.leak_immutable(py).iter()),
+        )
+    }
+
+    def __contains__(&self, item: PyObject) -> PyResult<bool> {
+        Ok(self
+            .inner(py)
+            .borrow()
+            .contains_key(item.extract::<PyBytes>(py)?.data(py).as_ref()))
+    }
+});
+
+py_shared_ref!(Dirs, DirsMultiset, inner, DirsMultisetLeakedRef);
+
+impl Dirs {
+    pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
+        Self::create_instance(py, RefCell::new(d), RefCell::new(0))
+    }
+
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &u32),
+    ) -> PyResult<Option<PyBytes>> {
+        Ok(Some(PyBytes::new(py, res.0)))
+    }
+}
+
+py_shared_mapping_iterator!(
+    DirsMultisetKeysIterator,
+    DirsMultisetLeakedRef,
+    Iter,
+    Vec<u8>,
+    u32,
+    Dirs::translate_key,
+    Option<PyBytes>
+);
diff --git a/rust/hg-cpython/src/dirstate/copymap.rs b/rust/hg-cpython/src/dirstate/copymap.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/dirstate/copymap.rs
@@ -0,0 +1,119 @@
+// copymap.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings for `hg::dirstate::dirstate_map::CopyMap` provided by the
+//! `hg-core` package.
+
+use cpython::{PyBytes, PyClone, PyDict, PyObject, PyResult, Python};
+use std::cell::RefCell;
+use std::collections::hash_map::Iter;
+
+use dirstate::dirstate_map::{DirstateMap, DirstateMapLeakedRef};
+
+py_class!(pub class CopyMap |py| {
+    data dirstate_map: DirstateMap;
+
+    def __getitem__(&self, key: PyObject) -> PyResult<PyBytes> {
+        (*self.dirstate_map(py)).copymapgetitem(py, key)
+    }
+
+    def __len__(&self) -> PyResult<usize> {
+        self.dirstate_map(py).copymaplen(py)
+    }
+
+    def __contains__(&self, key: PyObject) -> PyResult<bool> {
+        self.dirstate_map(py).copymapcontains(py, key)
+    }
+
+    def get(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        self.dirstate_map(py).copymapget(py, key, default)
+    }
+
+    def pop(
+        &self,
+        key: PyObject,
+        default: Option<PyObject> = None
+    ) -> PyResult<Option<PyObject>> {
+        self.dirstate_map(py).copymappop(py, key, default)
+    }
+
+    def __iter__(&self) -> PyResult<CopyMapKeysIterator> {
+        self.dirstate_map(py).copymapiter(py)
+    }
+
+    // Python's `dict()` builtin works with either a subclass of dict
+    // or an abstract mapping. Said mapping needs to implement `__getitem__`
+    // and `keys`.
+    def keys(&self) -> PyResult<CopyMapKeysIterator> {
+        self.dirstate_map(py).copymapiter(py)
+    }
+
+    def items(&self) -> PyResult<CopyMapItemsIterator> {
+        self.dirstate_map(py).copymapitemsiter(py)
+    }
+
+    def iteritems(&self) -> PyResult<CopyMapItemsIterator> {
+        self.dirstate_map(py).copymapitemsiter(py)
+    }
+
+    def __setitem__(
+        &self,
+        key: PyObject,
+        item: PyObject
+    ) -> PyResult<()> {
+        self.dirstate_map(py).copymapsetitem(py, key, item)?;
+        Ok(())
+    }
+
+    def copy(&self) -> PyResult<PyDict> {
+        self.dirstate_map(py).copymapcopy(py)
+    }
+
+});
+
+impl CopyMap {
+    pub fn from_inner(py: Python, dm: DirstateMap) -> PyResult<Self> {
+        Self::create_instance(py, dm)
+    }
+    fn translate_key(
+        py: Python,
+        res: (&Vec<u8>, &Vec<u8>),
+    ) -> PyResult<Option<PyBytes>> {
+        Ok(Some(PyBytes::new(py, res.0)))
+    }
+    fn translate_key_value(
+        py: Python,
+        res: (&Vec<u8>, &Vec<u8>),
+    ) -> PyResult<Option<(PyBytes, PyBytes)>> {
+        let (k, v) = res;
+        Ok(Some((PyBytes::new(py, k), PyBytes::new(py, v))))
+    }
+}
+
+py_shared_mapping_iterator!(
+    CopyMapKeysIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    Vec<u8>,
+    CopyMap::translate_key,
+    Option<PyBytes>
+);
+
+py_shared_mapping_iterator!(
+    CopyMapItemsIterator,
+    DirstateMapLeakedRef,
+    Iter,
+    Vec<u8>,
+    Vec<u8>,
+    CopyMap::translate_key_value,
+    Option<(PyBytes, PyBytes)>
+);
diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
deleted file mode 100644
--- a/rust/hg-cpython/src/dirstate.rs
+++ /dev/null
@@ -1,334 +0,0 @@
-// dirstate.rs
-//
-// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
-//
-// This software may be used and distributed according to the terms of the
-// GNU General Public License version 2 or any later version.
-
-//! Bindings for the `hg::dirstate` module provided by the
-//! `hg-core` package.
-//!
-//! From Python, this will be seen as `mercurial.rustext.dirstate`
-
-use cpython::{
-    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
-    PyResult, PySequence, PyTuple, Python, PythonObject, ToPyObject,
-};
-use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
-    DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
-};
-use std::collections::HashMap;
-use std::ffi::CStr;
-
-#[cfg(feature = "python27")]
-extern crate python27_sys as python_sys;
-#[cfg(feature = "python3")]
-extern crate python3_sys as python_sys;
-
-use self::python_sys::PyCapsule_Import;
-use libc::{c_char, c_int};
-use std::cell::RefCell;
-use std::mem::transmute;
-
-/// C code uses a custom `dirstate_tuple` type, checks in multiple instances
-/// for this type, and raises a Python `Exception` if the check does not pass.
-/// Because this type differs only in name from the regular Python tuple, it
-/// would be a good idea in the near future to remove it entirely to allow
-/// for a pure Python tuple of the same effective structure to be used,
-/// rendering this type and the capsule below useless.
-type MakeDirstateTupleFn = extern "C" fn(
-    state: c_char,
-    mode: c_int,
-    size: c_int,
-    mtime: c_int,
-) -> PyObject;
-
-/// This is largely a copy/paste from cindex.rs, pending the merge of a
-/// `py_capsule_fn!` macro in the rust-cpython project:
-/// https://github.com/dgrunwald/rust-cpython/pull/169
-fn decapsule_make_dirstate_tuple(py: Python) -> PyResult<MakeDirstateTupleFn> {
-    unsafe {
-        let caps_name = CStr::from_bytes_with_nul_unchecked(
-            b"mercurial.cext.parsers.make_dirstate_tuple_CAPI\0",
-        );
-        let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
-        if from_caps.is_null() {
-            return Err(PyErr::fetch(py));
-        }
-        Ok(transmute(from_caps))
-    }
-}
-
-fn parse_dirstate_wrapper(
-    py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    st: PyBytes,
-) -> PyResult<PyTuple> {
-    match parse_dirstate(st.data(py)) {
-        Ok((parents, dirstate_vec, copies)) => {
-            for (filename, entry) in dirstate_vec {
-                dmap.set_item(
-                    py,
-                    PyBytes::new(py, &filename[..]),
-                    decapsule_make_dirstate_tuple(py)?(
-                        entry.state as c_char,
-                        entry.mode,
-                        entry.size,
-                        entry.mtime,
-                    ),
-                )?;
-            }
-            for CopyVecEntry { path, copy_path } in copies {
-                copymap.set_item(
-                    py,
-                    PyBytes::new(py, path),
-                    PyBytes::new(py, copy_path),
-                )?;
-            }
-            Ok((PyBytes::new(py, parents.p1), PyBytes::new(py, parents.p2))
-                .to_py_object(py))
-        }
-        Err(e) => Err(PyErr::new::<exc::ValueError, _>(
-            py,
-            match e {
-                DirstateParseError::TooLittleData => {
-                    "too little data for parents".to_string()
-                }
-                DirstateParseError::Overflow => {
-                    "overflow in dirstate".to_string()
-                }
-                DirstateParseError::CorruptedEntry(e) => e,
-            },
-        )),
-    }
-}
-
-fn extract_dirstate_vec(
-    py: Python,
-    dmap: &PyDict,
-) -> Result<DirstateVec, PyErr> {
-    dmap.items(py)
-        .iter()
-        .map(|(filename, stats)| {
-            let stats = stats.extract::<PySequence>(py)?;
-            let state = stats.get_item(py, 0)?.extract::<PyBytes>(py)?;
-            let state = state.data(py)[0] as i8;
-            let mode = stats.get_item(py, 1)?.extract(py)?;
-            let size = stats.get_item(py, 2)?.extract(py)?;
-            let mtime = stats.get_item(py, 3)?.extract(py)?;
-            let filename = filename.extract::<PyBytes>(py)?;
-            let filename = filename.data(py);
-            Ok((
-                filename.to_owned(),
-                DirstateEntry {
-                    state,
-                    mode,
-                    size,
-                    mtime,
-                },
-            ))
-        })
-        .collect()
-}
-
-fn pack_dirstate_wrapper(
-    py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    pl: PyTuple,
-    now: PyInt,
-) -> PyResult<PyBytes> {
-    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
-    let p1: &[u8] = p1.data(py);
-    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
-    let p2: &[u8] = p2.data(py);
-
-    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
-
-    let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
-        .items(py)
-        .iter()
-        .map(|(key, value)| {
-            Ok((
-                key.extract::<PyBytes>(py)?.data(py).to_owned(),
-                value.extract::<PyBytes>(py)?.data(py).to_owned(),
-            ))
-        })
-        .collect();
-
-    match pack_dirstate(
-        &dirstate_vec,
-        &copies?,
-        DirstateParents { p1, p2 },
-        now.as_object().extract::<i32>(py)?,
-    ) {
-        Ok((packed, new_dirstate_vec)) => {
-            for (
-                filename,
-                DirstateEntry {
-                    state,
-                    mode,
-                    size,
-                    mtime,
-                },
-            ) in new_dirstate_vec
-            {
-                dmap.set_item(
-                    py,
-                    PyBytes::new(py, &filename[..]),
-                    decapsule_make_dirstate_tuple(py)?(
-                        state as c_char,
-                        mode,
-                        size,
-                        mtime,
-                    ),
-                )?;
-            }
-            Ok(PyBytes::new(py, &packed))
-        }
-        Err(error) => Err(PyErr::new::<exc::ValueError, _>(
-            py,
-            match error {
-                DirstatePackError::CorruptedParent => {
-                    "expected a 20-byte hash".to_string()
-                }
-                DirstatePackError::CorruptedEntry(e) => e,
-                DirstatePackError::BadSize(expected, actual) => {
-                    format!("bad dirstate size: {} != {}", actual, expected)
-                }
-            },
-        )),
-    }
-}
-
-py_class!(pub class Dirs |py| {
-    data dirs_map: RefCell<DirsMultiset>;
-
-    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
-    // a `list`)
-    def __new__(
-        _cls,
-        map: PyObject,
-        skip: Option<PyObject> = None
-    ) -> PyResult<Self> {
-        let mut skip_state: Option<i8> = None;
-        if let Some(skip) = skip {
-            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
-        }
-        let dirs_map;
-
-        if let Ok(map) = map.cast_as::<PyDict>(py) {
-            let dirstate_vec = extract_dirstate_vec(py, &map)?;
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Dirstate(dirstate_vec),
-                skip_state,
-            )
-        } else {
-            let map: Result<Vec<Vec<u8>>, PyErr> = map
-                .iter(py)?
-                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
-                .collect();
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Manifest(map?),
-                skip_state,
-            )
-        }
-
-        Self::create_instance(py, RefCell::new(dirs_map))
-    }
-
-    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().add_path(
-            path.extract::<PyBytes>(py)?.data(py),
-        );
-        Ok(py.None())
-    }
-
-    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().delete_path(
-            path.extract::<PyBytes>(py)?.data(py),
-        )
-            .and(Ok(py.None()))
-            .or_else(|e| {
-                match e {
-                    DirstateMapError::PathNotFound(_p) => {
-                        Err(PyErr::new::<exc::ValueError, _>(
-                            py,
-                            "expected a value, found none".to_string(),
-                        ))
-                    }
-                    DirstateMapError::EmptyPath => {
-                        Ok(py.None())
-                    }
-                }
-            })
-    }
-
-    // This is really inefficient on top of being ugly, but it's an easy way
-    // of having it work to continue working on the rest of the module
-    // hopefully bypassing Python entirely pretty soon.
-    def __iter__(&self) -> PyResult<PyObject> {
-        let dict = PyDict::new(py);
-
-        for (key, value) in self.dirs_map(py).borrow().iter() {
-            dict.set_item(
-                py,
-                PyBytes::new(py, &key[..]),
-                value.to_py_object(py),
-            )?;
-        }
-
-        let locals = PyDict::new(py);
-        locals.set_item(py, "obj", dict)?;
-
-        py.eval("iter(obj)", None, Some(&locals))
-    }
-
-    def __contains__(&self, item: PyObject) -> PyResult<bool> {
-        Ok(self
-            .dirs_map(py)
-            .borrow()
-            .contains_key(item.extract::<PyBytes>(py)?.data(py).as_ref()))
-    }
-});
-
-/// Create the module, with `__package__` given from parent
-pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
-    let dotted_name = &format!("{}.dirstate", package);
-    let m = PyModule::new(py, dotted_name)?;
-
-    m.add(py, "__package__", package)?;
-    m.add(py, "__doc__", "Dirstate - Rust implementation")?;
-    m.add(
-        py,
-        "parse_dirstate",
-        py_fn!(
-            py,
-            parse_dirstate_wrapper(dmap: PyDict, copymap: PyDict, st: PyBytes)
-        ),
-    )?;
-    m.add(
-        py,
-        "pack_dirstate",
-        py_fn!(
-            py,
-            pack_dirstate_wrapper(
-                dmap: PyDict,
-                copymap: PyDict,
-                pl: PyTuple,
-                now: PyInt
-            )
-        ),
-    )?;
-
-    m.add_class::<Dirs>(py)?;
-
-    let sys = PyModule::import(py, "sys")?;
-    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
-    sys_modules.set_item(py, dotted_name, &m)?;
-
-    Ok(m)
-}
diff --git a/rust/hg-core/src/utils/mod.rs b/rust/hg-core/src/utils/mod.rs
--- a/rust/hg-core/src/utils/mod.rs
+++ b/rust/hg-core/src/utils/mod.rs
@@ -1,3 +1,12 @@
+// utils module
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Contains useful functions, traits, structs, etc. for use in core.
+
 pub mod files;
 
 pub fn replace_slice<T>(buf: &mut [T], from: &[T], to: &[T])
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -1,3 +1,12 @@
+// files.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Functions for fiddling with files.
+
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -6,22 +6,22 @@
 extern crate memchr;
 #[macro_use]
 extern crate lazy_static;
+extern crate core;
 extern crate regex;
 
 mod ancestors;
 pub mod dagops;
+pub mod utils;
 pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors};
 mod dirstate;
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
-    parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
-    DirstateVec,
+    dirstate_map::{CopyMap, DirstateMap, StateMap},
+    parsers, DirsIterable, DirstateEntry, DirstateParents, EntryState,
 };
 mod filepatterns;
-mod utils;
 
 pub use filepatterns::{
     build_single_regex, read_pattern_file, PatternSyntax, PatternTuple,
@@ -66,6 +66,25 @@
     TooLittleData,
     Overflow,
     CorruptedEntry(String),
+    Damaged,
+}
+
+impl From<std::io::Error> for DirstateParseError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateParseError::CorruptedEntry(e.to_string())
+    }
+}
+
+impl ToString for DirstateParseError {
+    fn to_string(&self) -> String {
+        use DirstateParseError::*;
+        match self {
+            TooLittleData => "Too little data for dirstate.".to_string(),
+            Overflow => "Overflow in dirstate.".to_string(),
+            CorruptedEntry(e) => format!("Corrupted entry: {}.", e),
+            Damaged => "Dirstate appears to be damaged.".to_string(),
+        }
+    }
 }
 
 #[derive(Debug, PartialEq)]
@@ -75,21 +94,33 @@
     BadSize(usize, usize),
 }
 
+impl From<std::io::Error> for DirstatePackError {
+    fn from(e: std::io::Error) -> Self {
+        DirstatePackError::CorruptedEntry(e.to_string())
+    }
+}
 #[derive(Debug, PartialEq)]
 pub enum DirstateMapError {
     PathNotFound(Vec<u8>),
     EmptyPath,
 }
 
-impl From<std::io::Error> for DirstatePackError {
-    fn from(e: std::io::Error) -> Self {
-        DirstatePackError::CorruptedEntry(e.to_string())
+pub enum DirstateError {
+    Parse(DirstateParseError),
+    Pack(DirstatePackError),
+    Map(DirstateMapError),
+    IO(std::io::Error),
+}
+
+impl From<DirstateParseError> for DirstateError {
+    fn from(e: DirstateParseError) -> Self {
+        DirstateError::Parse(e)
     }
 }
 
-impl From<std::io::Error> for DirstateParseError {
-    fn from(e: std::io::Error) -> Self {
-        DirstateParseError::CorruptedEntry(e.to_string())
+impl From<DirstatePackError> for DirstateError {
+    fn from(e: DirstatePackError) -> Self {
+        DirstateError::Pack(e)
     }
 }
 
@@ -109,3 +140,15 @@
         PatternFileError::IO(e)
     }
 }
+
+impl From<DirstateMapError> for DirstateError {
+    fn from(e: DirstateMapError) -> Self {
+        DirstateError::Map(e)
+    }
+}
+
+impl From<std::io::Error> for DirstateError {
+    fn from(e: std::io::Error) -> Self {
+        DirstateError::IO(e)
+    }
+}
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -1,3 +1,12 @@
+// filepatterns.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Mercurial-specific patterns handling.
+
 use crate::{LineNumber, PatternError, PatternFileError};
 use regex::bytes::Regex;
 use std::collections::HashMap;
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -4,31 +4,33 @@
 // GNU General Public License version 2 or any later version.
 
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use std::collections::HashMap;
+use dirstate::dirstate_map::{CopyMap, StateMap};
+use dirstate::EntryState;
+use std::convert::TryInto;
 use std::io::Cursor;
-use {
-    CopyVec, CopyVecEntry, DirstateEntry, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
-};
+use std::time::Duration;
+use {DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError};
 
 /// Parents are stored in the dirstate as byte hashes.
-const PARENT_SIZE: usize = 20;
+pub const PARENT_SIZE: usize = 20;
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
+// TODO parse/pack: is mutate-on-loop better for performance?
+
 pub fn parse_dirstate(
+    state_map: &mut StateMap,
+    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> {
+) -> Result<DirstateParents, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
 
-    let mut dirstate_vec = vec![];
-    let mut copies = vec![];
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
-        p1: &contents[..PARENT_SIZE],
-        p2: &contents[PARENT_SIZE..curr_pos],
+        p1: contents[..PARENT_SIZE].to_vec(),
+        p2: contents[PARENT_SIZE..curr_pos].to_vec(),
     };
 
     while curr_pos < contents.len() {
@@ -38,7 +40,7 @@
         let entry_bytes = &contents[curr_pos..];
 
         let mut cursor = Cursor::new(entry_bytes);
-        let state = cursor.read_i8()?;
+        let state: EntryState = cursor.read_u8()?.try_into()?;
         let mode = cursor.read_i32::<BigEndian>()?;
         let size = cursor.read_i32::<BigEndian>()?;
         let mtime = cursor.read_i32::<BigEndian>()?;
@@ -57,9 +59,9 @@
         };
 
         if let Some(copy_path) = copy {
-            copies.push(CopyVecEntry { path, copy_path });
+            copy_map.insert(path.to_owned(), copy_path.to_owned());
         };
-        dirstate_vec.push((
+        state_map.insert(
             path.to_owned(),
             DirstateEntry {
                 state,
@@ -67,28 +69,30 @@
                 size,
                 mtime,
             },
-        ));
+        );
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok((parents, dirstate_vec, copies))
+    Ok(parents)
 }
 
 pub fn pack_dirstate(
-    dirstate_vec: &DirstateVec,
-    copymap: &HashMap<Vec<u8>, Vec<u8>>,
+    state_map: &mut StateMap,
+    copy_map: &CopyMap,
     parents: DirstateParents,
-    now: i32,
-) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
+    now: Duration,
+) -> Result<Vec<u8>, DirstatePackError> {
     if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE {
         return Err(DirstatePackError::CorruptedParent);
     }
 
-    let expected_size: usize = dirstate_vec
+    let now = now.as_secs() as i32;
+
+    let expected_size: usize = state_map
         .iter()
-        .map(|(ref filename, _)| {
+        .map(|(filename, _)| {
             let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(ref copy) = copymap.get(filename) {
+            if let Some(ref copy) = copy_map.get(filename) {
                 length += copy.len() + 1;
             }
             length
@@ -97,15 +101,15 @@
     let expected_size = expected_size + PARENT_SIZE * 2;
 
     let mut packed = Vec::with_capacity(expected_size);
-    let mut new_dirstate_vec = vec![];
+    let mut new_state_map = vec![];
 
     packed.extend(parents.p1);
     packed.extend(parents.p2);
 
-    for (ref filename, entry) in dirstate_vec {
-        let mut new_filename: Vec<u8> = filename.to_owned();
+    for (ref filename, entry) in state_map.iter() {
+        let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
-        if entry.state == 'n' as i8 && entry.mtime == now.into() {
+        if entry.state == EntryState::Normal && entry.mtime == now {
             // The file was last modified "simultaneously" with the current
             // write to dirstate (i.e. within the same second for file-
             // systems with a granularity of 1 sec). This commonly happens
@@ -116,8 +120,8 @@
             // contents of the file if the size is the same. This prevents
             // mistakenly treating such files as clean.
             new_mtime = -1;
-            new_dirstate_vec.push((
-                filename.to_owned(),
+            new_state_map.push((
+                filename.to_owned().to_vec(),
                 DirstateEntry {
                     mtime: new_mtime,
                     ..*entry
@@ -125,12 +129,12 @@
             ));
         }
 
-        if let Some(copy) = copymap.get(filename) {
+        if let Some(copy) = copy_map.get(*filename) {
             new_filename.push('\0' as u8);
             new_filename.extend(copy);
         }
 
-        packed.write_i8(entry.state)?;
+        packed.write_u8(entry.state.into())?;
         packed.write_i32::<BigEndian>(entry.mode)?;
         packed.write_i32::<BigEndian>(entry.size)?;
         packed.write_i32::<BigEndian>(new_mtime)?;
@@ -142,143 +146,155 @@
         return Err(DirstatePackError::BadSize(expected_size, packed.len()));
     }
 
-    Ok((packed, new_dirstate_vec))
+    state_map.extend(new_state_map);
+
+    Ok(packed)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_pack_dirstate_empty() {
-        let dirstate_vec: DirstateVec = vec![];
+        let mut state_map: StateMap = HashMap::new();
         let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected =
-            (b"1234567891011121314100000000000000000000".to_vec(), vec![]);
+        let now = Duration::new(15000000, 0);
+        let expected = b"1234567891011121314100000000000000000000".to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+
+        assert!(state_map.is_empty())
     }
     #[test]
     fn test_pack_dirstate_one_entry() {
-        let dirstate_vec: DirstateVec = vec![(
-            vec!['f' as u8, '1' as u8],
-            DirstateEntry {
-                state: 'n' as i8,
-                mode: 0o644,
-                size: 0,
-                mtime: 791231220,
-            },
-        )];
-        let copymap = HashMap::new();
-        let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
-        };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49,
-            ]
-            .to_vec(),
-            vec![],
-        );
-
-        assert_eq!(
-            expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
-        );
-    }
-    #[test]
-    fn test_pack_dirstate_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
+
+        let copymap = HashMap::new();
+        let parents = DirstateParents {
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
+        };
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 2, 102, 49,
+        ]
+        .to_vec();
+
+        assert_eq!(
+            expected,
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
+        );
+
+        assert_eq!(expected_state_map, state_map);
+    }
+    #[test]
+    fn test_pack_dirstate_one_entry_with_copy() {
+        let expected_state_map: StateMap = [(
+            b"f1".to_vec(),
+            DirstateEntry {
+                state: EntryState::Normal,
+                mode: 0o644,
+                size: 0,
+                mtime: 791231220,
+            },
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111,
-                112, 121, 110, 97, 109, 101,
-            ]
-            .to_vec(),
-            vec![],
-        );
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
+            109, 101,
+        ]
+        .to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+        assert_eq!(expected_state_map, state_map);
     }
 
     #[test]
     fn test_parse_pack_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (
-                parents,
-                dirstate_vec,
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
-            ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     fn test_parse_pack_multiple_entries_with_copy() {
-        let dirstate_vec: DirstateVec = vec![
+        let mut state_map: StateMap = [
             (
                 b"f1".to_vec(),
                 DirstateEntry {
-                    state: 'n' as i8,
+                    state: EntryState::Normal,
                     mode: 0o644,
                     size: 0,
                     mtime: 791231220,
@@ -287,7 +303,7 @@
             (
                 b"f2".to_vec(),
                 DirstateEntry {
-                    state: 'm' as i8,
+                    state: EntryState::Merged,
                     mode: 0o777,
                     size: 1000,
                     mtime: 791231220,
@@ -296,7 +312,7 @@
             (
                 b"f3".to_vec(),
                 DirstateEntry {
-                    state: 'r' as i8,
+                    state: EntryState::Removed,
                     mode: 0o644,
                     size: 234553,
                     mtime: 791231220,
@@ -305,84 +321,95 @@
             (
                 b"f4\xF6".to_vec(),
                 DirstateEntry {
-                    state: 'a' as i8,
+                    state: EntryState::Added,
                     mode: 0o644,
                     size: -1,
                     mtime: -1,
                 },
             ),
-        ];
+        ]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (parents, dirstate_vec, copymap),
-            parse_dirstate(result.0.as_slice())
-                .and_then(|(p, dvec, cvec)| Ok((
-                    p,
-                    dvec,
-                    cvec.iter()
-                        .map(|entry| (
-                            entry.path.to_vec(),
-                            entry.copy_path.to_vec()
-                        ))
-                        .collect()
-                )))
-                .unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
     fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
-                state: 'n' as i8,
+                state: EntryState::Normal,
                 mode: 0o644,
                 size: 0,
                 mtime: 15000000,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
 
         assert_eq!(
             (
                 parents,
-                vec![(
+                [(
                     b"f1".to_vec(),
                     DirstateEntry {
-                        state: 'n' as i8,
+                        state: EntryState::Normal,
                         mode: 0o644,
                         size: 0,
                         mtime: -1
                     }
-                )],
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
+                )]
+                .iter()
+                .cloned()
+                .collect::<StateMap>(),
+                copymap,
             ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 }
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,36 +1,70 @@
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use DirstateParseError;
+
 pub mod dirs_multiset;
+pub mod dirstate_map;
 pub mod parsers;
 
-#[derive(Debug, PartialEq, Copy, Clone)]
-pub struct DirstateParents<'a> {
-    pub p1: &'a [u8],
-    pub p2: &'a [u8],
+#[derive(Debug, PartialEq, Clone)]
+pub struct DirstateParents {
+    pub p1: Vec<u8>,
+    pub p2: Vec<u8>,
 }
 
 /// The C implementation uses all signed types. This will be an issue
 /// either when 4GB+ source files are commonplace or in 2038, whichever
 /// comes first.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
-    pub state: i8,
+    pub state: EntryState,
     pub mode: i32,
     pub mtime: i32,
     pub size: i32,
 }
 
-pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable<'a> {
+    Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
+    Manifest(&'a Vec<Vec<u8>>),
+}
 
-#[derive(Debug, PartialEq)]
-pub struct CopyVecEntry<'a> {
-    pub path: &'a [u8],
-    pub copy_path: &'a [u8],
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum EntryState {
+    Normal,
+    Added,
+    Removed,
+    Merged,
+    Unknown,
 }
 
-pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
+impl TryFrom<u8> for EntryState {
+    type Error = DirstateParseError;
 
-/// The Python implementation passes either a mapping (dirstate) or a flat
-/// iterable (manifest)
-pub enum DirsIterable {
-    Dirstate(DirstateVec),
-    Manifest(Vec<Vec<u8>>),
+    fn try_from(value: u8) -> Result<Self, Self::Error> {
+        match value {
+            b'n' => Ok(EntryState::Normal),
+            b'a' => Ok(EntryState::Added),
+            b'r' => Ok(EntryState::Removed),
+            b'm' => Ok(EntryState::Merged),
+            b'?' => Ok(EntryState::Unknown),
+            _ => Err(DirstateParseError::CorruptedEntry(format!(
+                "Incorrect entry state {}",
+                value
+            ))),
+        }
+    }
 }
+
+impl Into<u8> for EntryState {
+    fn into(self) -> u8 {
+        match self {
+            EntryState::Normal => b'n',
+            EntryState::Added => b'a',
+            EntryState::Removed => b'r',
+            EntryState::Merged => b'm',
+            EntryState::Unknown => b'?',
+        }
+    }
+}
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -0,0 +1,432 @@
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes at octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use core::borrow::Borrow;
+use dirstate::parsers::PARENT_SIZE;
+use dirstate::EntryState;
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+use std::ops::Deref;
+use std::time::Duration;
+use {
+    parsers::pack_dirstate, parsers::parse_dirstate, DirsIterable,
+    DirsMultiset, DirstateEntry, DirstateError, DirstateMapError,
+    DirstateParents, DirstateParseError,
+};
+
+pub type StateMap = HashMap<Vec<u8>, DirstateEntry>;
+pub type CopyMap = HashMap<Vec<u8>, Vec<u8>>;
+pub type FileFoldMap = HashMap<Vec<u8>, Vec<u8>>;
+
+const NULL_REVISION: &[u8] = b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+const MTIME_UNSET: i32 = -1;
+const SIZE_DIRTY: i32 = -2;
+
+#[derive(Default)]
+pub struct DirstateMap {
+    state_map: StateMap,
+    pub copy_map: CopyMap,
+    file_fold_map: Option<FileFoldMap>,
+    pub dirs: Option<DirsMultiset>,
+    pub all_dirs: Option<DirsMultiset>,
+    non_normal_set: HashSet<Vec<u8>>,
+    other_parent_set: HashSet<Vec<u8>>,
+    parents: Option<DirstateParents>,
+    dirty_parents: bool,
+}
+
+/// Should only really be used in python interface code, for clarity
+impl Deref for DirstateMap {
+    type Target = StateMap;
+
+    fn deref(&self) -> &Self::Target {
+        &self.state_map
+    }
+}
+
+impl FromIterator<(Vec<u8>, DirstateEntry)> for DirstateMap {
+    fn from_iter<I: IntoIterator<Item = (Vec<u8>, DirstateEntry)>>(
+        iter: I,
+    ) -> Self {
+        Self {
+            state_map: iter.into_iter().collect(),
+            ..Self::default()
+        }
+    }
+}
+
+impl DirstateMap {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn clear(&mut self) {
+        self.state_map.clear();
+        self.copy_map.clear();
+        self.file_fold_map = None;
+        self.non_normal_set.clear();
+        self.other_parent_set.clear();
+        self.set_parents(DirstateParents {
+            p1: NULL_REVISION.to_vec(),
+            p2: NULL_REVISION.to_vec(),
+        })
+    }
+
+    /// Add a tracked file to the dirstate
+    pub fn add_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+        entry: DirstateEntry,
+    ) {
+        if old_state == EntryState::Unknown || old_state == EntryState::Removed
+        {
+            if let Some(ref mut dirs) = self.dirs {
+                dirs.add_path(filename)
+            }
+        }
+        if old_state == EntryState::Unknown {
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.add_path(filename)
+            }
+        }
+        self.state_map.insert(filename.to_owned(), entry.to_owned());
+
+        if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
+            self.non_normal_set.insert(filename.to_owned());
+        }
+
+        if entry.size == SIZE_DIRTY {
+            self.other_parent_set.insert(filename.to_owned());
+        }
+    }
+
+    /// Mark a file as removed in the dirstate.
+    ///
+    /// The `size` parameter is used to store sentinel values that indicate
+    /// the file's previous state.  In the future, we should refactor this
+    /// to be more explicit about what that state is.
+    pub fn remove_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+        size: i32,
+    ) -> Result<(), DirstateMapError> {
+        if old_state != EntryState::Unknown && old_state != EntryState::Removed
+        {
+            if let Some(ref mut dirs) = self.dirs {
+                dirs.delete_path(filename)?;
+            }
+        }
+        if old_state == EntryState::Unknown {
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.add_path(filename);
+            }
+        }
+
+        if let Some(ref mut file_fold_map) = self.file_fold_map {
+            file_fold_map
+                .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
+        }
+        self.state_map.insert(
+            filename.to_owned(),
+            DirstateEntry {
+                state: EntryState::Removed,
+                mode: 0,
+                size: size,
+                mtime: 0,
+            },
+        );
+        self.non_normal_set.insert(filename.to_owned());
+        Ok(())
+    }
+
+    /// Remove a file from the dirstate.
+    /// Returns `true` if the file was previously recorded.
+    pub fn drop_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+    ) -> Result<bool, DirstateMapError> {
+        let exists = self
+            .state_map
+            .remove::<Vec<u8>>(filename.to_owned().as_ref())
+            .is_some();
+
+        if exists {
+            if old_state != EntryState::Removed {
+                if let Some(ref mut dirs) = self.dirs {
+                    dirs.delete_path(filename)?;
+                }
+            }
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.delete_path(filename)?;
+            }
+        }
+        if let Some(ref mut file_fold_map) = self.file_fold_map {
+            file_fold_map
+                .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
+        }
+        self.non_normal_set
+            .remove::<Vec<u8>>(filename.to_owned().as_ref());
+
+        Ok(exists)
+    }
+
+    pub fn clear_ambiguous_times(
+        &mut self,
+        filenames: Vec<Vec<u8>>,
+        now: i32,
+    ) {
+        for filename in filenames {
+            let mut changed = false;
+            self.state_map
+                .entry(filename.to_owned())
+                .and_modify(|entry| {
+                    if entry.state == EntryState::Normal && entry.mtime == now
+                    {
+                        changed = true;
+                        *entry = DirstateEntry {
+                            mtime: MTIME_UNSET,
+                            ..*entry
+                        };
+                    }
+                });
+            if changed {
+                self.non_normal_set.insert(filename.to_owned());
+            }
+        }
+    }
+
+    pub fn non_normal_other_parent_entries(
+        &self,
+    ) -> (HashSet<Vec<u8>>, HashSet<Vec<u8>>) {
+        let mut non_normal = HashSet::new();
+        let mut other_parent = HashSet::new();
+
+        for (
+            filename,
+            DirstateEntry {
+                state, size, mtime, ..
+            },
+        ) in self.state_map.iter()
+        {
+            if *state != EntryState::Normal || *mtime == MTIME_UNSET {
+                non_normal.insert(filename.to_owned());
+            }
+            if *state == EntryState::Normal && *size == SIZE_DIRTY {
+                other_parent.insert(filename.to_owned());
+            }
+        }
+
+        (non_normal, other_parent)
+    }
+    pub fn set_all_dirs(&mut self) {
+        if self.all_dirs.is_none() {
+            self.all_dirs = Some(DirsMultiset::new(
+                DirsIterable::Dirstate(&self.state_map),
+                None,
+            ));
+        }
+    }
+    pub fn set_dirs(&mut self) {
+        if self.dirs.is_none() {
+            self.dirs = Some(DirsMultiset::new(
+                DirsIterable::Dirstate(&self.state_map),
+                Some(EntryState::Removed),
+            ));
+        }
+    }
+
+    pub fn has_tracked_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_dirs();
+        self.dirs.as_ref().unwrap().contains_key(directory.as_ref())
+    }
+
+    pub fn has_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_all_dirs();
+        self.all_dirs
+            .as_ref()
+            .unwrap()
+            .contains_key(directory.as_ref())
+    }
+
+    pub fn parents(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<DirstateParents, DirstateError> {
+        if let Some(ref parents) = self.parents {
+            return Ok(parents.clone());
+        }
+        let parents;
+        if file_contents.len() == 40 {
+            parents = DirstateParents {
+                p1: file_contents[..PARENT_SIZE].to_owned(),
+                p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2].to_owned(),
+            };
+        } else if file_contents.is_empty() {
+            parents = DirstateParents {
+                p1: NULL_REVISION.to_owned(),
+                p2: NULL_REVISION.to_owned(),
+            };
+        } else {
+            return Err(DirstateError::Parse(DirstateParseError::Damaged));
+        }
+
+        self.parents = Some(parents.to_owned());
+        Ok(parents.clone())
+    }
+
+    pub fn set_parents(&mut self, parents: DirstateParents) {
+        self.parents = Some(parents.clone());
+        self.dirty_parents = true;
+    }
+
+    pub fn read(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<Option<DirstateParents>, DirstateError> {
+        if file_contents.is_empty() {
+            return Ok(None);
+        }
+
+        let parents = parse_dirstate(
+            &mut self.state_map,
+            &mut self.copy_map,
+            file_contents,
+        )?;
+
+        if !self.dirty_parents {
+            self.set_parents(parents.to_owned());
+        }
+
+        Ok(Some(parents))
+    }
+
+    pub fn pack(
+        &mut self,
+        parents: DirstateParents,
+        now: Duration,
+    ) -> Result<Vec<u8>, DirstateError> {
+        let packed =
+            pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
+
+        self.dirty_parents = false;
+
+        let result = self.non_normal_other_parent_entries();
+        self.non_normal_set = result.0;
+        self.other_parent_set = result.1;
+        Ok(packed)
+    }
+}
+
+/// Holds all property-like functions to make Python happy in the short term
+impl DirstateMap {
+    pub fn property_file_fold_map(&mut self) -> FileFoldMap {
+        if let Some(ref file_fold_map) = self.file_fold_map {
+            return file_fold_map.to_owned();
+        }
+        let mut new_file_fold_map = FileFoldMap::new();
+        for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
+        {
+            if *state == EntryState::Removed {
+                new_file_fold_map.insert(
+                    filename.to_ascii_uppercase().to_owned(),
+                    filename.to_owned(),
+                );
+            }
+        }
+        self.file_fold_map = Some(new_file_fold_map);
+        self.file_fold_map.to_owned().unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_dirs_multiset() {
+        let mut map = DirstateMap::new();
+        assert!(map.dirs.is_none());
+        assert!(map.all_dirs.is_none());
+
+        assert_eq!(false, map.has_dir(b"nope"));
+        assert!(map.all_dirs.is_some());
+        assert!(map.dirs.is_none());
+
+        assert_eq!(false, map.has_tracked_dir(b"nope"));
+        assert!(map.dirs.is_some());
+    }
+
+    #[test]
+    fn test_add_file() {
+        let mut map = DirstateMap::new();
+
+        assert_eq!(0, map.len());
+
+        map.add_file(
+            b"meh",
+            EntryState::Normal,
+            DirstateEntry {
+                state: EntryState::Normal,
+                mode: 1337,
+                mtime: 1337,
+                size: 1337,
+            },
+        );
+
+        assert_eq!(1, map.len());
+        assert_eq!(0, map.non_normal_set.len());
+        assert_eq!(0, map.other_parent_set.len());
+    }
+
+    #[test]
+    fn test_non_normal_other_parent_entries() {
+        let map: DirstateMap = [
+            (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
+            (b"f2", (EntryState::Normal, 1337, 1337, -1)),
+            (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
+            (b"f4", (EntryState::Normal, 1337, -2, 1337)),
+            (b"f5", (EntryState::Added, 1337, 1337, 1337)),
+            (b"f6", (EntryState::Added, 1337, 1337, -1)),
+            (b"f7", (EntryState::Merged, 1337, 1337, -1)),
+            (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
+            (b"f9", (EntryState::Merged, 1337, -2, 1337)),
+            (b"fa", (EntryState::Added, 1337, -2, 1337)),
+            (b"fb", (EntryState::Removed, 1337, -2, 1337)),
+        ]
+        .iter()
+        .map(|(fname, (state, mode, size, mtime))| {
+            (
+                fname.to_vec(),
+                DirstateEntry {
+                    state: *state,
+                    mode: *mode,
+                    size: *size,
+                    mtime: *mtime,
+                },
+            )
+        })
+        .collect();
+
+        let non_normal = [
+            b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
+        ]
+        .iter()
+        .map(|x| x.to_vec())
+        .collect();
+
+        let mut other_parent = HashSet::new();
+        other_parent.insert(b"f4".to_vec());
+
+        assert_eq!(
+            (non_normal, other_parent),
+            map.non_normal_other_parent_entries()
+        );
+    }
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -8,6 +8,7 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
+use dirstate::EntryState;
 use std::collections::hash_map::{Entry, Iter};
 use std::collections::HashMap;
 use {DirsIterable, DirstateEntry, DirstateMapError};
@@ -21,17 +22,20 @@
     /// Initializes the multiset from a dirstate or a manifest.
     ///
     /// If `skip_state` is provided, skips dirstate entries with equal state.
-    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+    pub fn new(
+        iterable: DirsIterable,
+        skip_state: Option<EntryState>,
+    ) -> Self {
         let mut multiset = DirsMultiset {
             inner: HashMap::new(),
         };
 
         match iterable {
             DirsIterable::Dirstate(vec) => {
-                for (ref filename, DirstateEntry { state, .. }) in vec {
+                for (filename, DirstateEntry { state, .. }) in vec {
                     // This `if` is optimized out of the loop
                     if let Some(skip) = skip_state {
-                        if skip != state {
+                        if skip != *state {
                             multiset.add_path(filename);
                         }
                     } else {
@@ -40,7 +44,7 @@
                 }
             }
             DirsIterable::Manifest(vec) => {
-                for ref filename in vec {
+                for filename in vec {
                     multiset.add_path(filename);
                 }
             }
@@ -140,10 +144,12 @@
 #[cfg(test)]
 mod tests {
     use super::*;
+    use dirstate::EntryState;
+    use std::collections::HashMap;
 
     #[test]
     fn test_delete_path_path_not_found() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"doesnotexist/";
         assert_eq!(
             Err(DirstateMapError::PathNotFound(path.to_vec())),
@@ -154,7 +160,7 @@
     #[test]
     fn test_delete_path_empty_path() {
         let mut map =
-            DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
+            DirsMultiset::new(DirsIterable::Manifest(&vec![vec![]]), None);
         let path = b"";
         assert_eq!(Ok(()), map.delete_path(path));
         assert_eq!(
@@ -194,7 +200,7 @@
 
     #[test]
     fn test_add_path_empty_path() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"";
         map.add_path(path);
 
@@ -203,7 +209,7 @@
 
     #[test]
     fn test_add_path_successful() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
 
         map.add_path(b"a/");
         assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
@@ -250,13 +256,13 @@
     fn test_dirsmultiset_new_empty() {
         use DirsIterable::{Dirstate, Manifest};
 
-        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let new = DirsMultiset::new(Manifest(&vec![]), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
         assert_eq!(expected, new);
 
-        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let new = DirsMultiset::new(Dirstate(&HashMap::new()), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -276,7 +282,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let new = DirsMultiset::new(Manifest(&input_vec), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -288,7 +294,7 @@
                 (
                     f.as_bytes().to_vec(),
                     DirstateEntry {
-                        state: 0,
+                        state: EntryState::Normal,
                         mode: 0,
                         mtime: 0,
                         size: 0,
@@ -301,7 +307,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let new = DirsMultiset::new(Dirstate(&input_map), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -321,28 +327,33 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Manifest(&input_vec), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         // Skip does not affect a manifest
         assert_eq!(expected, new);
 
-        let input_map =
-            [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
-                .iter()
-                .map(|(f, state)| {
-                    (
-                        f.as_bytes().to_vec(),
-                        DirstateEntry {
-                            state: *state as i8,
-                            mode: 0,
-                            mtime: 0,
-                            size: 0,
-                        },
-                    )
-                })
-                .collect();
+        let input_map = [
+            ("a/", EntryState::Normal),
+            ("a/b/", EntryState::Normal),
+            ("a/c", EntryState::Removed),
+            ("a/d/", EntryState::Merged),
+        ]
+        .iter()
+        .map(|(f, state)| {
+            (
+                f.as_bytes().to_vec(),
+                DirstateEntry {
+                    state: *state,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                },
+            )
+        })
+        .collect();
 
         // "a" incremented with "a/c" and "a/d/"
         let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
@@ -350,7 +361,8 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let new =
+            DirsMultiset::new(Dirstate(&input_map), Some(EntryState::Normal));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -13,6 +13,6 @@
 
 [dependencies]
 byteorder = "1.3.1"
+memchr = "2.2.0"
 lazy_static = "1.3.0"
-memchr = "2.2.0"
-regex = "^1.1"
+regex = "^1.1"
\ No newline at end of file
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -36,7 +36,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -65,7 +65,7 @@
 dependencies = [
  "cpython 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "hg-core 0.1.0",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "python3-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -75,7 +75,7 @@
 version = "0.1.0"
 dependencies = [
  "hg-core 0.1.0",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
@@ -85,7 +85,7 @@
 
 [[package]]
 name = "libc"
-version = "0.2.45"
+version = "0.2.55"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
 [[package]]
@@ -103,7 +103,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -112,7 +112,7 @@
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
@@ -122,7 +122,7 @@
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
  "autocfg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -177,7 +177,7 @@
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -189,7 +189,7 @@
 dependencies = [
  "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "fuchsia-cprng 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -307,7 +307,7 @@
 "checksum cpython 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b489034e723e7f5109fecd19b719e664f89ef925be785885252469e9822fa940"
 "checksum fuchsia-cprng 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "81f7f8eb465745ea9b02e2704612a9946a59fa40572086c6fd49d6ddcf30bf31"
 "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
-"checksum libc 0.2.45 (registry+https://github.com/rust-lang/crates.io-index)" = "2d2857ec59fadc0773853c664d2d18e7198e83883e7060b63c924cb077bd5c74"
+"checksum libc 0.2.55 (registry+https://github.com/rust-lang/crates.io-index)" = "42914d39aad277d9e176efbdad68acb1d5443ab65afe0e0e4f0d49352a950880"
 "checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39"
 "checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
 "checksum python27-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "56114c37d4dca82526d74009df7782a28c871ac9d36b19d4cb9e67672258527e"
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -28,7 +28,7 @@
 )
 
 parsers = policy.importmod(r'parsers')
-dirstatemod = policy.importrust(r'dirstate', default=parsers)
+rustmod = policy.importrust(r'dirstate')
 
 propertycache = util.propertycache
 filecache = scmutil.filecache
@@ -652,7 +652,8 @@
         delaywrite = self._ui.configint('debug', 'dirstate.delaywrite')
         if delaywrite > 0:
             # do we have any files to delay for?
-            for f, e in self._map.iteritems():
+            items = self._map.iteritems()
+            for f, e in items:
                 if e[0] == 'n' and e[3] == now:
                     import time # to avoid useless import
                     # rather than sleep n seconds, sleep until the next
@@ -663,6 +664,12 @@
                     time.sleep(end - clock)
                     now = end # trust our estimate that the end is near now
                     break
+            # since the iterator is potentially not depleted,
+            # delete the iterator to release the reference for the Rust
+            # implementation.
+            # TODO make the Rust implementation behave like Python
+            # since this would not work with a non ref-counting GC.
+            del items
 
         self._map.write(st, now)
         self._lastnormaltime = 0
@@ -1475,7 +1482,9 @@
         # parsing the dirstate.
         #
         # (we cannot decorate the function directly since it is in a C module)
-        parse_dirstate = util.nogc(dirstatemod.parse_dirstate)
+        parse_dirstate = parsers.parse_dirstate
+
+        parse_dirstate = util.nogc(parse_dirstate)
         p = parse_dirstate(self._map, self.copymap, st)
         if not self._dirtyparents:
             self.setparents(*p)
@@ -1486,8 +1495,10 @@
         self.get = self._map.get
 
     def write(self, st, now):
-        st.write(dirstatemod.pack_dirstate(self._map, self.copymap,
-                                           self.parents(), now))
+        pack_dirstate = parsers.pack_dirstate
+
+        st.write(pack_dirstate(self._map, self.copymap,
+                                       self.parents(), now))
         st.close()
         self._dirtyparents = False
         self.nonnormalset, self.otherparentset = self.nonnormalentries()
@@ -1516,3 +1527,187 @@
         for name in self._dirs:
             f[normcase(name)] = name
         return f
+
+
+if rustmod is not None:
+    class dirstatemap(object):
+        def __init__(self, ui, opener, root):
+            self._ui = ui
+            self._opener = opener
+            self._root = root
+            self._filename = 'dirstate'
+            self._parents = None
+            self._dirtyparents = False
+
+            # for consistent view between _pl() and _read() invocations
+            self._pendingmode = None
+
+
+        def addfile(self, *args, **kwargs):
+            return self._rustmap.addfile(*args, **kwargs)
+
+        def removefile(self, *args, **kwargs):
+            return self._rustmap.removefile(*args, **kwargs)
+
+        def dropfile(self, *args, **kwargs):
+            return self._rustmap.dropfile(*args, **kwargs)
+
+        def clearambiguoustimes(self, *args, **kwargs):
+            return self._rustmap.clearambiguoustimes(*args, **kwargs)
+
+        def nonnormalentries(self):
+            return self._rustmap.nonnormalentries()
+
+        def get(self, *args, **kwargs):
+            return self._rustmap.get(*args, **kwargs)
+
+        @propertycache
+        def _rustmap(self):
+            self._rustmap = rustmod.DirstateMap(self._root)
+            self.read()
+            return self._rustmap
+
+        @property
+        def copymap(self):
+            return self._rustmap.copymap()
+
+        def preload(self):
+            self._rustmap
+
+        def clear(self):
+            self._rustmap.clear()
+            self.setparents(nullid, nullid)
+            util.clearcachedproperty(self, "_dirs")
+            util.clearcachedproperty(self, "_alldirs")
+            util.clearcachedproperty(self, "dirfoldmap")
+
+        def items(self):
+            return self._rustmap.items()
+
+        def keys(self):
+            return iter(self._rustmap)
+
+        def __contains__(self, key):
+            return key in self._rustmap
+
+        def __getitem__(self, item):
+            return self._rustmap[item]
+
+        def __len__(self):
+            return len(self._rustmap)
+
+        def __iter__(self):
+            return iter(self._rustmap)
+
+        # forward for python2,3 compat
+        iteritems = items
+
+        def _opendirstatefile(self):
+            fp, mode = txnutil.trypending(self._root, self._opener,
+                                          self._filename)
+            if self._pendingmode is not None and self._pendingmode != mode:
+                fp.close()
+                raise error.Abort(_('working directory state may be '
+                                    'changed parallelly'))
+            self._pendingmode = mode
+            return fp
+
+        def setparents(self, p1, p2):
+            self._rustmap.setparents(p1, p2)
+            self._parents = (p1, p2)
+            self._dirtyparents = True
+
+        def parents(self):
+            if not self._parents:
+                try:
+                    fp = self._opendirstatefile()
+                    st = fp.read(40)
+                    fp.close()
+                except IOError as err:
+                    if err.errno != errno.ENOENT:
+                        raise
+                    # File doesn't exist, so the current state is empty
+                    st = ''
+
+                try:
+                    self._parents = self._rustmap.parents(st)
+                except ValueError:
+                    raise error.Abort(_('working directory state appears '
+                                        'damaged!'))
+
+            return self._parents
+
+        def read(self):
+            # ignore HG_PENDING because identity is used only for writing
+            self.identity = util.filestat.frompath(
+                self._opener.join(self._filename))
+
+            try:
+                fp = self._opendirstatefile()
+                try:
+                    st = fp.read()
+                finally:
+                    fp.close()
+            except IOError as err:
+                if err.errno != errno.ENOENT:
+                    raise
+                return
+            if not st:
+                return
+
+            parse_dirstate = util.nogc(self._rustmap.read)
+            parents = parse_dirstate(st)
+            if parents and not self._dirtyparents:
+                self.setparents(*parents)
+
+        def write(self, st, now):
+            parents = self.parents()
+            st.write(self._rustmap.write(parents[0], parents[1], now))
+            st.close()
+            self._dirtyparents = False
+
+        @propertycache
+        def filefoldmap(self):
+            """Returns a dictionary mapping normalized case paths to their
+            non-normalized versions.
+            """
+            return self._rustmap.filefoldmapasdict()
+
+        def hastrackeddir(self, d):
+            self._dirs # Trigger Python's propertycache
+            return self._rustmap.hastrackeddir(d)
+
+        def hasdir(self, d):
+            self._dirs # Trigger Python's propertycache
+            return self._rustmap.hasdir(d)
+
+        @propertycache
+        def _dirs(self):
+            return self._rustmap.getdirs()
+
+        @propertycache
+        def _alldirs(self):
+            return self._rustmap.getalldirs()
+
+        @propertycache
+        def identity(self):
+            self._rustmap
+            return self.identity
+
+        @property
+        def nonnormalset(self):
+            nonnorm, otherparents = self._rustmap.nonnormalentries()
+            return nonnorm
+
+        @property
+        def otherparentset(self):
+            nonnorm, otherparents = self._rustmap.nonnormalentries()
+            return otherparents
+
+        @propertycache
+        def dirfoldmap(self):
+            f = {}
+            normcase = util.normcase
+            for name in self._dirs:
+                f[normcase(name)] = name
+            return f
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -459,7 +459,7 @@
     lfiles = set()
     for f in actions:
         splitstandin = lfutil.splitstandin(f)
-        if splitstandin in p1:
+        if splitstandin is not None and splitstandin in p1:
             lfiles.add(splitstandin)
         elif lfutil.standin(f) in p1:
             lfiles.add(f)



To: Alphare, #hg-reviewers
Cc: yuja, durin42, kevincox, mjpieters, mercurial-devel


More information about the Mercurial-devel mailing list