D7869: rust-dirs-multiset: add `DirsChildrenMultiset`

Alphare (Raphaël Gomès) phabricator at mercurial-scm.org
Tue Jan 14 12:35:08 EST 2020


Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  In a future patch, this structure will be needed to store information needed by
  the (also upcoming) `IgnoreMatcher`.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7869

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/utils/files.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -10,11 +10,11 @@
 //! Functions for fiddling with files.
 
 use crate::utils::hg_path::{HgPath, HgPathBuf};
-use std::iter::FusedIterator;
 
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
+use std::iter::FusedIterator;
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
@@ -64,6 +64,34 @@
 
 impl<'a> FusedIterator for Ancestors<'a> {}
 
+/// An iterator over repository path yielding itself and its ancestors.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct AncestorsWithBase<'a> {
+    next: Option<(&'a HgPath, &'a HgPath)>,
+}
+
+impl<'a> Iterator for AncestorsWithBase<'a> {
+    type Item = (&'a HgPath, &'a HgPath);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.next;
+        self.next = match self.next {
+            Some((s, _)) if s.is_empty() => None,
+            Some((s, _)) => match s.bytes().rposition(|c| *c == b'/') {
+                None => Some((HgPath::new(""), s)),
+                Some(size) => Some((
+                    HgPath::new(&s.as_bytes()[..size]),
+                    HgPath::new(&s.as_bytes()[size + 1..]),
+                )),
+            },
+            None => None,
+        };
+        next
+    }
+}
+
+impl<'a> FusedIterator for AncestorsWithBase<'a> {}
+
 /// Returns an iterator yielding ancestor directories of the given repository
 /// path.
 ///
@@ -79,6 +107,25 @@
     dirs
 }
 
+/// Returns an iterator yielding ancestor directories of the given repository
+/// path.
+///
+/// The path is separated by '/', and must not start with '/'.
+///
+/// The path itself isn't included unless it is b"" (meaning the root
+/// directory.)
+pub(crate) fn find_dirs_with_base<'a>(
+    path: &'a HgPath,
+) -> AncestorsWithBase<'a> {
+    let mut dirs = AncestorsWithBase {
+        next: Some((path, HgPath::new(b""))),
+    };
+    if !path.is_empty() {
+        dirs.next(); // skip itself
+    }
+    dirs
+}
+
 /// TODO more than ASCII?
 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
     #[cfg(windows)] // NTFS compares via upper()
@@ -344,4 +391,28 @@
             )
         );
     }
+
+    #[test]
+    fn test_find_dirs_with_base_some() {
+        let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
+        assert_eq!(
+            dirs.next(),
+            Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
+        );
+        assert_eq!(
+            dirs.next(),
+            Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
+        );
+        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
+        assert_eq!(dirs.next(), None);
+        assert_eq!(dirs.next(), None);
+    }
+
+    #[test]
+    fn test_find_dirs_with_base_empty() {
+        let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
+        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
+        assert_eq!(dirs.next(), None);
+        assert_eq!(dirs.next(), None);
+    }
 }
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -8,12 +8,15 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
-use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
-    dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
-    FastHashMap,
+    dirstate::EntryState,
+    utils::{
+        files,
+        hg_path::{HgPath, HgPathBuf},
+    },
+    DirstateEntry, DirstateMapError, FastHashMap,
 };
-use std::collections::hash_map::{self, Entry};
+use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
 
 // could be encapsulated if we care API stability more seriously
 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
@@ -129,6 +132,65 @@
     }
 }
 
+/// This is basically a reimplementation of `DirsMultiset` that stores the
+/// children instead of just a count of them, plus a small optional
+/// optimization to avoid some directories we don't need.
+#[derive(PartialEq, Debug)]
+pub struct DirsChildrenMultiset<'a> {
+    inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
+    only_include: Option<HashSet<&'a HgPath>>,
+}
+
+impl<'a> DirsChildrenMultiset<'a> {
+    pub fn new(
+        paths: impl Iterator<Item = &'a HgPathBuf>,
+        only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
+    ) -> Self {
+        let mut new = Self {
+            inner: HashMap::default(),
+            only_include: only_include
+                .map(|s| s.iter().map(|p| p.as_ref()).collect()),
+        };
+
+        for path in paths {
+            new.add_path(path)
+        }
+
+        new
+    }
+    fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
+        if path.as_ref().is_empty() {
+            return;
+        }
+        for (directory, basename) in files::find_dirs_with_base(path.as_ref())
+        {
+            if !match &self.only_include {
+                None => false,
+                Some(i) => i.contains(&directory),
+            } {
+                continue;
+            }
+            self.inner
+                .entry(directory)
+                .and_modify(|e| {
+                    e.insert(basename);
+                })
+                .or_insert_with(|| {
+                    let mut set = HashSet::new();
+                    set.insert(basename);
+                    set
+                });
+        }
+    }
+
+    pub fn get(
+        &self,
+        path: impl AsRef<HgPath>,
+    ) -> Option<&HashSet<&'a HgPath>> {
+        self.inner.get(path.as_ref())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;



To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list