D7819: rust-nodemap: core implementation for shortest

gracinet (Georges Racinet) phabricator at mercurial-scm.org
Wed Jan 15 18:48:53 EST 2020


gracinet updated this revision to Diff 19331.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7819?vs=19139&id=19331

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7819/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7819

AFFECTED FILES
  rust/hg-core/src/revlog/node.rs
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -16,6 +16,7 @@
     node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
     NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
 };
+use std::cmp::max;
 use std::fmt;
 use std::mem;
 use std::ops::Deref;
@@ -47,6 +48,20 @@
         prefix: NodePrefixRef<'a>,
     ) -> Result<Option<Revision>, NodeMapError>;
 
+    /// Give the size of the shortest node prefix that determines
+    /// the revision uniquely.
+    ///
+    /// From a binary node prefix, if it is matched in the node map, this
+    /// returns the number of hexadecimal digits that would had sufficed
+    /// to find the revision uniquely.
+    ///
+    /// Returns `None` if no `Revision` could be found for the prefix.
+    fn shortest_bin<'a>(
+        &self,
+        idx: &impl RevlogIndex,
+        node_prefix: NodePrefixRef<'a>,
+    ) -> Result<Option<usize>, NodeMapError>;
+
     fn find_hex(
         &self,
         idx: &impl RevlogIndex,
@@ -54,6 +69,16 @@
     ) -> Result<Option<Revision>, NodeMapError> {
         self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
     }
+
+    /// Same as `shortest_bin`, with the hexadecimal representation of the
+    /// prefix as input.
+    fn shortest_hex(
+        &self,
+        idx: &impl RevlogIndex,
+        prefix: &str,
+    ) -> Result<Option<usize>, NodeMapError> {
+        self.shortest_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+    }
 }
 
 pub trait MutableNodeMap: NodeMap {
@@ -215,20 +240,24 @@
 fn validate_candidate<'p>(
     idx: &impl RevlogIndex,
     prefix: NodePrefixRef<'p>,
-    rev: Option<Revision>,
-) -> Result<Option<Revision>, NodeMapError> {
-    if prefix.is_prefix_of(&NULL_NODE) {
-        // NULL_REVISION always matches a prefix made only of zeros
+    cand: (Option<Revision>, usize),
+) -> Result<(Option<Revision>, usize), NodeMapError> {
+    let (rev, steps) = cand;
+    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
+        rev.map_or(Ok((None, steps)), |r| {
+            has_prefix_or_none(idx, prefix, r)
+                .map(|opt| (opt, max(steps, nz_nybble + 1)))
+        })
+    } else {
+        // the prefix is only made of zeros; NULL_REVISION always matches it
         // and any other *valid* result is an ambiguity
         match rev {
-            None => Ok(Some(NULL_REVISION)),
+            None => Ok((Some(NULL_REVISION), steps + 1)),
             Some(r) => match has_prefix_or_none(idx, prefix, r)? {
-                None => Ok(Some(NULL_REVISION)),
+                None => Ok((Some(NULL_REVISION), steps + 1)),
                 _ => Err(NodeMapError::MultipleResults),
             },
         }
-    } else {
-        rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
     }
 }
 
@@ -308,10 +337,10 @@
     fn lookup<'p>(
         &self,
         prefix: NodePrefixRef<'p>,
-    ) -> Result<Option<Revision>, NodeMapError> {
-        for (leaf, _, _, opt) in self.visit(prefix) {
+    ) -> Result<(Option<Revision>, usize), NodeMapError> {
+        for (i, (leaf, _, _, opt)) in self.visit(prefix).enumerate() {
             if leaf {
-                return Ok(opt);
+                return Ok((opt, i + 1));
             }
         }
         Err(NodeMapError::MultipleResults)
@@ -540,6 +569,16 @@
         prefix: NodePrefixRef<'a>,
     ) -> Result<Option<Revision>, NodeMapError> {
         validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+            .map(|(opt, _shortest)| opt)
+    }
+
+    fn shortest_bin<'a>(
+        &self,
+        idx: &impl RevlogIndex,
+        prefix: NodePrefixRef<'a>,
+    ) -> Result<Option<usize>, NodeMapError> {
+        validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+            .map(|(opt, shortest)| opt.map(|_rev| shortest))
     }
 }
 
@@ -665,6 +704,7 @@
         assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
         assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
         assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
+        assert_eq!(nt.shortest_hex(&idx, "00a"), Ok(Some(3)));
         assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
     }
 
@@ -684,8 +724,10 @@
         };
         assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
         assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
+        assert_eq!(nt.shortest_hex(&idx, "c")?, Some(1));
         assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
         assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
+        assert_eq!(nt.shortest_hex(&idx, "000")?, Some(3));
         assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
         Ok(())
     }
@@ -721,6 +763,13 @@
             self.nt.find_hex(&self.index, prefix)
         }
 
+        fn shortest_hex(
+            &self,
+            prefix: &str,
+        ) -> Result<Option<usize>, NodeMapError> {
+            self.nt.shortest_hex(&self.index, prefix)
+        }
+
         /// Drain `added` and restart a new one
         fn commit(self) -> Self {
             let mut as_vec: Vec<Block> =
@@ -773,6 +822,28 @@
     }
 
     #[test]
+    fn test_shortest_zero_prefix() {
+        let mut idx = TestNtIndex::new();
+        idx.insert(0, "00000abcd").unwrap();
+
+        assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
+        // in the nodetree proper, this will be found at the first nybble
+        // yet the correct answer for shortest is not 1, nor 1+1, but the
+        // the first difference with `NULL_NODE`
+        assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+        assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+
+        // same with odd result
+        idx.insert(1, "00123").unwrap();
+        assert_eq!(idx.shortest_hex("001"), Ok(Some(3)));
+        assert_eq!(idx.shortest_hex("0012"), Ok(Some(3)));
+
+        // these are unchanged of course
+        assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+        assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+    }
+
+    #[test]
     fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
         // check that the splitting loop is long enough
         let mut nt_idx = TestNtIndex::new();
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
--- a/rust/hg-core/src/revlog/node.rs
+++ b/rust/hg-core/src/revlog/node.rs
@@ -127,6 +127,36 @@
     pub fn get_nybble(&self, i: usize) -> u8 {
         get_nybble(i, self.buf)
     }
+
+    /// Return the index first nybble that's different from `node`
+    ///
+    /// If the return value is `None` that means that `self` is
+    /// a prefix of `node`, but the current method is a bit slower
+    /// than `is_prefix_of`.
+    ///
+    /// Returned index is as in `get_nybble`, i.e., starting at 0.
+    pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
+        let buf = self.buf;
+        let until = if self.is_odd {
+            buf.len() - 1
+        } else {
+            buf.len()
+        };
+        for i in 0..until {
+            if buf[i] != node[i] {
+                if buf[i] & 0xf0 == node[i] & 0xf0 {
+                    return Some(2 * i + 1);
+                } else {
+                    return Some(2 * i);
+                }
+            }
+        }
+        if self.is_odd && buf[until] & 0xf0 != node[until] & 0xf0 {
+            Some(until * 2)
+        } else {
+            None
+        }
+    }
 }
 
 /// A shortcut for full `Node` references
@@ -235,4 +265,34 @@
         assert_eq!(prefix.borrow().get_nybble(7), 9);
         Ok(())
     }
+
+    #[test]
+    fn test_first_different_nybble_even_prefix() {
+        let prefix = NodePrefix::from_hex("12ca").unwrap();
+        let prefref = prefix.borrow();
+        let mut node: Node = [0; 20];
+        assert_eq!(prefref.first_different_nybble(&node), Some(0));
+        node[0] = 0x13;
+        assert_eq!(prefref.first_different_nybble(&node), Some(1));
+        node[0] = 0x12;
+        assert_eq!(prefref.first_different_nybble(&node), Some(2));
+        node[1] = 0xca;
+        // now it is a prefix
+        assert_eq!(prefref.first_different_nybble(&node), None);
+    }
+
+    #[test]
+    fn test_first_different_nybble_odd_prefix() {
+        let prefix = NodePrefix::from_hex("12c").unwrap();
+        let prefref = prefix.borrow();
+        let mut node: Node = [0; 20];
+        assert_eq!(prefref.first_different_nybble(&node), Some(0));
+        node[0] = 0x13;
+        assert_eq!(prefref.first_different_nybble(&node), Some(1));
+        node[0] = 0x12;
+        assert_eq!(prefref.first_different_nybble(&node), Some(2));
+        node[1] = 0xca;
+        // now it is a prefix
+        assert_eq!(prefref.first_different_nybble(&node), None);
+    }
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list