D7819: rust-nodemap: core implementation for shortest

gracinet (Georges Racinet) phabricator at mercurial-scm.org
Fri Jan 10 10:04:36 EST 2020


gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  In this implementation, we just make `lookup()` return also the number
  of steps that have been needed to come to a conclusion from the
  nodetree data, and `validate_candidate()` takes care of the special
  cases related to `NULL_NODE`.
  
  This way of doing minimizes code duplication, but it means that
  the comparatively slower finding of first non zero nybble will run
  for all calls to `find()` where it is not needed.
  
  Still running on the file generated for the mozilla-central repository,
  it seems indeed that we now get more ofter 320 ns than 310. The odds that
  this could have a significant impact on real life Mercurial performance
  are still looking low. Let's wait for actual benchmark runs to see if
  an optimization is needed here.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7819

AFFECTED FILES
  rust/hg-core/src/revlog/node.rs
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -16,6 +16,7 @@
     node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
     NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
 };
+use std::cmp::max;
 use std::fmt;
 use std::mem;
 use std::ops::Deref;
@@ -47,6 +48,20 @@
         prefix: NodePrefixRef<'a>,
     ) -> Result<Option<Revision>, NodeMapError>;
 
+    /// Give the size of the shortest node prefix that determines
+    /// the revision uniquely.
+    ///
+    /// From a binary node prefix, if it is matched in the node map, this
+    /// returns the number of hexadecimal digits that would had sufficed
+    /// to find the revision uniquely.
+    ///
+    /// Returns `None` if no `Revision` could be found for the prefix.
+    fn shortest_bin<'a>(
+        &self,
+        idx: &impl RevlogIndex,
+        node_prefix: NodePrefixRef<'a>,
+    ) -> Result<Option<usize>, NodeMapError>;
+
     fn find_hex(
         &self,
         idx: &impl RevlogIndex,
@@ -54,6 +69,16 @@
     ) -> Result<Option<Revision>, NodeMapError> {
         self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
     }
+
+    /// Same as `shortest_bin`, with the hexadecimal representation of the
+    /// prefix as input.
+    fn shortest_hex(
+        &self,
+        idx: &impl RevlogIndex,
+        prefix: &str,
+    ) -> Result<Option<usize>, NodeMapError> {
+        self.shortest_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+    }
 }
 
 pub trait MutableNodeMap: NodeMap {
@@ -217,20 +242,24 @@
 fn validate_candidate<'p>(
     idx: &impl RevlogIndex,
     prefix: NodePrefixRef<'p>,
-    rev: Option<Revision>,
-) -> Result<Option<Revision>, NodeMapError> {
-    if prefix.is_prefix_of(&NULL_NODE) {
-        // NULL_REVISION always matches a prefix made only of zeros
+    cand: (Option<Revision>, usize),
+) -> Result<(Option<Revision>, usize), NodeMapError> {
+    let (rev, steps) = cand;
+    if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
+        rev.map_or(Ok((None, steps)), |r| {
+            has_prefix_or_none(idx, prefix, r)
+                .map(|opt| (opt, max(steps, nz_nybble + 1)))
+        })
+    } else {
+        // the prefix is only made of zeros; NULL_REVISION always matches it
         // and any other *valid* result is an ambiguity
         match rev {
-            None => Ok(Some(NULL_REVISION)),
+            None => Ok((Some(NULL_REVISION), steps + 1)),
             Some(r) => match has_prefix_or_none(idx, prefix, r)? {
-                None => Ok(Some(NULL_REVISION)),
+                None => Ok((Some(NULL_REVISION), steps + 1)),
                 _ => Err(NodeMapError::MultipleResults),
             },
         }
-    } else {
-        rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
     }
 }
 
@@ -310,11 +339,13 @@
     fn lookup<'p>(
         &self,
         prefix: NodePrefixRef<'p>,
-    ) -> Result<Option<Revision>, NodeMapError> {
+    ) -> Result<(Option<Revision>, usize), NodeMapError> {
+        let mut i = 1;
         for (leaf, _, _, opt) in self.visit(prefix) {
             if leaf {
-                return Ok(opt);
+                return Ok((opt, i));
             }
+            i += 1;
         }
         Err(NodeMapError::MultipleResults)
     }
@@ -542,6 +573,16 @@
         prefix: NodePrefixRef<'a>,
     ) -> Result<Option<Revision>, NodeMapError> {
         validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+            .map(|(opt, _shortest)| opt)
+    }
+
+    fn shortest_bin<'a>(
+        &self,
+        idx: &impl RevlogIndex,
+        prefix: NodePrefixRef<'a>,
+    ) -> Result<Option<usize>, NodeMapError> {
+        validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+            .map(|(opt, shortest)| opt.map(|_rev| shortest))
     }
 }
 
@@ -667,6 +708,7 @@
         assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
         assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
         assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
+        assert_eq!(nt.shortest_hex(&idx, "00a"), Ok(Some(3)));
         assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
     }
 
@@ -686,8 +728,10 @@
         };
         assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
         assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
+        assert_eq!(nt.shortest_hex(&idx, "c")?, Some(1));
         assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
         assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
+        assert_eq!(nt.shortest_hex(&idx, "000")?, Some(3));
         assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
         Ok(())
     }
@@ -723,6 +767,13 @@
             self.nt.find_hex(&self.index, prefix)
         }
 
+        fn shortest_hex(
+            &self,
+            prefix: &str,
+        ) -> Result<Option<usize>, NodeMapError> {
+            self.nt.shortest_hex(&self.index, prefix)
+        }
+
         /// Drain `added` and restart a new one
         fn commit(self) -> Self {
             let mut as_vec: Vec<Block> =
@@ -775,6 +826,28 @@
     }
 
     #[test]
+    fn test_shortest_zero_prefix() {
+        let mut idx = TestNtIndex::new();
+        idx.insert(0, "00000abcd").unwrap();
+
+        assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
+        // in the nodetree proper, this will be found at the first nybble
+        // yet the correct answer for shortest is not 1, nor 1+1, but the
+        // the first difference with `NULL_NODE`
+        assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+        assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+
+        // same with odd result
+        idx.insert(1, "00123").unwrap();
+        assert_eq!(idx.shortest_hex("001"), Ok(Some(3)));
+        assert_eq!(idx.shortest_hex("0012"), Ok(Some(3)));
+
+        // these are unchanged of course
+        assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+        assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+    }
+
+    #[test]
     fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
         // check that the splitting loop is long enough
         let mut nt_idx = TestNtIndex::new();
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
--- a/rust/hg-core/src/revlog/node.rs
+++ b/rust/hg-core/src/revlog/node.rs
@@ -127,6 +127,36 @@
     pub fn get_nybble(&self, i: usize) -> u8 {
         get_nybble(i, self.buf)
     }
+
+    /// Return the index first nybble that's different from `node`
+    ///
+    /// If the return value is `None` that means that `self` is
+    /// a prefix of `node`, but the current method is a bit slower
+    /// than `is_prefix_of`.
+    ///
+    /// Returned index is as in `get_nybble`, i.e., starting at 0.
+    pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
+        let buf = self.buf;
+        let until = if self.is_odd {
+            buf.len() - 1
+        } else {
+            buf.len()
+        };
+        for i in 0..until {
+            if buf[i] != node[i] {
+                if buf[i] & 0xf0 == node[i] & 0xf0 {
+                    return Some(2 * i + 1);
+                } else {
+                    return Some(2 * i);
+                }
+            }
+        }
+        if self.is_odd && buf[until] & 0xf0 != node[until] & 0xf0 {
+            Some(until * 2)
+        } else {
+            None
+        }
+    }
 }
 
 /// A shortcut for full `Node` references
@@ -235,4 +265,34 @@
         assert_eq!(prefix.borrow().get_nybble(7), 9);
         Ok(())
     }
+
+    #[test]
+    fn test_first_different_nybble_even_prefix() {
+        let prefix = NodePrefix::from_hex("12ca").unwrap();
+        let prefref = prefix.borrow();
+        let mut node: Node = [0; 20];
+        assert_eq!(prefref.first_different_nybble(&node), Some(0));
+        node[0] = 0x13;
+        assert_eq!(prefref.first_different_nybble(&node), Some(1));
+        node[0] = 0x12;
+        assert_eq!(prefref.first_different_nybble(&node), Some(2));
+        node[1] = 0xca;
+        // now it is a prefix
+        assert_eq!(prefref.first_different_nybble(&node), None);
+    }
+
+    #[test]
+    fn test_first_different_nybble_odd_prefix() {
+        let prefix = NodePrefix::from_hex("12c").unwrap();
+        let prefref = prefix.borrow();
+        let mut node: Node = [0; 20];
+        assert_eq!(prefref.first_different_nybble(&node), Some(0));
+        node[0] = 0x13;
+        assert_eq!(prefref.first_different_nybble(&node), Some(1));
+        node[0] = 0x12;
+        assert_eq!(prefref.first_different_nybble(&node), Some(2));
+        node[1] = 0xca;
+        // now it is a prefix
+        assert_eq!(prefref.first_different_nybble(&node), None);
+    }
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list