D7819: rust-nodemap: core implementation for shortest
gracinet (Georges Racinet)
phabricator at mercurial-scm.org
Wed Jan 15 18:48:53 EST 2020
gracinet updated this revision to Diff 19331.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D7819?vs=19139&id=19331
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D7819/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D7819
AFFECTED FILES
rust/hg-core/src/revlog/node.rs
rust/hg-core/src/revlog/nodemap.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -16,6 +16,7 @@
node::get_nybble, node::NULL_NODE, Node, NodeError, NodePrefix,
NodePrefixRef, Revision, RevlogIndex, NULL_REVISION,
};
+use std::cmp::max;
use std::fmt;
use std::mem;
use std::ops::Deref;
@@ -47,6 +48,20 @@
prefix: NodePrefixRef<'a>,
) -> Result<Option<Revision>, NodeMapError>;
+ /// Give the size of the shortest node prefix that determines
+ /// the revision uniquely.
+ ///
+ /// From a binary node prefix, if it is matched in the node map, this
+ /// returns the number of hexadecimal digits that would had sufficed
+ /// to find the revision uniquely.
+ ///
+ /// Returns `None` if no `Revision` could be found for the prefix.
+ fn shortest_bin<'a>(
+ &self,
+ idx: &impl RevlogIndex,
+ node_prefix: NodePrefixRef<'a>,
+ ) -> Result<Option<usize>, NodeMapError>;
+
fn find_hex(
&self,
idx: &impl RevlogIndex,
@@ -54,6 +69,16 @@
) -> Result<Option<Revision>, NodeMapError> {
self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
}
+
+ /// Same as `shortest_bin`, with the hexadecimal representation of the
+ /// prefix as input.
+ fn shortest_hex(
+ &self,
+ idx: &impl RevlogIndex,
+ prefix: &str,
+ ) -> Result<Option<usize>, NodeMapError> {
+ self.shortest_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+ }
}
pub trait MutableNodeMap: NodeMap {
@@ -215,20 +240,24 @@
fn validate_candidate<'p>(
idx: &impl RevlogIndex,
prefix: NodePrefixRef<'p>,
- rev: Option<Revision>,
-) -> Result<Option<Revision>, NodeMapError> {
- if prefix.is_prefix_of(&NULL_NODE) {
- // NULL_REVISION always matches a prefix made only of zeros
+ cand: (Option<Revision>, usize),
+) -> Result<(Option<Revision>, usize), NodeMapError> {
+ let (rev, steps) = cand;
+ if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
+ rev.map_or(Ok((None, steps)), |r| {
+ has_prefix_or_none(idx, prefix, r)
+ .map(|opt| (opt, max(steps, nz_nybble + 1)))
+ })
+ } else {
+ // the prefix is only made of zeros; NULL_REVISION always matches it
// and any other *valid* result is an ambiguity
match rev {
- None => Ok(Some(NULL_REVISION)),
+ None => Ok((Some(NULL_REVISION), steps + 1)),
Some(r) => match has_prefix_or_none(idx, prefix, r)? {
- None => Ok(Some(NULL_REVISION)),
+ None => Ok((Some(NULL_REVISION), steps + 1)),
_ => Err(NodeMapError::MultipleResults),
},
}
- } else {
- rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
}
}
@@ -308,10 +337,10 @@
fn lookup<'p>(
&self,
prefix: NodePrefixRef<'p>,
- ) -> Result<Option<Revision>, NodeMapError> {
- for (leaf, _, _, opt) in self.visit(prefix) {
+ ) -> Result<(Option<Revision>, usize), NodeMapError> {
+ for (i, (leaf, _, _, opt)) in self.visit(prefix).enumerate() {
if leaf {
- return Ok(opt);
+ return Ok((opt, i + 1));
}
}
Err(NodeMapError::MultipleResults)
@@ -540,6 +569,16 @@
prefix: NodePrefixRef<'a>,
) -> Result<Option<Revision>, NodeMapError> {
validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+ .map(|(opt, _shortest)| opt)
+ }
+
+ fn shortest_bin<'a>(
+ &self,
+ idx: &impl RevlogIndex,
+ prefix: NodePrefixRef<'a>,
+ ) -> Result<Option<usize>, NodeMapError> {
+ validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+ .map(|(opt, shortest)| opt.map(|_rev| shortest))
}
}
@@ -665,6 +704,7 @@
assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
+ assert_eq!(nt.shortest_hex(&idx, "00a"), Ok(Some(3)));
assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
}
@@ -684,8 +724,10 @@
};
assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
+ assert_eq!(nt.shortest_hex(&idx, "c")?, Some(1));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
+ assert_eq!(nt.shortest_hex(&idx, "000")?, Some(3));
assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
Ok(())
}
@@ -721,6 +763,13 @@
self.nt.find_hex(&self.index, prefix)
}
+ fn shortest_hex(
+ &self,
+ prefix: &str,
+ ) -> Result<Option<usize>, NodeMapError> {
+ self.nt.shortest_hex(&self.index, prefix)
+ }
+
/// Drain `added` and restart a new one
fn commit(self) -> Self {
let mut as_vec: Vec<Block> =
@@ -773,6 +822,28 @@
}
#[test]
+ fn test_shortest_zero_prefix() {
+ let mut idx = TestNtIndex::new();
+ idx.insert(0, "00000abcd").unwrap();
+
+ assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
+ // in the nodetree proper, this will be found at the first nybble
+ // yet the correct answer for shortest is not 1, nor 1+1, but the
+ // the first difference with `NULL_NODE`
+ assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+ assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+
+ // same with odd result
+ idx.insert(1, "00123").unwrap();
+ assert_eq!(idx.shortest_hex("001"), Ok(Some(3)));
+ assert_eq!(idx.shortest_hex("0012"), Ok(Some(3)));
+
+ // these are unchanged of course
+ assert_eq!(idx.shortest_hex("00000a"), Ok(Some(6)));
+ assert_eq!(idx.shortest_hex("00000ab"), Ok(Some(6)));
+ }
+
+ #[test]
fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
// check that the splitting loop is long enough
let mut nt_idx = TestNtIndex::new();
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
--- a/rust/hg-core/src/revlog/node.rs
+++ b/rust/hg-core/src/revlog/node.rs
@@ -127,6 +127,36 @@
pub fn get_nybble(&self, i: usize) -> u8 {
get_nybble(i, self.buf)
}
+
+ /// Return the index first nybble that's different from `node`
+ ///
+ /// If the return value is `None` that means that `self` is
+ /// a prefix of `node`, but the current method is a bit slower
+ /// than `is_prefix_of`.
+ ///
+ /// Returned index is as in `get_nybble`, i.e., starting at 0.
+ pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
+ let buf = self.buf;
+ let until = if self.is_odd {
+ buf.len() - 1
+ } else {
+ buf.len()
+ };
+ for i in 0..until {
+ if buf[i] != node[i] {
+ if buf[i] & 0xf0 == node[i] & 0xf0 {
+ return Some(2 * i + 1);
+ } else {
+ return Some(2 * i);
+ }
+ }
+ }
+ if self.is_odd && buf[until] & 0xf0 != node[until] & 0xf0 {
+ Some(until * 2)
+ } else {
+ None
+ }
+ }
}
/// A shortcut for full `Node` references
@@ -235,4 +265,34 @@
assert_eq!(prefix.borrow().get_nybble(7), 9);
Ok(())
}
+
+ #[test]
+ fn test_first_different_nybble_even_prefix() {
+ let prefix = NodePrefix::from_hex("12ca").unwrap();
+ let prefref = prefix.borrow();
+ let mut node: Node = [0; 20];
+ assert_eq!(prefref.first_different_nybble(&node), Some(0));
+ node[0] = 0x13;
+ assert_eq!(prefref.first_different_nybble(&node), Some(1));
+ node[0] = 0x12;
+ assert_eq!(prefref.first_different_nybble(&node), Some(2));
+ node[1] = 0xca;
+ // now it is a prefix
+ assert_eq!(prefref.first_different_nybble(&node), None);
+ }
+
+ #[test]
+ fn test_first_different_nybble_odd_prefix() {
+ let prefix = NodePrefix::from_hex("12c").unwrap();
+ let prefref = prefix.borrow();
+ let mut node: Node = [0; 20];
+ assert_eq!(prefref.first_different_nybble(&node), Some(0));
+ node[0] = 0x13;
+ assert_eq!(prefref.first_different_nybble(&node), Some(1));
+ node[0] = 0x12;
+ assert_eq!(prefref.first_different_nybble(&node), Some(2));
+ node[1] = 0xca;
+ // now it is a prefix
+ assert_eq!(prefref.first_different_nybble(&node), None);
+ }
}
To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list