D7819: rust-nodemap: core implementation for shortest
gracinet (Georges Racinet)
phabricator at mercurial-scm.org
Mon Jan 27 11:55:51 EST 2020
gracinet marked an inline comment as done.
gracinet updated this revision to Diff 19640.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D7819?vs=19639&id=19640
BRANCH
default
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D7819/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D7819
AFFECTED FILES
rust/hg-core/src/revlog/node.rs
rust/hg-core/src/revlog/nodemap.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -17,6 +17,7 @@
RevlogIndex, NULL_REVISION,
};
+use std::cmp::max;
use std::fmt;
use std::mem;
use std::ops::Deref;
@@ -98,6 +99,42 @@
) -> Result<Option<Revision>, NodeMapError> {
self.find_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
}
+
+ /// Give the size of the shortest node prefix that determines
+ /// the revision uniquely.
+ ///
+ /// From a binary node prefix, if it is matched in the node map, this
+ /// returns the number of hexadecimal digits that would had sufficed
+ /// to find the revision uniquely.
+ ///
+ /// Returns `None` if no `Revision` could be found for the prefix.
+ ///
+ /// If several Revisions match the given prefix, a [`MultipleResults`]
+ /// error is returned.
+ fn unique_prefix_len_bin<'a>(
+ &self,
+ idx: &impl RevlogIndex,
+ node_prefix: NodePrefixRef<'a>,
+ ) -> Result<Option<usize>, NodeMapError>;
+
+ /// Same as `unique_prefix_len_bin`, with the hexadecimal representation
+ /// of the prefix as input.
+ fn unique_prefix_len_hex(
+ &self,
+ idx: &impl RevlogIndex,
+ prefix: &str,
+ ) -> Result<Option<usize>, NodeMapError> {
+ self.unique_prefix_len_bin(idx, NodePrefix::from_hex(prefix)?.borrow())
+ }
+
+ /// Same as `unique_prefix_len_bin`, with a full `Node` as input
+ fn unique_prefix_len_node(
+ &self,
+ idx: &impl RevlogIndex,
+ node: &Node,
+ ) -> Result<Option<usize>, NodeMapError> {
+ self.unique_prefix_len_bin(idx, node.into())
+ }
}
pub trait MutableNodeMap: NodeMap {
@@ -259,20 +296,24 @@
fn validate_candidate<'p>(
idx: &impl RevlogIndex,
prefix: NodePrefixRef<'p>,
- rev: Option<Revision>,
-) -> Result<Option<Revision>, NodeMapError> {
- if prefix.is_prefix_of(&NULL_NODE) {
- // NULL_REVISION always matches a prefix made only of zeros
+ candidate: (Option<Revision>, usize),
+) -> Result<(Option<Revision>, usize), NodeMapError> {
+ let (rev, steps) = candidate;
+ if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) {
+ rev.map_or(Ok((None, steps)), |r| {
+ has_prefix_or_none(idx, prefix, r)
+ .map(|opt| (opt, max(steps, nz_nybble + 1)))
+ })
+ } else {
+ // the prefix is only made of zeros; NULL_REVISION always matches it
// and any other *valid* result is an ambiguity
match rev {
- None => Ok(Some(NULL_REVISION)),
+ None => Ok((Some(NULL_REVISION), steps + 1)),
Some(r) => match has_prefix_or_none(idx, prefix, r)? {
- None => Ok(Some(NULL_REVISION)),
+ None => Ok((Some(NULL_REVISION), steps + 1)),
_ => Err(NodeMapError::MultipleResults),
},
}
- } else {
- rev.map_or(Ok(None), |r| has_prefix_or_none(idx, prefix, r))
}
}
@@ -356,13 +397,26 @@
}
/// Main working method for `NodeTree` searches
+ ///
+ /// The first returned value is the result of analysing `NodeTree` data
+ /// *alone*: whereas `None` guarantees that the given prefix is absent
+ /// from the `NodeTree` data (but still could match `NULL_NODE`), with
+ /// `Some(rev)`, it is to be understood that `rev` is the unique `Revision`
+ /// that could match the prefix. Actually, all that can be inferred from
+ /// the `NodeTree` data is that `rev` is the revision with the longest
+ /// common node prefix with the given prefix.
+ ///
+ /// The second returned value is the size of the smallest subprefix
+ /// of `prefix` that would give the same result, i.e. not the
+ /// `MultipleResults` error variant (again, using only the data of the
+ /// `NodeTree`).
fn lookup<'p>(
&self,
prefix: NodePrefixRef<'p>,
- ) -> Result<Option<Revision>, NodeMapError> {
- for visit_item in self.visit(prefix) {
+ ) -> Result<(Option<Revision>, usize), NodeMapError> {
+ for (i, visit_item) in self.visit(prefix).enumerate() {
if let Some(opt) = visit_item.final_revision() {
- return Ok(opt);
+ return Ok((opt, i + 1));
}
}
Err(NodeMapError::MultipleResults)
@@ -607,6 +661,16 @@
prefix: NodePrefixRef<'a>,
) -> Result<Option<Revision>, NodeMapError> {
validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+ .map(|(opt, _shortest)| opt)
+ }
+
+ fn unique_prefix_len_bin<'a>(
+ &self,
+ idx: &impl RevlogIndex,
+ prefix: NodePrefixRef<'a>,
+ ) -> Result<Option<usize>, NodeMapError> {
+ validate_candidate(idx, prefix.clone(), self.lookup(prefix)?)
+ .map(|(opt, shortest)| opt.map(|_rev| shortest))
}
}
@@ -737,6 +801,7 @@
assert_eq!(nt.find_hex(&idx, "01"), Ok(Some(9)));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "00a"), Ok(Some(0)));
+ assert_eq!(nt.unique_prefix_len_hex(&idx, "00a"), Ok(Some(3)));
assert_eq!(nt.find_hex(&idx, "000"), Ok(Some(NULL_REVISION)));
}
@@ -756,8 +821,10 @@
};
assert_eq!(nt.find_hex(&idx, "10")?, Some(1));
assert_eq!(nt.find_hex(&idx, "c")?, Some(2));
+ assert_eq!(nt.unique_prefix_len_hex(&idx, "c")?, Some(1));
assert_eq!(nt.find_hex(&idx, "00"), Err(MultipleResults));
assert_eq!(nt.find_hex(&idx, "000")?, Some(NULL_REVISION));
+ assert_eq!(nt.unique_prefix_len_hex(&idx, "000")?, Some(3));
assert_eq!(nt.find_hex(&idx, "01")?, Some(9));
Ok(())
}
@@ -793,6 +860,13 @@
self.nt.find_hex(&self.index, prefix)
}
+ fn unique_prefix_len_hex(
+ &self,
+ prefix: &str,
+ ) -> Result<Option<usize>, NodeMapError> {
+ self.nt.unique_prefix_len_hex(&self.index, prefix)
+ }
+
/// Drain `added` and restart a new one
fn commit(self) -> Self {
let mut as_vec: Vec<Block> =
@@ -845,6 +919,28 @@
}
#[test]
+ fn test_unique_prefix_len_zero_prefix() {
+ let mut idx = TestNtIndex::new();
+ idx.insert(0, "00000abcd").unwrap();
+
+ assert_eq!(idx.find_hex("000"), Err(NodeMapError::MultipleResults));
+ // in the nodetree proper, this will be found at the first nybble
+ // yet the correct answer for unique_prefix_len is not 1, nor 1+1,
+ // but the first difference with `NULL_NODE`
+ assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
+ assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
+
+ // same with odd result
+ idx.insert(1, "00123").unwrap();
+ assert_eq!(idx.unique_prefix_len_hex("001"), Ok(Some(3)));
+ assert_eq!(idx.unique_prefix_len_hex("0012"), Ok(Some(3)));
+
+ // these are unchanged of course
+ assert_eq!(idx.unique_prefix_len_hex("00000a"), Ok(Some(6)));
+ assert_eq!(idx.unique_prefix_len_hex("00000ab"), Ok(Some(6)));
+ }
+
+ #[test]
fn test_insert_extreme_splitting() -> Result<(), NodeMapError> {
// check that the splitting loop is long enough
let mut nt_idx = TestNtIndex::new();
diff --git a/rust/hg-core/src/revlog/node.rs b/rust/hg-core/src/revlog/node.rs
--- a/rust/hg-core/src/revlog/node.rs
+++ b/rust/hg-core/src/revlog/node.rs
@@ -225,6 +225,36 @@
pub fn get_nybble(&self, i: usize) -> u8 {
get_nybble(self.buf, i)
}
+
+ /// Return the index first nybble that's different from `node`
+ ///
+ /// If the return value is `None` that means that `self` is
+ /// a prefix of `node`, but the current method is a bit slower
+ /// than `is_prefix_of`.
+ ///
+ /// Returned index is as in `get_nybble`, i.e., starting at 0.
+ pub fn first_different_nybble(&self, node: &Node) -> Option<usize> {
+ let buf = self.buf;
+ let until = if self.is_odd {
+ buf.len() - 1
+ } else {
+ buf.len()
+ };
+ for i in 0..until {
+ if buf[i] != node.data[i] {
+ if buf[i] & 0xf0 == node.data[i] & 0xf0 {
+ return Some(2 * i + 1);
+ } else {
+ return Some(2 * i);
+ }
+ }
+ }
+ if self.is_odd && buf[until] & 0xf0 != node.data[until] & 0xf0 {
+ Some(until * 2)
+ } else {
+ None
+ }
+ }
}
/// A shortcut for full `Node` references
@@ -361,6 +391,36 @@
assert_eq!(prefix.borrow().get_nybble(7), 9);
Ok(())
}
+
+ #[test]
+ fn test_first_different_nybble_even_prefix() {
+ let prefix = NodePrefix::from_hex("12ca").unwrap();
+ let prefref = prefix.borrow();
+ let mut node = Node::from([0; NODE_BYTES_LENGTH]);
+ assert_eq!(prefref.first_different_nybble(&node), Some(0));
+ node.data[0] = 0x13;
+ assert_eq!(prefref.first_different_nybble(&node), Some(1));
+ node.data[0] = 0x12;
+ assert_eq!(prefref.first_different_nybble(&node), Some(2));
+ node.data[1] = 0xca;
+ // now it is a prefix
+ assert_eq!(prefref.first_different_nybble(&node), None);
+ }
+
+ #[test]
+ fn test_first_different_nybble_odd_prefix() {
+ let prefix = NodePrefix::from_hex("12c").unwrap();
+ let prefref = prefix.borrow();
+ let mut node = Node::from([0; NODE_BYTES_LENGTH]);
+ assert_eq!(prefref.first_different_nybble(&node), Some(0));
+ node.data[0] = 0x13;
+ assert_eq!(prefref.first_different_nybble(&node), Some(1));
+ node.data[0] = 0x12;
+ assert_eq!(prefref.first_different_nybble(&node), Some(2));
+ node.data[1] = 0xca;
+ // now it is a prefix
+ assert_eq!(prefref.first_different_nybble(&node), None);
+ }
}
#[cfg(test)]
To: gracinet, #hg-reviewers, kevincox
Cc: martinvonz, durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list