D6260: rust-discovery: takefullsample() core implementation
gracinet (Georges Racinet)
phabricator at mercurial-scm.org
Wed Apr 17 12:17:27 UTC 2019
gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
take_full_sample() browses the undecided set in both directions: from
its roots as well as from its heads.
Following what's done on the Python side, we alter update_sample()
signature to take a closure returning an iterator: either ParentsIterator
or an iterator over the children found in `children_cache`. These constructs
should probably be split off in a separate module.
This is a first concrete example where a more abstract graph notion (probably
a trait) would be useful, as this is nothing but an operation on the reversed
DAG.
A similar motivation in the context of the discovery
process would be to replace the call to dagops::range in
`add_missing_revisions()` with a simple iteration over descendents, again an
operation on the reversed graph.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D6260
AFFECTED FILES
rust/hg-core/src/discovery.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -26,6 +26,7 @@
graph: G, // plays the role of self._repo
common: MissingAncestors<G>,
undecided: Option<HashSet<Revision>>,
+ children_cache: Option<HashMap<Revision, Vec<Revision>>>,
missing: HashSet<Revision>,
rng: Rng,
}
@@ -49,13 +50,16 @@
/// - `sample`: a sample to update
/// - `parentfn`: a callable to resolve parents for a revision
/// - `quicksamplesize`: optional target size of the sample
-fn update_sample(
+fn update_sample<I>(
revs: Option<&HashSet<Revision>>,
heads: impl IntoIterator<Item = Revision>,
sample: &mut HashSet<Revision>,
- parentsfn: impl Fn(Revision) -> Result<[Revision; 2], GraphError>,
+ parentsfn: impl Fn(Revision) -> Result<I, GraphError>,
quicksamplesize: Option<usize>,
-) -> Result<(), GraphError> {
+) -> Result<(), GraphError>
+where
+ I: Iterator<Item = Revision>,
+{
let mut distances: HashMap<Revision, u32> = HashMap::new();
let mut visit: VecDeque<Revision> = heads.into_iter().collect();
let mut factor: u32 = 1;
@@ -84,10 +88,7 @@
}
}
seen.insert(current);
- for p in parentsfn(current)?.iter().cloned() {
- if p == NULL_REVISION {
- continue;
- }
+ for p in parentsfn(current)? {
if let Some(revs) = revs {
if !revs.contains(&p) {
continue;
@@ -100,6 +101,45 @@
Ok(())
}
+// TODO reread this and decide Vec/Box, and where to put it
+struct ParentsIterator {
+ parents: [Revision; 2],
+ cur: usize,
+}
+
+impl ParentsIterator {
+ fn graph_parents(
+ graph: &impl Graph,
+ r: Revision,
+ ) -> Result<ParentsIterator, GraphError> {
+ Ok(ParentsIterator {
+ parents: graph.parents(r)?,
+ cur: 0,
+ })
+ }
+}
+
+impl Iterator for ParentsIterator {
+ type Item = Revision;
+
+ fn next(&mut self) -> Option<Revision> {
+ if self.cur > 1 {
+ return None;
+ }
+ let rev = self.parents[self.cur];
+ self.cur += 1;
+ if rev == NULL_REVISION {
+ if self.cur == 2 {
+ return None;
+ } else {
+ // exceptional branch
+ return self.next();
+ }
+ }
+ Some(rev)
+ }
+}
+
impl<G: Graph + Clone> PartialDiscovery<G> {
/// Create a PartialDiscovery object, with the intent
/// of comparing our `::<target_heads>` revset to the contents of another
@@ -124,6 +164,7 @@
) -> Self {
PartialDiscovery {
undecided: None,
+ children_cache: None,
target_heads: Some(target_heads),
graph: graph.clone(),
common: MissingAncestors::new(graph, vec![]),
@@ -229,6 +270,24 @@
Ok(())
}
+ fn ensure_children_cache(&mut self) -> Result<(), GraphError> {
+ if self.children_cache.is_some() {
+ return Ok(());
+ }
+ self.ensure_undecided()?;
+
+ let mut children: HashMap<Revision, Vec<Revision>> = HashMap::new();
+ for rev in self.undecided.as_ref().unwrap().iter().cloned() {
+ for p in self.graph.parents(rev)?.iter().cloned() {
+ if p != NULL_REVISION {
+ children.entry(p).or_insert_with(|| Vec::new()).push(rev);
+ }
+ }
+ }
+ self.children_cache = Some(children);
+ Ok(())
+ }
+
/// Provide statistics about the current state of the discovery process
pub fn stats(&self) -> DiscoveryStats {
DiscoveryStats {
@@ -256,11 +315,82 @@
None,
headrevs,
&mut sample,
- |r| self.graph.parents(r),
+ |r| ParentsIterator::graph_parents(&self.graph, r),
Some(size),
)?;
Ok(sample.into_iter().collect())
}
+
+ fn bidirectional_sample(
+ &mut self,
+ size: usize,
+ ) -> Result<HashSet<Revision>, GraphError> {
+ self.ensure_undecided()?;
+ {
+ // we don't want to compute children_cache before this
+ // but doing it after extracting self.undecided takes a mutable
+ // ref to self while a shareable one is still active.
+ let undecided = self.undecided.as_ref().unwrap();
+ if undecided.len() <= size {
+ return Ok(undecided.clone());
+ }
+ }
+
+ self.ensure_children_cache()?;
+ let revs = self.undecided.as_ref().unwrap();
+ let mut sample: HashSet<Revision> = revs.clone();
+ if revs.len() <= size {
+ return Ok(sample);
+ }
+ // TODO children cache here ?
+ dagops::retain_heads(&self.graph, &mut sample)?;
+ let revsheads = sample.clone(); // was again heads(revs) in python
+
+ // update from heads
+ update_sample(
+ Some(revs),
+ revsheads.iter().cloned(),
+ &mut sample,
+ |r| ParentsIterator::graph_parents(&self.graph, r),
+ None,
+ )?;
+
+ // update from roots
+ let revroots: HashSet<Revision> =
+ dagops::roots(&self.graph, revs)?.iter().cloned().collect();
+
+ let children = self.children_cache.as_ref().unwrap();
+ let empty_vec: Vec<Revision> = Vec::new();
+ update_sample(
+ Some(revs),
+ revroots,
+ &mut sample,
+ |r| Ok(children.get(&r).unwrap_or(&empty_vec).iter().cloned()),
+ None,
+ )?;
+ Ok(sample)
+ }
+
+ pub fn take_full_sample(
+ &mut self,
+ size: usize,
+ ) -> Result<Vec<Revision>, GraphError> {
+ let sample = self.bidirectional_sample(size)?;
+ let more = size - sample.len();
+ let mut sample = self.limit_sample(sample.into_iter().collect(), size);
+ if more > 0 {
+ let take_from: Vec<Revision> = self
+ .undecided
+ .as_ref()
+ .unwrap()
+ .iter()
+ .filter(|&r| !sample.contains(r))
+ .cloned()
+ .collect();
+ sample.extend(self.limit_sample(take_from, more));
+ }
+ Ok(sample)
+ }
}
#[cfg(test)]
@@ -391,4 +521,24 @@
assert_eq!(sample_vec, vec![4, 9, 12]);
Ok(())
}
+
+ #[test]
+ fn test_children_cache() -> Result<(), GraphError> {
+ let mut disco = full_disco();
+ disco.ensure_children_cache()?;
+
+ let cache = disco.children_cache.unwrap();
+ assert_eq!(cache.get(&2).cloned(), Some(vec![4]));
+ assert_eq!(cache.get(&10).cloned(), None);
+
+ let mut children_4 = cache.get(&4).cloned().unwrap();
+ children_4.sort();
+ assert_eq!(children_4, vec![5, 6, 7]);
+
+ let mut children_7 = cache.get(&7).cloned().unwrap();
+ children_7.sort();
+ assert_eq!(children_7, vec![9, 11]);
+
+ Ok(())
+ }
}
To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list