D6345: rust-discovery: optionally don't randomize at all, for tests
gracinet (Georges Racinet)
phabricator at mercurial-scm.org
Mon May 6 15:31:04 UTC 2019
gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
As seen from Python, this is a new `randomize` kwarg in init of the
discovery object. It replaces random picking by some arbitrary yet
deterministic strategy.
This is the same as what test-setdiscovery.t does, with the added
benefit to be usable both in Python and Rust implementations.
REPOSITORY
rHG Mercurial
REVISION DETAIL
https://phab.mercurial-scm.org/D6345
AFFECTED FILES
mercurial/setdiscovery.py
rust/hg-core/src/discovery.rs
rust/hg-cpython/src/discovery.rs
tests/test-rust-discovery.py
CHANGE DETAILS
diff --git a/tests/test-rust-discovery.py b/tests/test-rust-discovery.py
--- a/tests/test-rust-discovery.py
+++ b/tests/test-rust-discovery.py
@@ -106,6 +106,10 @@
self.assertTrue(disco.iscomplete())
self.assertEqual(disco.commonheads(), {1})
+ def testinitnorandom(self):
+ idx = self.parseindex()
+ PartialDiscovery(idx, [3], randomize=False)
+
if __name__ == '__main__':
import silenttestrunner
silenttestrunner.main(__name__)
diff --git a/rust/hg-cpython/src/discovery.rs b/rust/hg-cpython/src/discovery.rs
--- a/rust/hg-cpython/src/discovery.rs
+++ b/rust/hg-cpython/src/discovery.rs
@@ -30,13 +30,15 @@
def __new__(
_cls,
index: PyObject,
- targetheads: PyObject
+ targetheads: PyObject,
+ randomize: bool = true
) -> PyResult<PartialDiscovery> {
Self::create_instance(
py,
RefCell::new(Box::new(CorePartialDiscovery::new(
Index::new(py, index)?,
rev_pyiter_collect(py, &targetheads)?,
+ randomize,
)))
)
}
diff --git a/rust/hg-core/src/discovery.rs b/rust/hg-core/src/discovery.rs
--- a/rust/hg-core/src/discovery.rs
+++ b/rust/hg-core/src/discovery.rs
@@ -29,6 +29,7 @@
children_cache: Option<HashMap<Revision, Vec<Revision>>>,
missing: HashSet<Revision>,
rng: Rng,
+ randomize: bool,
}
pub struct DiscoveryStats {
@@ -144,16 +145,32 @@
/// If we want to make the signature more flexible,
/// we'll have to make it a type argument of `PartialDiscovery` or a trait
/// object since we'll keep it in the meanwhile
- pub fn new(graph: G, target_heads: Vec<Revision>) -> Self {
+ ///
+ /// The `randomize` boolean affects sampling, and specifically how
+ /// limiting or last-minute expanding is been done:
+ ///
+ /// If `true`, both will perform random picking from `self.undecided`.
+ /// This is currently the best for actual discoveries.
+ ///
+ /// If `false`, a reproductible picking strategy is performed. This is
+ /// useful for integration tests.
+ pub fn new(
+ graph: G,
+ target_heads: Vec<Revision>,
+ randomize: bool,
+ ) -> Self {
let mut seed: [u8; 16] = [0; 16];
- thread_rng().fill_bytes(&mut seed);
- Self::new_with_seed(graph, target_heads, seed)
+ if randomize {
+ thread_rng().fill_bytes(&mut seed);
+ }
+ Self::new_with_seed(graph, target_heads, seed, randomize)
}
pub fn new_with_seed(
graph: G,
target_heads: Vec<Revision>,
seed: [u8; 16],
+ randomize: bool,
) -> Self {
PartialDiscovery {
undecided: None,
@@ -163,6 +180,7 @@
common: MissingAncestors::new(graph, vec![]),
missing: HashSet::new(),
rng: Rng::from_seed(seed),
+ randomize: randomize,
}
}
@@ -173,6 +191,11 @@
mut sample: Vec<Revision>,
size: usize,
) -> Vec<Revision> {
+ if !self.randomize {
+ sample.sort();
+ sample.truncate(size);
+ return sample
+ }
let sample_len = sample.len();
if sample_len <= size {
return sample;
@@ -413,20 +436,22 @@
/// A PartialDiscovery as for pushing all the heads of `SampleGraph`
///
- /// To avoid actual randomness in tests, we give it a fixed random seed.
+ /// To avoid actual randomness in these tests, we give it a fixed
+ /// random seed, but by default we'll test the random version.
fn full_disco() -> PartialDiscovery<SampleGraph> {
PartialDiscovery::new_with_seed(
SampleGraph,
vec![10, 11, 12, 13],
[0; 16],
+ true,
)
}
/// A PartialDiscovery as for pushing the 12 head of `SampleGraph`
///
/// To avoid actual randomness in tests, we give it a fixed random seed.
fn disco12() -> PartialDiscovery<SampleGraph> {
- PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16])
+ PartialDiscovery::new_with_seed(SampleGraph, vec![12], [0; 16], true)
}
fn sorted_undecided(
@@ -511,6 +536,14 @@
}
#[test]
+ fn test_limit_sample_no_random() {
+ let mut disco = full_disco();
+ disco.randomize = false;
+ assert_eq!(disco.limit_sample(vec![1, 8, 13, 5, 7, 3], 4),
+ vec![1, 3, 5, 7]);
+ }
+
+ #[test]
fn test_quick_sample_enough_undecided_heads() -> Result<(), GraphError> {
let mut disco = full_disco();
disco.undecided = Some((1..=13).collect());
diff --git a/mercurial/setdiscovery.py b/mercurial/setdiscovery.py
--- a/mercurial/setdiscovery.py
+++ b/mercurial/setdiscovery.py
@@ -92,11 +92,19 @@
dist.setdefault(p, d + 1)
visit.append(p)
-def _limitsample(sample, desiredlen):
- """return a random subset of sample of at most desiredlen item"""
- if len(sample) > desiredlen:
- sample = set(random.sample(sample, desiredlen))
- return sample
+def _limitsample(sample, desiredlen, randomize=True):
+ """return a random subset of sample of at most desiredlen item.
+
+ If randomize is False, though, a deterministic subset is returned.
+ This is meant for integration tests.
+ """
+ if len(sample) <= desiredlen:
+ return sample
+ if randomize:
+ return set(random.sample(sample, desiredlen))
+ sample = list(sample)
+ sample.sort()
+ return set(sample[:desiredlen])
class partialdiscovery(object):
"""an object representing ongoing discovery
@@ -110,13 +118,14 @@
(all tracked revisions are known locally)
"""
- def __init__(self, repo, targetheads):
+ def __init__(self, repo, targetheads, randomize=True):
self._repo = repo
self._targetheads = targetheads
self._common = repo.changelog.incrementalmissingrevs()
self._undecided = None
self.missing = set()
self._childrenmap = None
+ self.randomize = randomize
def addcommons(self, commons):
"""register nodes known as common"""
@@ -221,7 +230,7 @@
sample = set(self._repo.revs('heads(%ld)', revs))
if len(sample) >= size:
- return _limitsample(sample, size)
+ return _limitsample(sample, size, randomize=self.randomize)
_updatesample(None, headrevs, sample, self._parentsgetter(),
quicksamplesize=size)
@@ -246,10 +255,15 @@
_updatesample(revs, revsroots, sample, childrenrevs)
assert sample
- sample = _limitsample(sample, size)
+ sample = _limitsample(sample, size, randomize=self.randomize)
if len(sample) < size:
more = size - len(sample)
- sample.update(random.sample(list(revs - sample), more))
+ takefrom = list(revs - sample)
+ if self.randomize:
+ sample.update(random.sample(takefrom, more))
+ else:
+ takefrom.sort()
+ sample.update(takefrom[:more])
return sample
def findcommonheads(ui, local, remote,
To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list