D7928: rust-status: add function for sequential traversal of the working directory
Alphare (Raphaël Gomès)
phabricator at mercurial-scm.org
Fri Jan 17 15:34:08 UTC 2020
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
This change also introduces helper structs to make things clearer.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D7928
AFFECTED FILES
rust/hg-core/src/dirstate/status.rs
rust/hg-core/src/lib.rs
rust/hg-cpython/src/dirstate/status.rs
CHANGE DETAILS
diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -20,7 +20,7 @@
matchers::{AlwaysMatcher, FileMatcher},
status,
utils::{files::get_path_from_bytes, hg_path::HgPath},
- StatusResult,
+ DirstateStatus,
};
use std::borrow::Borrow;
@@ -114,7 +114,7 @@
fn build_response(
lookup: Vec<&HgPath>,
- status_res: StatusResult,
+ status_res: DirstateStatus,
py: Python,
) -> PyResult<(PyList, PyList, PyList, PyList, PyList, PyList, PyList)> {
let modified = collect_pybytes_list(py, status_res.modified.as_ref());
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -13,7 +13,7 @@
dirs_multiset::{DirsMultiset, DirsMultisetIter},
dirstate_map::DirstateMap,
parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
- status::{status, StatusResult},
+ status::{status, DirstateStatus},
CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
StateMap, StateMapIter,
};
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,20 +11,39 @@
use crate::{
dirstate::SIZE_FROM_OTHER_PARENT,
- matchers::Matcher,
+ matchers::{Matcher, VisitChildrenSet},
utils::{
files::HgMetadata,
hg_path::{
hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
},
},
- CopyMap, DirstateEntry, DirstateMap, EntryState,
+ CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
};
use rayon::prelude::*;
-use std::collections::HashSet;
+use std::borrow::Cow;
+use std::collections::{HashSet, VecDeque};
use std::fs::{read_dir, DirEntry};
+use std::io::ErrorKind;
+use std::ops::Deref;
use std::path::Path;
+#[derive(Debug)]
+pub enum BadType {
+ CharacterDevice,
+ BlockDevice,
+ FIFO,
+ Socket,
+ Directory,
+ Unknown,
+}
+
+#[derive(Debug)]
+pub enum BadMatch {
+ OsError(i32),
+ BadType(BadType),
+}
+
/// Marker enum used to dispatch new status entries into the right collections.
/// Is similar to `crate::EntryState`, but represents the transient state of
/// entries during the lifetime of a command.
@@ -36,6 +55,11 @@
Deleted,
Clean,
Unknown,
+ Ignored,
+ /// Empty dispatch, the file is not worth listing
+ None,
+ /// Was explicitly matched but cannot be found/accessed
+ Bad(BadMatch),
}
type IoResult<T> = std::io::Result<T>;
@@ -81,9 +105,7 @@
entry: DirstateEntry,
metadata: HgMetadata,
copy_map: &CopyMap,
- check_exec: bool,
- list_clean: bool,
- last_normal_time: i64,
+ options: StatusOptions,
) -> Dispatch {
let DirstateEntry {
state,
@@ -103,7 +125,7 @@
EntryState::Normal => {
let size_changed = mod_compare(size, st_size as i32);
let mode_changed =
- (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec;
+ (mode ^ st_mode as i32) & 0o100 != 0o000 && options.check_exec;
let metadata_changed = size >= 0 && (size_changed || mode_changed);
let other_parent = size == SIZE_FROM_OTHER_PARENT;
if metadata_changed
@@ -113,14 +135,14 @@
Dispatch::Modified
} else if mod_compare(mtime, st_mtime as i32) {
Dispatch::Unsure
- } else if st_mtime == last_normal_time {
+ } else if st_mtime == options.last_normal_time {
// the file may have just been marked as normal and
// it may have changed in the same second without
// changing its size. This can happen if we quickly
// do multiple commits. Force lookup, so we don't
// miss such a racy file change.
Dispatch::Unsure
- } else if list_clean {
+ } else if options.list_clean {
Dispatch::Clean
} else {
Dispatch::Unknown
@@ -153,9 +175,7 @@
files: &'a HashSet<&HgPath>,
dmap: &'a DirstateMap,
root_dir: impl AsRef<Path> + Sync + Send,
- check_exec: bool,
- list_clean: bool,
- last_normal_time: i64,
+ options: StatusOptions,
) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
files.par_iter().filter_map(move |filename| {
// TODO normalization
@@ -179,9 +199,7 @@
*entry,
HgMetadata::from_metadata(meta),
&dmap.copy_map,
- check_exec,
- list_clean,
- last_normal_time,
+ options,
),
)));
}
@@ -204,14 +222,181 @@
})
}
+#[derive(Debug, Copy, Clone)]
+pub struct StatusOptions {
+ pub last_normal_time: i64,
+ pub check_exec: bool,
+ pub list_clean: bool,
+ pub list_ignored: bool,
+ pub list_unknown: bool,
+}
+
+/// Dispatch a single file found during `traverse`.
+/// If `filename` is a directory that needs to be traversed, it is pushed to
+/// the front of `work`.
+fn traverse_worker<'a>(
+ work: &mut VecDeque<HgPathBuf>,
+ matcher: &impl Matcher,
+ dmap: &DirstateMap,
+ filename: impl AsRef<HgPath>,
+ dir_entry: &DirEntry,
+ ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+ options: StatusOptions,
+) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+ let file_type = match dir_entry.file_type() {
+ Ok(x) => x,
+ Err(e) => return Some(Err(e.into())),
+ };
+ let filename = filename.as_ref();
+ let entry_option = dmap.get(filename);
+
+ if file_type.is_dir() {
+ // Do we need to traverse it?
+ if !ignore_fn(&filename) {
+ work.push_front(filename.to_owned());
+ } else {
+ if options.list_ignored {
+ work.push_front(filename.to_owned());
+ }
+ }
+ // Nested `if` until `rust-lang/rust#53668` is stable
+ if let Some(entry) = entry_option {
+ // Used to be a file, is now a folder
+ if matcher.matches_everything() || matcher.matches(&filename) {
+ return Some(Ok((
+ Cow::Owned(filename.to_owned()),
+ dispatch_missing(entry.state),
+ )));
+ }
+ }
+ } else if file_type.is_file() || file_type.is_symlink() {
+ if let Some(entry) = entry_option {
+ if matcher.matches_everything() || matcher.matches(&filename) {
+ let metadata = match dir_entry.metadata() {
+ Ok(x) => x,
+ Err(e) => return Some(Err(e.into())),
+ };
+ return Some(Ok((
+ Cow::Owned(filename.to_owned()),
+ dispatch_found(
+ &filename,
+ *entry,
+ HgMetadata::from_metadata(metadata),
+ &dmap.copy_map,
+ options,
+ ),
+ )));
+ }
+ } else if (matcher.matches_everything() || matcher.matches(&filename))
+ && !ignore_fn(&filename)
+ {
+ return Some(Ok((
+ Cow::Owned(filename.to_owned()),
+ Dispatch::Unknown,
+ )));
+ } else if ignore_fn(&filename) {
+ return Some(Ok((
+ Cow::Owned(filename.to_owned()),
+ Dispatch::Ignored,
+ )));
+ }
+ } else if let Some(entry) = entry_option {
+ // Used to be a file or a folder, now something else.
+ if matcher.matches_everything() || matcher.matches(&filename) {
+ return Some(Ok((
+ Cow::Owned(filename.to_owned()),
+ dispatch_missing(entry.state),
+ )));
+ }
+ }
+ None
+}
+
+/// Walk the working directory recursively to look for changes compared to the
+/// current `DirstateMap`.
+fn traverse<'a>(
+ matcher: &(impl Matcher + Sync),
+ root_dir: impl AsRef<Path>,
+ dmap: &DirstateMap,
+ path: impl AsRef<HgPath>,
+ old_results: FastHashMap<Cow<'a, HgPath>, Dispatch>,
+ ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+ options: StatusOptions,
+) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
+ let root_dir = root_dir.as_ref();
+ let mut new_results = FastHashMap::default();
+
+ let mut work = VecDeque::new();
+ work.push_front(path.as_ref().to_owned());
+
+ while let Some(ref directory) = work.pop_front() {
+ if directory.as_bytes() == b".hg" {
+ continue;
+ }
+ let visit_entries = match matcher.visit_children_set(directory) {
+ VisitChildrenSet::Empty => continue,
+ VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+ VisitChildrenSet::Set(set) => Some(set),
+ };
+ let buf = hg_path_to_path_buf(directory)?;
+ let dir_path = root_dir.join(buf);
+
+ let skip_dot_hg = !directory.as_bytes().is_empty();
+ let entries = match list_directory(dir_path, skip_dot_hg) {
+ Err(e) => match e.kind() {
+ ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+ new_results.insert(
+ Cow::Owned(directory.to_owned()),
+ Dispatch::Bad(BadMatch::OsError(
+ // Unwrapping here is OK because the error is always
+ // a real OS error
+ e.raw_os_error().unwrap(),
+ )),
+ );
+ continue;
+ }
+ _ => return Err(e),
+ },
+ Ok(entries) => entries,
+ };
+
+ for (filename, dir_entry) in entries {
+ if let Some(ref set) = visit_entries {
+ if !set.contains(filename.deref()) {
+ continue;
+ }
+ }
+ // TODO normalize
+ let filename = if directory.is_empty() {
+ filename.to_owned()
+ } else {
+ directory.join(&filename)
+ };
+
+ if !old_results.contains_key(filename.deref()) {
+ if let Some((res, dispatch)) = traverse_worker(
+ &mut work, matcher, &dmap, &filename, &dir_entry,
+ &ignore_fn, options,
+ )
+ .transpose()?
+ {
+ new_results.insert(res, dispatch);
+ }
+ }
+ }
+ }
+
+ new_results.extend(old_results.into_iter());
+
+ Ok(new_results)
+}
+
/// Stat all entries in the `DirstateMap` and mark them for dispatch into
/// the relevant collections.
fn stat_dmap_entries(
dmap: &DirstateMap,
root_dir: impl AsRef<Path> + Sync + Send,
- check_exec: bool,
- list_clean: bool,
- last_normal_time: i64,
+ options: StatusOptions,
) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
dmap.par_iter().map(move |(filename, entry)| {
let filename: &HgPath = filename;
@@ -232,9 +417,7 @@
*entry,
HgMetadata::from_metadata(m),
&dmap.copy_map,
- check_exec,
- list_clean,
- last_normal_time,
+ options,
),
)),
Err(ref e)
@@ -252,47 +435,59 @@
})
}
-pub struct StatusResult<'a> {
+pub struct DirstateStatus<'a> {
pub modified: Vec<&'a HgPath>,
pub added: Vec<&'a HgPath>,
pub removed: Vec<&'a HgPath>,
pub deleted: Vec<&'a HgPath>,
pub clean: Vec<&'a HgPath>,
+ pub ignored: Vec<&'a HgPath>,
+ pub unknown: Vec<&'a HgPath>,
+ pub bad: Vec<(&'a HgPath, BadMatch)>,
/* TODO ignored
* TODO unknown */
}
fn build_response<'a>(
results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
-) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> {
+) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> {
let mut lookup = vec![];
let mut modified = vec![];
let mut added = vec![];
let mut removed = vec![];
let mut deleted = vec![];
let mut clean = vec![];
+ let mut ignored = vec![];
+ let mut unknown = vec![];
+ let mut bad = vec![];
for res in results.into_iter() {
let (filename, dispatch) = res?;
match dispatch {
- Dispatch::Unknown => {}
+ Dispatch::Unknown => unknown.push(filename),
Dispatch::Unsure => lookup.push(filename),
Dispatch::Modified => modified.push(filename),
Dispatch::Added => added.push(filename),
Dispatch::Removed => removed.push(filename),
Dispatch::Deleted => deleted.push(filename),
Dispatch::Clean => clean.push(filename),
+ Dispatch::Ignored => ignored.push(filename),
+ Dispatch::None => {}
+ Dispatch::Bad(reason) => bad.push((filename, reason)),
}
}
Ok((
lookup,
- StatusResult {
+ DirstateStatus {
modified,
added,
removed,
deleted,
clean,
+ ignored,
+ unknown,
+ bad,
},
))
}
@@ -301,31 +496,16 @@
dmap: &'a DirstateMap,
matcher: &'b (impl Matcher),
root_dir: impl AsRef<Path> + Sync + Send + Copy,
- list_clean: bool,
- last_normal_time: i64,
- check_exec: bool,
-) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> {
+ options: StatusOptions,
+) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> {
let files = matcher.file_set();
let mut results = vec![];
if let Some(files) = files {
- results.par_extend(walk_explicit(
- &files,
- &dmap,
- root_dir,
- check_exec,
- list_clean,
- last_normal_time,
- ));
+ results.par_extend(walk_explicit(&files, &dmap, root_dir, options));
}
if !matcher.is_exact() {
- let stat_results = stat_dmap_entries(
- &dmap,
- root_dir,
- check_exec,
- list_clean,
- last_normal_time,
- );
+ let stat_results = stat_dmap_entries(&dmap, root_dir, options);
results.par_extend(stat_results);
}
To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list