D7254: rust-status: improve status performance
Alphare (Raphaël Gomès)
phabricator at mercurial-scm.org
Wed Nov 6 13:22:17 UTC 2019
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
This change does more things in the parallel loop, refactors the file-level
logic into two functions for added clarity.
This bit of Rust code takes 55ms to execute on a repo where the stat'ing part
of Valentin's fast path takes 40ms. While the code differs a bit and it's hard
to get an exact measurement of how much of a performance impact it has, I can
be fairly certain that this implementation is *at worse* twice as slow.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D7254
AFFECTED FILES
rust/hg-core/src/dirstate/status.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -10,22 +10,127 @@
//! and will only be triggered in narrow cases.
use crate::utils::files::HgMetadata;
-use crate::utils::hg_path::{hg_path_to_path_buf, HgPathBuf};
-use crate::{DirstateEntry, DirstateMap, EntryState};
+use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf};
+use crate::{CopyMap, DirstateEntry, DirstateMap, EntryState};
use rayon::prelude::*;
use std::path::Path;
-// Stat all entries in the `DirstateMap` and return their new metadata.
-pub fn stat_dmap_entries(
+/// Marker enum used to dispatch new status entries into the right collections.
+/// Is similar to `crate::EntryState`, but represents the transient state of
+/// entries during the lifetime of a command.
+enum Dispatch {
+ Unsure,
+ Modified,
+ Added,
+ Removed,
+ Deleted,
+ Clean,
+ Unknown,
+}
+
+#[inline]
+/// The file corresponding to the dirstate entry was found on the filesystem.
+fn handle_found(
+ filename: impl AsRef<HgPath>,
+ entry: DirstateEntry,
+ metadata: HgMetadata,
+ copy_map: &CopyMap,
+ check_exec: bool,
+ list_clean: bool,
+ last_normal_time: i64,
+) -> (HgPathBuf, Dispatch) {
+ let filename = filename.as_ref();
+ let DirstateEntry {
+ state,
+ mode,
+ mtime,
+ size,
+ } = entry;
+
+ let HgMetadata {
+ st_mode,
+ st_size,
+ st_mtime,
+ ..
+ } = metadata;
+
+ match state {
+ EntryState::Normal => {
+ // Dates and times that are outside the 31-bit signed
+ // range are compared modulo 2^31. This should prevent
+ // it from behaving badly with very large files or
+ // corrupt dates while still having a high probability
+ // of detecting changes. (issue2608)
+ let range_mask = 0x7fffffff;
+
+ let size_changed = (size != st_size as i32)
+ && size != (st_size as i32 & range_mask);
+ let mode_changed =
+ (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec;
+ if size >= 0
+ && (size_changed || mode_changed)
+ || size == -2 // other parent
+ || copy_map.contains_key(filename)
+ {
+ (filename.to_owned(), Dispatch::Modified)
+ } else if mtime != st_mtime as i32
+ && mtime != (st_mtime as i32 & range_mask)
+ {
+ (filename.to_owned(), Dispatch::Unsure)
+ } else if st_mtime == last_normal_time {
+ // the file may have just been marked as normal and
+ // it may have changed in the same second without
+ // changing its size. This can happen if we quickly
+ // do multiple commits. Force lookup, so we don't
+ // miss such a racy file change.
+ (filename.to_owned(), Dispatch::Unsure)
+ } else if list_clean {
+ (filename.to_owned(), Dispatch::Clean)
+ } else {
+ (filename.to_owned(), Dispatch::Unknown)
+ }
+ }
+ EntryState::Merged => (filename.to_owned(), Dispatch::Modified),
+ EntryState::Added => (filename.to_owned(), Dispatch::Added),
+ EntryState::Removed => (filename.to_owned(), Dispatch::Removed),
+ EntryState::Unknown => (filename.to_owned(), Dispatch::Unknown),
+ }
+}
+
+#[inline]
+/// The file corresponding to this Dirstate entry is missing.
+fn handle_missing(
+ filename: impl AsRef<HgPath>,
+ state: EntryState,
+) -> (HgPathBuf, Dispatch) {
+ let filename = filename.as_ref();
+ match state {
+ // File was removed from the filesystem during commands
+ EntryState::Normal | EntryState::Merged | EntryState::Added => {
+ (filename.to_owned(), Dispatch::Deleted)
+ }
+ // File was removed, everything is normal
+ EntryState::Removed => (filename.to_owned(), Dispatch::Removed),
+ // File is unknown to Mercurial, everything is normal
+ EntryState::Unknown => (filename.to_owned(), Dispatch::Unknown),
+ }
+}
+
+/// Stat all entries in the `DirstateMap` and mark them for dispatch into
+/// the relevant collections.
+fn stat_dmap_entries(
dmap: &DirstateMap,
root_dir: impl AsRef<Path> + Sync,
-) -> std::io::Result<Vec<(HgPathBuf, Option<HgMetadata>)>> {
+ check_exec: bool,
+ list_clean: bool,
+ last_normal_time: i64,
+) -> std::io::Result<Vec<(HgPathBuf, Dispatch)>> {
dmap.par_iter()
.filter_map(
// Getting file metadata is costly, so we don't do it if the
// file is already present in the results, hence `filter_map`
- |(filename, _)| -> Option<
- std::io::Result<(HgPathBuf, Option<HgMetadata>)>
+ |(filename, entry)| -> Option<
+ std::io::Result<(HgPathBuf, Dispatch)>
> {
let meta = match hg_path_to_path_buf(filename) {
Ok(p) => root_dir.as_ref().join(p).symlink_metadata(),
@@ -37,12 +142,16 @@
if !(m.file_type().is_file()
|| m.file_type().is_symlink()) =>
{
- Ok((filename.to_owned(), None))
+ Ok(handle_missing(filename, entry.state))
}
- Ok(m) => Ok((
- filename.to_owned(),
- Some(HgMetadata::from_metadata(m)),
- )),
+ Ok(m) => Ok(handle_found(
+ filename,
+ *entry,
+ HgMetadata::from_metadata(m),
+ &dmap.copy_map,
+ check_exec,
+ list_clean,
+ last_normal_time)),
Err(ref e)
if e.kind() == std::io::ErrorKind::NotFound
|| e.raw_os_error() == Some(20) =>
@@ -51,7 +160,7 @@
// `NotADirectory` (errno 20)
// It happens if the dirstate contains `foo/bar` and
// foo is not a directory
- Ok((filename.to_owned(), None))
+ Ok(handle_missing(filename.to_owned(), entry.state))
}
Err(e) => Err(e),
})
@@ -71,11 +180,7 @@
}
fn build_response(
- dmap: &DirstateMap,
- list_clean: bool,
- last_normal_time: i64,
- check_exec: bool,
- results: Vec<(HgPathBuf, Option<HgMetadata>)>,
+ results: Vec<(HgPathBuf, Dispatch)>,
) -> (Vec<HgPathBuf>, StatusResult) {
let mut lookup = vec![];
let mut modified = vec![];
@@ -84,76 +189,15 @@
let mut deleted = vec![];
let mut clean = vec![];
- for (filename, metadata_option) in results.into_iter() {
- let DirstateEntry {
- state,
- mode,
- mtime,
- size,
- } = match dmap.get(&filename) {
- None => {
- continue;
- }
- Some(e) => *e,
- };
-
- match metadata_option {
- None => {
- match state {
- EntryState::Normal
- | EntryState::Merged
- | EntryState::Added => deleted.push(filename),
- EntryState::Removed => removed.push(filename),
- _ => {}
- };
- }
- Some(HgMetadata {
- st_mode,
- st_size,
- st_mtime,
- ..
- }) => {
- match state {
- EntryState::Normal => {
- // Dates and times that are outside the 31-bit signed
- // range are compared modulo 2^31. This should prevent
- // it from behaving badly with very large files or
- // corrupt dates while still having a high probability
- // of detecting changes. (issue2608)
- let range_mask = 0x7fffffff;
-
- let size_changed = (size != st_size as i32)
- && size != (st_size as i32 & range_mask);
- let mode_changed = (mode ^ st_mode as i32) & 0o100
- != 0o000
- && check_exec;
- if size >= 0
- && (size_changed || mode_changed)
- || size == -2 // other parent
- || dmap.copy_map.contains_key(&filename)
- {
- modified.push(filename);
- } else if mtime != st_mtime as i32
- && mtime != (st_mtime as i32 & range_mask)
- {
- lookup.push(filename);
- } else if st_mtime == last_normal_time {
- // the file may have just been marked as normal and
- // it may have changed in the same second without
- // changing its size. This can happen if we quickly
- // do multiple commits. Force lookup, so we don't
- // miss such a racy file change.
- lookup.push(filename);
- } else if list_clean {
- clean.push(filename);
- }
- }
- EntryState::Merged => modified.push(filename),
- EntryState::Added => added.push(filename),
- EntryState::Removed => removed.push(filename),
- EntryState::Unknown => {}
- }
- }
+ for (filename, dispatch) in results.into_iter() {
+ match dispatch {
+ Dispatch::Unknown => {}
+ Dispatch::Unsure => lookup.push(filename),
+ Dispatch::Modified => modified.push(filename),
+ Dispatch::Added => added.push(filename),
+ Dispatch::Removed => removed.push(filename),
+ Dispatch::Deleted => deleted.push(filename),
+ Dispatch::Clean => clean.push(filename),
}
}
@@ -176,13 +220,13 @@
last_normal_time: i64,
check_exec: bool,
) -> std::io::Result<(Vec<HgPathBuf>, StatusResult)> {
- let results = stat_dmap_entries(&dmap, root_dir)?;
-
- Ok(build_response(
+ let results = stat_dmap_entries(
&dmap,
+ root_dir,
+ check_exec,
list_clean,
last_normal_time,
- check_exec,
- results,
- ))
+ )?;
+
+ Ok(build_response(results))
}
To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list