D7864: rust-utils: add Rust implementation of Python's "os.path.splitdrive"
Alphare (Raphaël Gomès)
phabricator at mercurial-scm.org
Tue Jan 14 17:33:31 UTC 2020
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.
REVISION SUMMARY
I did not originally intend to write the NT (Windows) version as well, but
since it is done I am keeping it, so that any further effort to get the Rust
code working on Windows is a little easier.
REPOSITORY
rHG Mercurial
BRANCH
default
REVISION DETAIL
https://phab.mercurial-scm.org/D7864
AFFECTED FILES
rust/hg-core/src/utils/files.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/utils/files.rs b/rust/hg-core/src/utils/files.rs
--- a/rust/hg-core/src/utils/files.rs
+++ b/rust/hg-core/src/utils/files.rs
@@ -85,6 +85,83 @@
path.to_ascii_lowercase()
}
+/// Split a Windows pathname into drive/UNC sharepoint and relative path
+/// specifiers.
+///
+/// This is a port of the Python stdlib's `os.path.splitdrive` (NT flavour).
+/// It returns `(drive_or_unc, rest)`; either part may be empty, and
+/// concatenating the two parts always yields the bytes of `path`.
+///
+/// Both `\` and `/` are accepted as directory separators. The separators
+/// found in `path` are returned unchanged.
+///
+/// If the second byte of the path is a colon, `drive_or_unc` contains the
+/// first two bytes (everything up to and including the colon).
+/// e.g. `split_drive("c:/dir")` returns `("c:", "/dir")`
+///
+/// If the path is a UNC path, `drive_or_unc` contains the host name and the
+/// share, up to but not including the separator that follows the share.
+/// e.g. `split_drive("//host/computer/dir")` returns
+/// `("//host/computer", "/dir")`
+///
+/// A path starting with three separators, or one whose host name is directly
+/// followed by two separators (`//host//share`), is not treated as a UNC
+/// path and gets an empty drive.
+///
+/// Paths cannot contain both a drive letter and a UNC path.
+#[cfg(windows)]
+pub fn split_drive(path: impl AsRef<HgPath>) -> (HgPathBuf, HgPathBuf) {
+    let path = path.as_ref();
+    let bytes = path.as_bytes();
+    // Windows treats `/` as an alternative to `\`, so both must be
+    // recognized. Testing each byte directly avoids building a normalized
+    // copy of the whole path.
+    let is_sep = |b: u8| b == b'\\' || b == b'/';
+    // Result for paths without any drive or UNC prefix.
+    let no_drive = || (HgPathBuf::new(), path.to_owned());
+    // Split the original (non-normalized) bytes at `mid`.
+    let split_at = |mid: usize| {
+        (
+            HgPathBuf::from_bytes(&bytes[..mid]),
+            HgPathBuf::from_bytes(&bytes[mid..]),
+        )
+    };
+
+    if bytes.len() < 2 {
+        return no_drive();
+    }
+    if is_sep(bytes[0])
+        && is_sep(bytes[1])
+        && (bytes.len() < 3 || !is_sep(bytes[2]))
+    {
+        // Is a UNC path:
+        // vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
+        // \\machine\mountpoint\directory\etc\...
+        //            directory ^^^^^^^^^^^^^^^
+        let host_end = match bytes[2..].iter().position(|b| is_sep(*b)) {
+            Some(offset) => offset + 2,
+            // Only a host name, no share: not a usable UNC prefix
+            None => return no_drive(),
+        };
+        let share_start = host_end + 1;
+        let share_len =
+            bytes[share_start..].iter().position(|b| is_sep(*b));
+        return match share_len {
+            // A UNC path can't have two slashes in a row
+            // (after the initial two)
+            Some(0) => no_drive(),
+            Some(len) => split_at(share_start + len),
+            // The share runs to the end of the path
+            None => split_at(bytes.len()),
+        };
+    }
+    if bytes[1] == b':' {
+        return split_at(2);
+    }
+    no_drive()
+}
+
+/// Split a pathname into drive and path.
+///
+/// On Posix there is no drive, so the first element is always empty and
+/// the second element is an owned copy of `path`.
+#[cfg(unix)]
+pub fn split_drive(path: impl AsRef<HgPath>) -> (HgPathBuf, HgPathBuf) {
+    let path: &HgPath = path.as_ref();
+    (HgPathBuf::new(), path.to_owned())
+}
+
#[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
pub struct HgMetadata {
pub st_dev: u64,
@@ -133,4 +210,101 @@
assert_eq!(dirs.next(), None);
assert_eq!(dirs.next(), None);
}
+
+    /// On Unix, `split_drive` must return an empty drive and the untouched
+    /// path, even when the path contains colons.
+    #[test]
+    #[cfg(unix)]
+    fn test_split_drive() {
+        // Inputs taken from the Python stdlib's tests
+        let inputs: [&[u8]; 3] = [br"/foo/bar", br"foo:bar", br":foo:bar"];
+        for raw in inputs.iter() {
+            assert_eq!(
+                split_drive(HgPath::new(raw)),
+                (HgPathBuf::new(), HgPathBuf::from_bytes(raw))
+            );
+        }
+    }
+
+    /// Checks drive-letter and UNC splitting on Windows, with both `\` and
+    /// `/` separators, including malformed UNC prefixes that must yield an
+    /// empty drive.
+    #[test]
+    #[cfg(windows)]
+    fn test_split_drive() {
+        // Each case is (input, expected drive or UNC share, expected rest)
+        let cases: [(&[u8], &[u8], &[u8]); 9] = [
+            (br"c:\foo\bar", br"c:", br"\foo\bar"),
+            (b"c:/foo/bar", br"c:", br"/foo/bar"),
+            (
+                br"\\conky\mountpoint\foo\bar",
+                br"\\conky\mountpoint",
+                br"\foo\bar",
+            ),
+            (
+                br"//conky/mountpoint/foo/bar",
+                br"//conky/mountpoint",
+                br"/foo/bar",
+            ),
+            // Three leading separators: not a UNC path
+            (
+                br"\\\conky\mountpoint\foo\bar",
+                br"",
+                br"\\\conky\mountpoint\foo\bar",
+            ),
+            (
+                br"///conky/mountpoint/foo/bar",
+                br"",
+                br"///conky/mountpoint/foo/bar",
+            ),
+            // Doubled separator after the host name: not a UNC path
+            (
+                br"\\conky\\mountpoint\foo\bar",
+                br"",
+                br"\\conky\\mountpoint\foo\bar",
+            ),
+            (
+                br"//conky//mountpoint/foo/bar",
+                br"",
+                br"//conky//mountpoint/foo/bar",
+            ),
+            // UNC part containing U+0130
+            (
+                b"//conky/MOUNTPO\xc4\xb0NT/foo/bar",
+                b"//conky/MOUNTPO\xc4\xb0NT",
+                br"/foo/bar",
+            ),
+        ];
+        for &(input, drive, rest) in cases.iter() {
+            assert_eq!(
+                split_drive(HgPathBuf::from_bytes(input)),
+                (HgPathBuf::from_bytes(drive), HgPathBuf::from_bytes(rest))
+            );
+        }
+    }
}
To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list