D6773: rust-hgpath: add HgPath and HgPathBuf structs to encapsulate handling of paths
Alphare (Raphaël Gomès)
phabricator at mercurial-scm.org
Fri Aug 30 10:38:47 EDT 2019
Alphare updated this revision to Diff 16339.
REPOSITORY
rHG Mercurial
CHANGES SINCE LAST UPDATE
https://phab.mercurial-scm.org/D6773?vs=16338&id=16339
CHANGES SINCE LAST ACTION
https://phab.mercurial-scm.org/D6773/new/
REVISION DETAIL
https://phab.mercurial-scm.org/D6773
AFFECTED FILES
rust/hg-core/src/utils.rs
rust/hg-core/src/utils/hg_path.rs
CHANGE DETAILS
diff --git a/rust/hg-core/src/utils/hg_path.rs b/rust/hg-core/src/utils/hg_path.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/utils/hg_path.rs
@@ -0,0 +1,436 @@
+use std::borrow::Borrow;
+use std::convert::TryInto;
+use std::ffi::OsString;
+use std::iter::FusedIterator;
+use std::ops::{Deref, Index, Range, RangeFrom, RangeFull, RangeTo};
+use std::path::{Path, PathBuf};
+
+/// A repository-relative path (also called a canonical path):
+/// - no null characters
+/// - `/` separates directories
+/// - no consecutive slashes
+/// - no leading slash
+/// - no `.` nor `..` of special meaning
+/// - stored in the repository and shared across platforms
+///
+/// This allows us to be encoding-transparent as much as possible, until really
+/// needed; `HgPath` can be transformed into a platform-specific path (`OsStr`
+/// or `Path`) whenever more complex operations are needed:
+/// On Unix, it's just a byte-to-byte conversion. On Windows, it has to be
+/// decoded from MBCS to WTF-8. If WindowsUTF8Plan is implemented, the source
+/// character encoding will be determined on a per-repository basis.
+///
+/// The type is unsized (its only field is a byte slice), so it is always
+/// handled behind a reference.
+// `repr(transparent)` guarantees that `HgPath` has exactly the layout of
+// `[u8]`, which the pointer casts between `&[u8]` and `&HgPath` depend on.
+#[derive(Eq, Ord, PartialEq, PartialOrd, Debug, Hash)]
+#[repr(transparent)]
+pub struct HgPath {
+    inner: [u8],
+}
+
+/// The ways in which a byte string can fail to be a valid `HgPath`.
+#[derive(Debug, Eq, PartialEq)]
+pub enum HgPathError {
+    /// The first byte of the path is a `/`
+    LeadingSlash,
+    /// Index of the second slash
+    ConsecutiveSlashes(usize),
+    /// Index of the null byte
+    ContainsNullByte(usize),
+}
+
+impl std::fmt::Display for HgPathError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            HgPathError::LeadingSlash => f.write_str("path has a leading slash"),
+            HgPathError::ConsecutiveSlashes(index) => {
+                write!(f, "path has consecutive slashes at byte {}", index)
+            }
+            HgPathError::ContainsNullByte(index) => {
+                write!(f, "path contains a null byte at byte {}", index)
+            }
+        }
+    }
+}
+
+// Lets callers propagate the error with `?` into `Box<dyn Error>` and friends.
+impl std::error::Error for HgPathError {}
+
+impl HgPath {
+    /// Reinterprets `s` as an `HgPath` without validating its contents.
+    fn unchecked_new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
+        // SAFETY: `HgPath` has a single field of type `[u8]`, so the cast
+        // only changes the type through which the bytes are viewed; this
+        // relies on `HgPath` sharing the layout of that field. The returned
+        // reference borrows from `s`, so it cannot outlive the bytes.
+        unsafe { &*(s.as_ref() as *const [u8] as *const Self) }
+    }
+    /// Views `s` as an `HgPath`.
+    ///
+    /// Debug builds panic if the bytes do not pass `check_state`; release
+    /// builds perform no validation.
+    pub fn new<S: AsRef<[u8]> + ?Sized>(s: &S) -> &Self {
+        let new = Self::unchecked_new(s);
+        debug_assert_eq!(Ok(()), new.check_state());
+        new
+    }
+    /// Returns `true` if the path contains no bytes.
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+    /// Returns the length of the path in bytes.
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+    /// Copies the bytes into a new owned `HgPathBuf`.
+    fn to_hg_path_buf(&self) -> HgPathBuf {
+        HgPathBuf {
+            inner: self.inner.to_owned(),
+        }
+    }
+    /// Same as `new`, for callers that already hold a byte slice.
+    fn from_inner(inner: &[u8]) -> &Self {
+        Self::new(inner)
+    }
+    /// Returns an iterator over the bytes of the path.
+    pub fn bytes(&self) -> HgPathBytesIterator {
+        HgPathBytesIterator { path: self }
+    }
+    /// Returns an owned copy with ASCII letters converted to upper case.
+    pub fn to_ascii_uppercase(&self) -> HgPathBuf {
+        HgPathBuf::from(self.inner.to_ascii_uppercase())
+    }
+    /// Returns an owned copy with ASCII letters converted to lower case.
+    pub fn to_ascii_lowercase(&self) -> HgPathBuf {
+        HgPathBuf::from(self.inner.to_ascii_lowercase())
+    }
+    /// Returns the underlying bytes.
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.inner
+    }
+    /// Checks for errors in the path, short-circuiting at the first one.
+    /// Useful to get finer-grained errors. To simply check if the path is
+    /// valid, use `is_valid`.
+    pub fn check_state(&self) -> Result<(), HgPathError> {
+        let bytes = self.as_bytes();
+        // An empty path has no first byte and is valid.
+        if bytes.first() == Some(&b'/') {
+            return Err(HgPathError::LeadingSlash);
+        }
+        let mut previous_byte = None;
+        for (index, &byte) in bytes.iter().enumerate() {
+            match byte {
+                0 => return Err(HgPathError::ContainsNullByte(index)),
+                b'/' if previous_byte == Some(b'/') => {
+                    return Err(HgPathError::ConsecutiveSlashes(index));
+                }
+                _ => (),
+            }
+            previous_byte = Some(byte);
+        }
+        Ok(())
+    }
+    /// Returns `true` if `check_state` finds no error.
+    pub fn is_valid(&self) -> bool {
+        self.check_state().is_ok()
+    }
+}
+
+/// Access to a single byte of the path by position.
+impl Index<usize> for HgPath {
+    type Output = u8;
+
+    fn index(&self, i: usize) -> &u8 {
+        // Plain slice indexing: an out-of-bounds `i` panics.
+        &self.as_bytes()[i]
+    }
+}
+
+/// `path[..]` is the path itself.
+impl Index<RangeFull> for HgPath {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, _index: RangeFull) -> &HgPath {
+        self
+    }
+}
+
+/// `path[..end]`: the leading bytes of the path, viewed as an `HgPath`.
+impl Index<RangeTo<usize>> for HgPath {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, range_to: RangeTo<usize>) -> &HgPath {
+        let head = &self.inner[range_to];
+        // `new` re-validates the sub-path in debug builds.
+        Self::new(head)
+    }
+}
+
+/// `path[start..]`: the trailing bytes of the path, viewed as an `HgPath`.
+impl Index<RangeFrom<usize>> for HgPath {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, range_from: RangeFrom<usize>) -> &HgPath {
+        let tail = &self.inner[range_from];
+        // `new` re-validates the sub-path in debug builds, so a tail that
+        // starts with `/` trips its assertion there.
+        Self::new(tail)
+    }
+}
+/// `path[start..end]`: a run of bytes of the path, viewed as an `HgPath`.
+impl Index<Range<usize>> for HgPath {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, range: Range<usize>) -> &HgPath {
+        let middle = &self.inner[range];
+        // `new` re-validates the sub-path in debug builds.
+        Self::new(middle)
+    }
+}
+
+/// Access to a single byte of the buffer by position.
+impl Index<usize> for HgPathBuf {
+    type Output = u8;
+
+    fn index(&self, i: usize) -> &u8 {
+        // Plain slice indexing: an out-of-bounds `i` panics.
+        &self.inner.as_slice()[i]
+    }
+}
+
+/// `buf[..]` borrows the whole buffer as an `HgPath`.
+impl Index<RangeFull> for HgPathBuf {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, _index: RangeFull) -> &HgPath {
+        let bytes: &[u8] = &self.inner;
+        HgPath::from_inner(bytes)
+    }
+}
+
+/// `buf[..end]`: the leading bytes of the buffer, viewed as an `HgPath`.
+impl Index<RangeTo<usize>> for HgPathBuf {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, range_to: RangeTo<usize>) -> &HgPath {
+        let head = &self.inner[..range_to.end];
+        // `new` re-validates the sub-path in debug builds.
+        HgPath::new(head)
+    }
+}
+
+/// `buf[start..]`: the trailing bytes of the buffer, viewed as an `HgPath`.
+impl Index<RangeFrom<usize>> for HgPathBuf {
+    type Output = HgPath;
+
+    #[inline]
+    fn index(&self, range_from: RangeFrom<usize>) -> &HgPath {
+        let tail = &self.inner[range_from.start..];
+        // `new` re-validates the sub-path in debug builds, so a tail that
+        // starts with `/` trips its assertion there.
+        HgPath::new(tail)
+    }
+}
+
+/// Iterator over the bytes of an `HgPath`, created by `HgPath::bytes`.
+///
+/// `path` always holds the bytes that have not been yielded yet; the
+/// iterator impls shrink it from either end as they go.
+// `Clone` is cheap (the only field is a shared reference) and lets callers
+// keep a copy of the iterator to rewind to.
+#[derive(Clone, Debug)]
+pub struct HgPathBytesIterator<'a> {
+    path: &'a HgPath,
+}
+
+impl<'a> Iterator for HgPathBytesIterator<'a> {
+    type Item = u8;
+
+    /// Yields the first remaining byte, or `None` once the path is exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        let current = self.path;
+        let (&first, rest) = current.inner.split_first()?;
+        // The remainder of a valid path is not necessarily a valid path
+        // itself (after yielding `a` from `a/b`, what is left starts with a
+        // slash), so it must not go through the validating constructor,
+        // which would panic in debug builds.
+        self.path = HgPath::unchecked_new(rest);
+        Some(first)
+    }
+
+    /// The number of remaining bytes is known exactly, as required by the
+    /// `ExactSizeIterator` implementation.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = self.path.len();
+        (remaining, Some(remaining))
+    }
+}
+
+impl<'a> ExactSizeIterator for HgPathBytesIterator<'a> {
+    /// Number of bytes still to be yielded: `path` only ever holds the part
+    /// that has not been consumed, so its length is the answer.
+    fn len(&self) -> usize {
+        let remaining: &HgPath = self.path;
+        remaining.len()
+    }
+}
+
+impl<'a> DoubleEndedIterator for HgPathBytesIterator<'a> {
+    /// Yields the last remaining byte, or `None` once the path is exhausted.
+    fn next_back(&mut self) -> Option<Self::Item> {
+        // `checked_sub` fails exactly when nothing is left.
+        let last = self.path.len().checked_sub(1)?;
+        let byte = self.path[last];
+        self.path = &self.path[..last];
+        Some(byte)
+    }
+}
+
+// Both `next` and `next_back` return `None` whenever the remaining path is
+// empty, and nothing ever grows it back.
+impl FusedIterator for HgPathBytesIterator<'_> {}
+
+/// Owned, growable counterpart of `HgPath`: the path bytes stored in a `Vec`.
+///
+/// The default value is the empty path, the same as `HgPathBuf::new()`.
+#[derive(Eq, Ord, Clone, PartialEq, PartialOrd, Debug, Default, Hash)]
+pub struct HgPathBuf {
+    inner: Vec<u8>,
+}
+
+impl HgPathBuf {
+    /// Creates an empty path buffer.
+    pub fn new() -> Self {
+        Self { inner: Vec::new() }
+    }
+    /// Appends a single byte to the path.
+    ///
+    /// Debug builds panic if the result does not pass `check_state`.
+    pub fn push(&mut self, byte: u8) {
+        self.inner.push(byte);
+        debug_assert_eq!(Ok(()), self.check_state())
+    }
+    /// Returns a new path made of `self` followed by `other`, with a `/`
+    /// inserted between them unless `self` is empty or already ends with one.
+    ///
+    /// Debug builds panic if the result does not pass `check_state`.
+    pub fn join<T: ?Sized + AsRef<HgPath>>(&self, other: &T) -> Self {
+        let other = other.as_ref().as_bytes();
+        // One extra byte for the separator that may be inserted.
+        let mut inner = Vec::with_capacity(self.inner.len() + 1 + other.len());
+        inner.extend_from_slice(&self.inner);
+        if !inner.is_empty() && inner.last() != Some(&b'/') {
+            inner.push(b'/');
+        }
+        inner.extend_from_slice(other);
+        let res = Self { inner };
+        debug_assert_eq!(Ok(()), res.check_state());
+        res
+    }
+    /// Returns `true` if the byte `other` appears anywhere in the path.
+    pub fn contains(&self, other: u8) -> bool {
+        self.inner.contains(&other)
+    }
+    /// Copies `s` into a new path buffer.
+    ///
+    /// Debug builds panic if `s` does not pass `check_state`.
+    pub fn from_bytes(s: &[u8]) -> HgPathBuf {
+        HgPath::new(s).to_owned()
+    }
+    /// Consumes the buffer and returns its bytes.
+    pub fn into_vec(self) -> Vec<u8> {
+        self.inner
+    }
+    /// Borrows the underlying vector.
+    pub fn as_vec(&self) -> &Vec<u8> {
+        &self.inner
+    }
+    /// Borrows the path as a byte slice.
+    ///
+    /// Being an inherent method, this takes precedence over the
+    /// `AsRef<HgPath>` implementation in method-call syntax.
+    pub fn as_ref(&self) -> &[u8] {
+        self.inner.as_ref()
+    }
+}
+
+/// Gives `HgPathBuf` all the `&self` methods of `HgPath`.
+impl Deref for HgPathBuf {
+    type Target = HgPath;
+
+    #[inline]
+    fn deref(&self) -> &HgPath {
+        // Same view as `&self[..]`: the whole buffer, re-validated in debug
+        // builds.
+        HgPath::from_inner(self.inner.as_slice())
+    }
+}
+
+/// Takes ownership of the bytes without copying them.
+impl From<Vec<u8>> for HgPathBuf {
+    fn from(vec: Vec<u8>) -> Self {
+        let path = HgPathBuf { inner: vec };
+        // Validation only happens in debug builds.
+        debug_assert_eq!(Ok(()), path.check_state());
+        path
+    }
+}
+
+/// Copies anything that can be viewed as an `HgPath` into an owned buffer.
+impl<T: ?Sized + AsRef<HgPath>> From<&T> for HgPathBuf {
+    fn from(s: &T) -> HgPathBuf {
+        let borrowed: &HgPath = s.as_ref();
+        let owned = borrowed.to_owned();
+        // Validation only happens in debug builds.
+        debug_assert_eq!(Ok(()), owned.check_state());
+        owned
+    }
+}
+
+/// Unwraps the path buffer into its bytes without copying.
+///
+/// Implemented as `From` rather than `Into`: the standard blanket
+/// implementation still provides `Into<Vec<u8>> for HgPathBuf`, and
+/// `Vec::from(path)` becomes available as well.
+impl From<HgPathBuf> for Vec<u8> {
+    fn from(path: HgPathBuf) -> Vec<u8> {
+        path.inner
+    }
+}
+
+/// Borrows the buffer as an `HgPath`, the same view `Deref` gives.
+impl Borrow<HgPath> for HgPathBuf {
+    fn borrow(&self) -> &HgPath {
+        Index::index(self, ..)
+    }
+}
+
+/// The owned form of an `HgPath` is an `HgPathBuf` holding a copy of its
+/// bytes.
+impl ToOwned for HgPath {
+    type Owned = HgPathBuf;
+
+    fn to_owned(&self) -> Self::Owned {
+        Self::to_hg_path_buf(self)
+    }
+}
+
+/// Lets functions generic over `AsRef<HgPath>` accept an `HgPath` directly.
+impl AsRef<HgPath> for HgPath {
+    fn as_ref(&self) -> &HgPath {
+        &*self
+    }
+}
+
+/// Lets functions generic over `AsRef<HgPath>` accept an `HgPathBuf`.
+impl AsRef<HgPath> for HgPathBuf {
+    fn as_ref(&self) -> &HgPath {
+        // Goes through the `Deref` implementation, spelled out explicitly.
+        self.deref()
+    }
+}
+
+/// Appends every byte produced by an iterator to the path.
+impl Extend<u8> for HgPathBuf {
+    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
+        Extend::extend(&mut self.inner, iter);
+        // The path is validated once, after all bytes are in, and only in
+        // debug builds.
+        debug_assert_eq!(Ok(()), self.check_state())
+    }
+}
+
+/// Conversion to a platform-specific path.
+///
+/// Only implemented on Unix for now; on other platforms the call panics with
+/// `unimplemented!()`.
+impl TryInto<PathBuf> for HgPathBuf {
+    type Error = std::io::Error;
+
+    fn try_into(self) -> Result<PathBuf, Self::Error> {
+        // Each platform branch is a complete expression, so that no variable
+        // is left without a type or a value on platforms that lack an
+        // implementation.
+        #[cfg(unix)]
+        {
+            use std::ffi::OsStr;
+            use std::os::unix::ffi::OsStrExt;
+            // On Unix a path is an arbitrary byte string: no decoding needed.
+            Ok(Path::new(OsStr::from_bytes(&self.inner)).to_path_buf())
+        }
+        #[cfg(not(unix))]
+        {
+            // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
+            unimplemented!()
+        }
+    }
+}
+
+/// Conversion to a platform-specific string.
+///
+/// Only implemented on Unix for now; on other platforms the call panics with
+/// `unimplemented!()`.
+impl TryInto<OsString> for HgPathBuf {
+    type Error = std::io::Error;
+
+    fn try_into(self) -> Result<OsString, Self::Error> {
+        // Each platform branch is a complete expression, so that no variable
+        // is left without a type or a value on platforms that lack an
+        // implementation.
+        #[cfg(unix)]
+        {
+            use std::os::unix::ffi::OsStringExt;
+            // On Unix an `OsString` is an arbitrary byte string, so the
+            // buffer is handed over as is, without copying.
+            Ok(OsString::from_vec(self.inner))
+        }
+        #[cfg(not(unix))]
+        {
+            // TODO: convert from Windows MBCS (ANSI encoding) to WTF8.
+            unimplemented!()
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `check_state` reports the first problem it finds; everything else is
+    /// accepted.
+    #[test]
+    fn test_path_states() {
+        // Invalid paths are built without validation so that the checker
+        // itself is what gets exercised.
+        let leading = HgPath::unchecked_new(b"/");
+        assert_eq!(Err(HgPathError::LeadingSlash), leading.check_state());
+
+        let doubled = HgPath::unchecked_new(b"a/b//c");
+        assert_eq!(
+            Err(HgPathError::ConsecutiveSlashes(4)),
+            doubled.check_state()
+        );
+
+        let with_null = HgPath::unchecked_new(b"a/b/\0c");
+        assert_eq!(
+            Err(HgPathError::ContainsNullByte(4)),
+            with_null.check_state()
+        );
+
+        assert!(HgPath::new(b"").is_valid());
+        assert!(HgPath::new(b"a/b/c").is_valid());
+        // Backslashes in paths are not significant, but allowed
+        assert!(HgPath::new(br"a\b/c").is_valid());
+        // Dots in paths are not significant, but allowed
+        assert!(HgPath::new(b"a/b/../c/").is_valid());
+        assert!(HgPath::new(b"./a/b/../c/").is_valid());
+    }
+
+    /// The byte iterator can be consumed from both ends and stays exhausted.
+    #[test]
+    fn test_iter() {
+        // A single byte taken from the front
+        let mut front = HgPath::new(b"a").bytes();
+        assert_eq!(Some(b'a'), front.next());
+        assert_eq!(None, front.next_back());
+        assert_eq!(None, front.next());
+
+        // A single byte taken from the back
+        let mut back = HgPath::new(b"a").bytes();
+        assert_eq!(Some(b'a'), back.next_back());
+        assert_eq!(None, back.next_back());
+        assert_eq!(None, back.next());
+
+        // Both ends meeting in the middle
+        let mut both = HgPath::new(b"abc").bytes();
+        assert_eq!(Some(b'a'), both.next());
+        assert_eq!(Some(b'c'), both.next_back());
+        assert_eq!(Some(b'b'), both.next_back());
+        assert_eq!(None, both.next_back());
+        assert_eq!(None, both.next());
+
+        // Front to back only
+        let mut forward = HgPath::new(b"abc").bytes();
+        assert_eq!(Some(b'a'), forward.next());
+        assert_eq!(Some(b'b'), forward.next());
+        assert_eq!(Some(b'c'), forward.next());
+        assert_eq!(None, forward.next_back());
+        assert_eq!(None, forward.next());
+
+        // Feeding a collection
+        let mut collected = Vec::new();
+        collected.extend(HgPath::new(b"abc").bytes());
+        assert_eq!(vec![b'a', b'b', b'c'], collected);
+
+        // Searching from the back
+        let mut found = HgPath::new(b"abc").bytes();
+        assert_eq!(Some(2), found.rposition(|c| c == b'c'));
+
+        let mut missing = HgPath::new(b"abc").bytes();
+        assert_eq!(None, missing.rposition(|c| c == b'd'));
+    }
+}
diff --git a/rust/hg-core/src/utils.rs b/rust/hg-core/src/utils.rs
--- a/rust/hg-core/src/utils.rs
+++ b/rust/hg-core/src/utils.rs
@@ -8,6 +8,7 @@
//! Contains useful functions, traits, structs, etc. for use in core.
pub mod files;
+pub mod hg_path;
/// Replaces the `from` slice with the `to` slice inside the `buf` slice.
///
To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel
More information about the Mercurial-devel
mailing list