D7796: rust-nodemap: input/output primitives

gracinet (Georges Racinet) phabricator at mercurial-scm.org
Mon Jan 6 19:26:24 UTC 2020


gracinet created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  These make it possible to create a `NodeTree` from an immutable opaque
  sequence of bytes, which could be passed over from Python
  (extracted from a `PyBuffer`) or directly mmapped from a file.
  
  Conversely, we can consume
  a `NodeTree`, extracting the bytes that express what
  has been added to the immutable part, together with the
  original immutable part.
  This gives callers the choice of how to start a new `NodeTree`.
  After writing to disk, some would prefer to reread for
  best guarantees (very cheap if mmapping), while others will
  find it more convenient to grow the memory that was considered
  immutable in the `NodeTree` and continue from there.
  
  In `load_bytes`, we anticipate the final version of the file
  format a bit, by allowing an offset for fixed data at the
  beginning of the file.
  
  This is enough to build examples running on real data and
  start gathering performance hints.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D7796

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS

diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -17,8 +17,10 @@
     RevlogIndex,
 };
 use std::fmt;
+use std::mem;
 use std::ops::Deref;
 use std::ops::Index;
+use std::slice;
 
 #[derive(Debug, PartialEq)]
 pub enum NodeMapError {
@@ -132,6 +134,8 @@
 #[derive(Clone, PartialEq)]
 pub struct Block([RawElement; 16]);
 
+pub const BLOCK_SIZE: usize = mem::size_of::<Block>(); // in bytes
+
 impl Block {
     fn new() -> Self {
         Block([-1; 16])
@@ -221,6 +225,57 @@
         }
     }
 
+    /// Create from `amount` bytes of an opaque buffer, starting at `offset`
+    ///
+    /// - `bytes` could be derived from `PyBuffer` and `Mmap` objects.
+    /// - `offset` allows for the final file format to include fixed data
+    ///   (generation number, behavioural flags)
+    /// - `amount` is expressed in bytes, and is not automatically derived from
+    ///   `bytes`, so that a caller that manages them atomically can perform
+    ///   temporary disk serializations and still rollback easily if needed.
+    ///   First use-case for this would be to support Mercurial shell hooks.
+    ///
+    /// Trailing bytes that do not make a whole `Block` are ignored.
+    ///
+    /// Panics if `bytes` is smaller than `offset + amount`
+    pub fn load_bytes(
+        bytes: Box<dyn Deref<Target = [u8]> + Send>,
+        offset: usize,
+        amount: usize,
+    ) -> Self {
+        let readonly = NodeTreeBytes::new(bytes, offset, amount);
+        NodeTree::new(Box::new(readonly))
+    }
+
+    /// Retrieve the original immutable data and the added `Block`s; the
+    /// root comes last among these, unless it equals the last immutable one
+    pub fn into_readonly_and_added(
+        self,
+    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
+        let NodeTree { readonly, growable: mut added, root, .. } = self;
+        if readonly.last() != Some(&root) {
+            added.push(root);
+        }
+        (readonly, added)
+    }
+
+    /// Retrieve added `Block`s as bytes, ready to be written to persistent
+    /// storage, along with the original immutable data
+    pub fn into_readonly_and_added_bytes(
+        self,
+    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
+        let (readonly, vec) = self.into_readonly_and_added();
+        // The bytes are copied instead of re-typing the allocation with
+        // `Vec::from_raw_parts`: a `Vec<u8>` must own an allocation made
+        // with alignment 1, which the one of a `Vec<Block>` is not if
+        // `Block` is more than byte-aligned.
+        // SAFETY: `vec` owns `vec.len()` contiguous initialized `Block`s;
+        // assumes `Block` has no padding bytes (TODO confirm `RawElement`).
+        let (ptr, len) = (vec.as_ptr() as *const u8, vec.len() * BLOCK_SIZE);
+        let bytes = unsafe { slice::from_raw_parts(ptr, len) }.to_vec();
+        (readonly, bytes)
+    }
+
     /// Total number of blocks
     fn len(&self) -> usize {
         self.readonly.len() + self.growable.len() + 1
@@ -366,6 +421,42 @@
     }
 }
 
+pub struct NodeTreeBytes {
+    buffer: Box<dyn Deref<Target = [u8]> + Send>,
+    offset: usize, // position in `buffer` of the first `Block`, in bytes
+    len_in_blocks: usize, // number of whole `Block`s exposed by `deref()`
+}
+
+impl NodeTreeBytes {
+    /// Panics unless `offset + amount` fits in `buffer` (overflow-safe check)
+    fn new(
+        buffer: Box<dyn Deref<Target = [u8]> + Send>,
+        offset: usize,
+        amount: usize,
+    ) -> Self {
+        assert!(offset <= buffer.len() && amount <= buffer.len() - offset);
+        NodeTreeBytes {
+            buffer,
+            offset,
+            len_in_blocks: amount / BLOCK_SIZE,
+        }
+    }
+}
+
+impl Deref for NodeTreeBytes {
+    type Target = [Block];
+
+    /// View the `len_in_blocks` `Block`s starting `offset` bytes into `buffer`
+    fn deref(&self) -> &[Block] {
+        // SAFETY: `new()` asserted that `offset + amount` fits in `buffer`.
+        // NOTE(review): nothing checks the alignment of `start` for `Block`.
+        unsafe {
+            let start = self.buffer.as_ptr().offset(self.offset as isize);
+            slice::from_raw_parts(start as *const Block, self.len_in_blocks)
+        }
+    }
+}
+
 struct NodeTreeVisitor<'n, 'p> {
     nt: &'n NodeTree,
     prefix: NodePrefixRef<'p>,
@@ -710,4 +801,31 @@
 
         Ok(())
     }
+
+    /// The sample `NodeTree`, taken as is, has no added data to give back
+    #[test]
+    fn test_into_added_empty() {
+        let (_, blocks) = sample_nodetree().into_readonly_and_added();
+        assert!(blocks.is_empty());
+        let (_, bytes) = sample_nodetree().into_readonly_and_added_bytes();
+        assert!(bytes.is_empty());
+    }
+
+    #[test]
+    fn test_into_added_bytes() -> Result<(), NodeMapError> {
+        let mut idx = TestNtIndex::new();
+        idx.insert(0, "1234")?;
+        let mut committed = idx.commit();
+        committed.insert(4, "cafe")?;
+        let added = committed.nt.into_readonly_and_added_bytes().1;
+
+        // only the root block has been changed
+        assert_eq!(added.len(), BLOCK_SIZE);
+        // second 4-byte element: -2, in big endian
+        assert_eq!(&added[4..8], (-2i32).to_be_bytes());
+        // thirteenth 4-byte element: -6, in big endian
+        assert_eq!(&added[48..52], (-6i32).to_be_bytes());
+        Ok(())
+    }
+
 }



To: gracinet, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel


More information about the Mercurial-devel mailing list