[PATCH 4 of 4] obsolete: introduce a new binary encoding for obsmarkers (version 1)

Pierre-Yves David pierre-yves.david at ens-lyon.org
Tue Sep 16 21:28:34 CDT 2014


# HG changeset patch
# User Pierre-Yves David <pierre-yves.david at fb.com>
# Date 1410372923 -7200
#      Wed Sep 10 20:15:23 2014 +0200
# Node ID ce25ab065627b537d4ccf05e1ea959be6318414a
# Parent  78d7d968abda48db22b94f59e6b3ddc926aa9ef8
obsolete: introduce a new binary encoding for obsmarkers (version 1)

This new encoding explicitly stores the date and parents, allowing significantly
faster marker decoding. See inline documentation for details.

This format is not yet used to store markers on disk, but it will be used in
bundle2 exchange if both sides support it. Support for the on-disk format is
coming in another changeset.

diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -206,13 +206,123 @@ def _fm0encodeonemarker(marker):
     format = _fm0fixed + (_fm0node * nbsuc)
     data = [nbsuc, len(metadata), flags, pre]
     data.extend(sucs)
     return _pack(format, *data) + metadata
 
+## Parsing and writing of version "1"
+#
+# The header is followed by the markers. Each marker is made of:
+#
+# - 1 unsigned 32-bit integer: total size of the marker (including this field)
+#
+# - 1 64-bit float: date in seconds since epoch
+#
+# - 1 16-bit signed integer: timezone offset in minutes
+#
+# - 1 unsigned byte: number of successors "N", can be zero.
+#
+# - 2 bytes: a bit field. It is reserved for flags used in common
+#   obsolete marker operations, to avoid repeated decoding of metadata
+#   entries.
+#
+#   The highest two bits are used to encode the number (P) of the
+#   precursor's parents stored in the marker:
+#
+#     0: parents data stored but no parent,
+#     1: one parent stored,
+#     2: two parents stored,
+#     3: no parent data stored
+#
+# - 20 bytes: precursor changeset identifier.
+#
+# - N*20 bytes: successors changesets identifiers.
+#
+# - P*20 bytes: parents of the precursor changeset.
+#
+# - remaining bytes: metadata as a sequence of nul-terminated strings. Each
+#   string contains a key and a value, separated by a colon ':', without
+#   additional encoding. Keys cannot contain '\0' or ':' and values cannot
+#   contain '\0'.
+_fm1version = 1
+_fm1fixed = '>IdhBH20s'
+_fm1node = '20s'
+_fm1fsize = struct.calcsize(_fm1fixed)
+_fm1fnodesize = struct.calcsize(_fm1node)
+_fm1parentnone = 3
+_fm1parentshift = 14
+_fm1parentmask = (_fm1parentnone << _fm1parentshift)
+
+def _fm1readmarkers(data, off=0):
+    # Loop on markers
+    l = len(data)
+    while off + _fm1fsize <= l:
+        initialoff = off
+        # read fixed part
+        cur = data[off:off + _fm1fsize]
+        off += _fm1fsize
+        ttsize, seconds, tz, nbsuc, flags, prec = _unpack(_fm1fixed, cur)
+        # extract the number of parent information
+        nbpar = (flags & _fm1parentmask) >> _fm1parentshift
+        flags &= ~ _fm1parentmask
+        if nbpar == _fm1parentnone:
+            nbpar = None
+        # build the date tuple (convert tz from minutes to seconds)
+        date = (seconds, tz * 60)
+        # read replacement
+        sucs = ()
+        if nbsuc:
+            s = (_fm1fnodesize * nbsuc)
+            cur = data[off:off + s]
+            sucs = _unpack(_fm1node * nbsuc, cur)
+            off += s
+        # read parents
+        if nbpar is None:
+            parents = None
+        elif nbpar == 0:
+            parents = ()
+        elif nbpar:  # neither None nor zero
+            s = (_fm1fnodesize * nbpar)
+            cur = data[off:off + s]
+            parents = _unpack(_fm1node * nbpar, cur)
+            off += s
+        # read metadata
+        # (metadata will be decoded on demand)
+        #
+        # (size of metadata is total size minus all data already read)
+        mdsize = ttsize - (off - initialoff)
+        metadata = data[off:off + mdsize]
+        if len(metadata) != mdsize:
+            raise util.Abort(_('parsing obsolete marker: metadata is too '
+                               'short, %d bytes expected, got %d')
+                             % (mdsize, len(metadata)))
+        off += mdsize
+
+        yield (prec, sucs, flags, metadata, date, parents)
+
+def _fm1encodeonemarker(marker):
+    pre, sucs, flags, metadata, date, parents = marker
+    nbsuc = len(sucs)
+    nbextranodes = nbsuc
+    if parents is None:
+        nbpar = _fm1parentnone
+    else:
+        nbpar = len(parents)
+        nbextranodes += nbpar
+    flags |= (nbpar << _fm1parentshift)
+    format = _fm1fixed + (_fm1node * nbextranodes)
+    totalsize = struct.calcsize(format) + len(metadata)
+    # tz is stored in minutes, so we divide by 60
+    data = [totalsize, date[0], date[1]//60, nbsuc, flags, pre]
+    data.extend(sucs)
+    if parents is not None:
+        data.extend(parents)
+    return _pack(format, *data) + metadata
+
 # mapping to read/write various marker formats
 # <version> -> (decoder, encoder)
-formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker)}
+formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker),
+           _fm1version: (_fm1readmarkers, _fm1encodeonemarker)}
 
 def _readmarkers(data):
     """Read and enumerate markers from raw data"""
     off = 0
     diskversion = _unpack('>B', data[off:off + 1])[0]


More information about the Mercurial-devel mailing list