[PATCH 1 of 8] obsolete: introduction of obsolete markers

pierre-yves.david at logilab.fr pierre-yves.david at logilab.fr
Thu Jun 7 12:24:52 CDT 2012


# HG changeset patch
# User Pierre-Yves.David at ens-lyon.org
# Date 1339088859 -7200
# Node ID 83d66f9957e9f73404c2c776fc3dbe5dce1c18e8
# Parent  2255950e1f7663a9faa6b57040cc5c0debe7d4dd
obsolete: introduction of obsolete markers

Markers are stored as binary records in a log structured file in
.hg/store/obsstore.

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -5,11 +5,11 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from node import bin, hex, nullid, nullrev, short
 from i18n import _
-import repo, changegroup, subrepo, discovery, pushkey
+import repo, changegroup, subrepo, discovery, pushkey, obsolete
 import changelog, dirstate, filelog, manifest, context, bookmarks, phases
 import lock, transaction, store, encoding
 import scmutil, util, extensions, hook, error, revset
 import match as matchmod
 import merge as mergemod
@@ -190,10 +190,18 @@ class localrepository(repo.repository):
 
     @storecache('phaseroots')
     def _phasecache(self):
         return phases.phasecache(self, self._phasedefaults)
 
+    @storecache('obsstore')
+    def obsstore(self):
+        store = obsolete.obsstore()
+        data = self.sopener.tryread('obsstore')
+        if data:
+            store.loadmarkers(data)
+        return store
+
     @storecache('00changelog.i')
     def changelog(self):
         c = changelog.changelog(self.sopener)
         if 'HG_PENDING' in os.environ:
             p = os.environ['HG_PENDING']
@@ -941,10 +949,20 @@ class localrepository(repo.repository):
 
         def unlock():
             self.store.write()
             if '_phasecache' in vars(self):
                 self._phasecache.write()
+            if 'obsstore' in vars(self) and self.obsstore._new:
+                # XXX: transaction logic should be used here. But for
+                # now rewriting the whole file is good enough.
+                f = self.sopener('obsstore', 'wb', atomictemp=True)
+                try:
+                    self.obsstore.flushmarkers(f)
+                    f.close()
+                except: # re-raises
+                    f.discard()
+                    raise
             for k, ce in self._filecache.items():
                 if k == 'dirstate':
                     continue
                 ce.refresh()
 
diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
new file mode 100644
--- /dev/null
+++ b/mercurial/obsolete.py
@@ -0,0 +1,175 @@
+# obsolete.py - obsolete markers handling
+#
+# Copyright 2012 Pierre-Yves David <pierre-yves.david at ens-lyon.org>
+#                Logilab SA        <contact at logilab.fr>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Obsolete markers handling
+
+An obsolete marker maps an old changeset to a list of new
+changesets. If the list of new changesets is empty, the old changeset
+is said to be "killed". Otherwise, the old changeset is being
+"replaced" by the new changesets.
+
+Obsolete markers can be used to record and distribute changeset graph
+transformations performed by history rewriting operations, and help
+build new tools to reconcile conflicting rewriting actions. To
+facilitate conflict resolution, markers include various annotations
+besides old and new changeset identifiers, such as creation date or
+author name.
+
+
+Format
+------
+
+Markers are stored in an append-only file located at
+'.hg/store/obsstore'.
+
+The file starts with a version header:
+
+- 1 unsigned byte: version number, starting at zero.
+
+
+The header is followed by the markers. Each marker is made of:
+
+- 1 unsigned byte: number of new changesets "R", could be zero.
+
+- 1 unsigned 32-bits integer: metadata size "M" in bytes.
+
+- 1 byte: a bit field. It is reserved for flags used in obsolete
+  markers common operations, to avoid repeated decoding of metadata
+  entries.
+
+- 20 bytes: obsoleted changeset identifier.
+
+- R*20 bytes: new changeset identifiers.
+
+- M bytes: metadata as a sequence of nul-terminated strings. Each
+  string contains a key and a value, separated by a colon ':', without
+  additional encoding. Keys cannot contain '\0' or ':' and values
+  cannot contain '\0'.
+"""
+import struct
+from mercurial import util
+from i18n import _
+
+_pack = struct.pack
+_unpack = struct.unpack
+
+
+
+# data used for parsing and writing
+_fmversion = 0
+_fmfixed   = '>BIB20s'
+_fmnode = '20s'
+_fmfsize = struct.calcsize(_fmfixed)
+_fnodesize = struct.calcsize(_fmnode)
+
+def _readmarkers(data):
+    """Read and enumerate markers from raw data"""
+    off = 0
+    diskversion = _unpack('>B', data[off:off + 1])[0]
+    off += 1
+    if diskversion != _fmversion:
+        raise util.Abort(_('parsing obsolete marker: unknown version %r')
+                         % diskversion)
+
+    # Loop on markers
+    l = len(data)
+    while off + _fmfsize <= l:
+        # read fixed part
+        cur = data[off:off + _fmfsize]
+        off += _fmfsize
+        nbsuc, mdsize, flags, pre = _unpack(_fmfixed, cur)
+        # read replacement
+        sucs = ()
+        if nbsuc:
+            s = (_fnodesize * nbsuc)
+            cur = data[off:off + s]
+            sucs = _unpack(_fmnode * nbsuc, cur)
+            off += s
+        # read metadata
+        # (metadata will be decoded on demand)
+        metadata = data[off:off + mdsize]
+        if len(metadata) != mdsize:
+            raise util.Abort(_('parsing obsolete marker: metadata is too '
+                               'short, %d bytes expected, got %d')
+                             % (len(metadata), mdsize))
+        off += mdsize
+        yield (pre, sucs, flags, metadata)
+
+def encodemeta(meta):
+    """Return encoded metadata string to string mapping.
+
+    Assume no ':' in key and no '\0' in both key and value."""
+    for key, value in meta.iteritems():
+        if ':' in key or '\0' in key:
+            raise ValueError("':' and '\0' are forbidden in metadata key'")
+        if '\0' in value:
+            raise ValueError("':' are forbidden in metadata value'")
+    return '\0'.join(['%s:%s' % (k, meta[k]) for k in sorted(meta)])
+
+def decodemeta(data):
+    """Return string to string dictionary from encoded version."""
+    d = {}
+    for l in data.split('\0'):
+        if l:
+            key, value = l.split(':')
+            d[key] = value
+    return d
+
+class obsstore(object):
+    """Store obsolete markers
+
+    Markers can be accessed with two mappings:
+    - precursors: old -> set(markers in which old is the obsoleted node)
+    - successors: new -> set(markers in which new is a replacement node)
+    """
+
+    def __init__(self):
+        self._all = []
+        # new markers to serialize
+        self._new = []
+        self.precursors = {}
+        self.successors = {}
+
+    def add(self, marker):
+        """Add a new marker to the store
+
+        This marker still needs to be written to disk"""
+        self._new.append(marker)
+        self._load(marker)
+
+    def loadmarkers(self, data):
+        """Load all markers in data, mark them as known."""
+        for marker in _readmarkers(data):
+            self._load(marker)
+
+    def flushmarkers(self, stream):
+        """Write all markers to a stream
+
+        After this operation, "new" markers are considered "known"."""
+        self._writemarkers(stream)
+        self._new[:] = []
+
+    def _load(self, marker):
+        self._all.append(marker)
+        pre, sucs = marker[:2]
+        self.precursors.setdefault(pre, set()).add(marker)
+        for suc in sucs:
+            self.successors.setdefault(suc, set()).add(marker)
+
+    def _writemarkers(self, stream):
+        # Kept separate from flushmarkers(), it will be reused for
+        # markers exchange.
+        stream.write(_pack('>B', _fmversion))
+        for marker in self._all:
+            pre, sucs, flags, metadata = marker
+            nbsuc = len(sucs)
+            format = _fmfixed + (_fmnode * nbsuc)
+            data = [nbsuc, len(metadata), flags, pre]
+            data.extend(sucs)
+            stream.write(_pack(format, *data))
+            stream.write(metadata)


More information about the Mercurial-devel mailing list