[PATCH 1 of 2] merge: separate read-only mergestate into separate module

Simon Farnsworth simonfar at fb.com
Tue Mar 1 11:07:47 EST 2016


# HG changeset patch
# User Simon Farnsworth <simonfar at fb.com>
# Date 1456843967 0
#      Tue Mar 01 14:52:47 2016 +0000
# Node ID 37fe1f9d08245f7540cb6137c312ae30dbcde688
# Parent  c7f89ad87baef87f00c507545dfd4cc824bc3131
merge: separate read-only mergestate into separate module

This is code motion to prepare for revset predicates that read the
mergestate; without this code motion, there would be an import loop.

diff --git a/mercurial/merge.py b/mercurial/merge.py
--- a/mercurial/merge.py
+++ b/mercurial/merge.py
@@ -14,7 +14,6 @@
 
 from .i18n import _
 from .node import (
-    bin,
     hex,
     nullhex,
     nullid,
@@ -25,6 +24,7 @@
     destutil,
     error,
     filemerge,
+    mergestate as mergestatemod,
     obsolete,
     scmutil,
     subrepo,
@@ -33,52 +33,8 @@
 )
 
 _pack = struct.pack
-_unpack = struct.unpack
 
-def _droponode(data):
-    # used for compatibility for v1
-    bits = data.split('\0')
-    bits = bits[:-2] + bits[-1:]
-    return '\0'.join(bits)
-
-class mergestate(object):
-    '''track 3-way merge state of individual files
-
-    The merge state is stored on disk when needed. Two files are used: one with
-    an old format (version 1), and one with a new format (version 2). Version 2
-    stores a superset of the data in version 1, including new kinds of records
-    in the future. For more about the new format, see the documentation for
-    `_readrecordsv2`.
-
-    Each record can contain arbitrary content, and has an associated type. This
-    `type` should be a letter. If `type` is uppercase, the record is mandatory:
-    versions of Mercurial that don't support it should abort. If `type` is
-    lowercase, the record can be safely ignored.
-
-    Currently known records:
-
-    L: the node of the "local" part of the merge (hexified version)
-    O: the node of the "other" part of the merge (hexified version)
-    F: a file to be merged entry
-    C: a change/delete or delete/change conflict
-    D: a file that the external merge driver will merge internally
-       (experimental)
-    m: the external merge driver defined for this merge plus its run state
-       (experimental)
-    f: a (filename, dictonary) tuple of optional values for a given file
-    X: unsupported mandatory record type (used in tests)
-    x: unsupported advisory record type (used in tests)
-
-    Merge driver run states (experimental):
-    u: driver-resolved files unmarked -- needs to be run next time we're about
-       to resolve or commit
-    m: driver-resolved files marked -- only needs to be run before commit
-    s: success/skipped -- does not need to be run any more
-
-    '''
-    statepathv1 = 'merge/state'
-    statepathv2 = 'merge/state2'
-
+class mergestate(mergestatemod.mergestatereadonly):
     @staticmethod
     def clean(repo, node=None, other=None):
         """Initialize a brand new merge state, removing any existing state on
@@ -94,13 +50,6 @@
         ms._read()
         return ms
 
-    def __init__(self, repo):
-        """Initialize the merge state.
-
-        Do not use this directly! Instead call read() or clean()."""
-        self._repo = repo
-        self._dirty = False
-
     def reset(self, node=None, other=None):
         self._state = {}
         self._stateextras = {}
@@ -121,211 +70,6 @@
         self._results = {}
         self._dirty = False
 
-    def _read(self):
-        """Analyse each record content to restore a serialized state from disk
-
-        This function process "record" entry produced by the de-serialization
-        of on disk file.
-        """
-        self._state = {}
-        self._stateextras = {}
-        self._local = None
-        self._other = None
-        for var in ('localctx', 'otherctx'):
-            if var in vars(self):
-                delattr(self, var)
-        self._readmergedriver = None
-        self._mdstate = 's'
-        unsupported = set()
-        records = self._readrecords()
-        for rtype, record in records:
-            if rtype == 'L':
-                self._local = bin(record)
-            elif rtype == 'O':
-                self._other = bin(record)
-            elif rtype == 'm':
-                bits = record.split('\0', 1)
-                mdstate = bits[1]
-                if len(mdstate) != 1 or mdstate not in 'ums':
-                    # the merge driver should be idempotent, so just rerun it
-                    mdstate = 'u'
-
-                self._readmergedriver = bits[0]
-                self._mdstate = mdstate
-            elif rtype in 'FDC':
-                bits = record.split('\0')
-                self._state[bits[0]] = bits[1:]
-            elif rtype == 'f':
-                filename, rawextras = record.split('\0', 1)
-                extraparts = rawextras.split('\0')
-                extras = {}
-                i = 0
-                while i < len(extraparts):
-                    extras[extraparts[i]] = extraparts[i + 1]
-                    i += 2
-
-                self._stateextras[filename] = extras
-            elif not rtype.islower():
-                unsupported.add(rtype)
-        self._results = {}
-        self._dirty = False
-
-        if unsupported:
-            raise error.UnsupportedMergeRecords(unsupported)
-
-    def _readrecords(self):
-        """Read merge state from disk and return a list of record (TYPE, data)
-
-        We read data from both v1 and v2 files and decide which one to use.
-
-        V1 has been used by version prior to 2.9.1 and contains less data than
-        v2. We read both versions and check if no data in v2 contradicts
-        v1. If there is not contradiction we can safely assume that both v1
-        and v2 were written at the same time and use the extract data in v2. If
-        there is contradiction we ignore v2 content as we assume an old version
-        of Mercurial has overwritten the mergestate file and left an old v2
-        file around.
-
-        returns list of record [(TYPE, data), ...]"""
-        v1records = self._readrecordsv1()
-        v2records = self._readrecordsv2()
-        if self._v1v2match(v1records, v2records):
-            return v2records
-        else:
-            # v1 file is newer than v2 file, use it
-            # we have to infer the "other" changeset of the merge
-            # we cannot do better than that with v1 of the format
-            mctx = self._repo[None].parents()[-1]
-            v1records.append(('O', mctx.hex()))
-            # add place holder "other" file node information
-            # nobody is using it yet so we do no need to fetch the data
-            # if mctx was wrong `mctx[bits[-2]]` may fails.
-            for idx, r in enumerate(v1records):
-                if r[0] == 'F':
-                    bits = r[1].split('\0')
-                    bits.insert(-2, '')
-                    v1records[idx] = (r[0], '\0'.join(bits))
-            return v1records
-
-    def _v1v2match(self, v1records, v2records):
-        oldv2 = set() # old format version of v2 record
-        for rec in v2records:
-            if rec[0] == 'L':
-                oldv2.add(rec)
-            elif rec[0] == 'F':
-                # drop the onode data (not contained in v1)
-                oldv2.add(('F', _droponode(rec[1])))
-        for rec in v1records:
-            if rec not in oldv2:
-                return False
-        else:
-            return True
-
-    def _readrecordsv1(self):
-        """read on disk merge state for version 1 file
-
-        returns list of record [(TYPE, data), ...]
-
-        Note: the "F" data from this file are one entry short
-              (no "other file node" entry)
-        """
-        records = []
-        try:
-            f = self._repo.vfs(self.statepathv1)
-            for i, l in enumerate(f):
-                if i == 0:
-                    records.append(('L', l[:-1]))
-                else:
-                    records.append(('F', l[:-1]))
-            f.close()
-        except IOError as err:
-            if err.errno != errno.ENOENT:
-                raise
-        return records
-
-    def _readrecordsv2(self):
-        """read on disk merge state for version 2 file
-
-        This format is a list of arbitrary records of the form:
-
-          [type][length][content]
-
-        `type` is a single character, `length` is a 4 byte integer, and
-        `content` is an arbitrary byte sequence of length `length`.
-
-        Mercurial versions prior to 3.7 have a bug where if there are
-        unsupported mandatory merge records, attempting to clear out the merge
-        state with hg update --clean or similar aborts. The 't' record type
-        works around that by writing out what those versions treat as an
-        advisory record, but later versions interpret as special: the first
-        character is the 'real' record type and everything onwards is the data.
-
-        Returns list of records [(TYPE, data), ...]."""
-        records = []
-        try:
-            f = self._repo.vfs(self.statepathv2)
-            data = f.read()
-            off = 0
-            end = len(data)
-            while off < end:
-                rtype = data[off]
-                off += 1
-                length = _unpack('>I', data[off:(off + 4)])[0]
-                off += 4
-                record = data[off:(off + length)]
-                off += length
-                if rtype == 't':
-                    rtype, record = record[0], record[1:]
-                records.append((rtype, record))
-            f.close()
-        except IOError as err:
-            if err.errno != errno.ENOENT:
-                raise
-        return records
-
-    @util.propertycache
-    def mergedriver(self):
-        # protect against the following:
-        # - A configures a malicious merge driver in their hgrc, then
-        #   pauses the merge
-        # - A edits their hgrc to remove references to the merge driver
-        # - A gives a copy of their entire repo, including .hg, to B
-        # - B inspects .hgrc and finds it to be clean
-        # - B then continues the merge and the malicious merge driver
-        #  gets invoked
-        configmergedriver = self._repo.ui.config('experimental', 'mergedriver')
-        if (self._readmergedriver is not None
-            and self._readmergedriver != configmergedriver):
-            raise error.ConfigError(
-                _("merge driver changed since merge started"),
-                hint=_("revert merge driver change or abort merge"))
-
-        return configmergedriver
-
-    @util.propertycache
-    def localctx(self):
-        if self._local is None:
-            raise RuntimeError("localctx accessed but self._local isn't set")
-        return self._repo[self._local]
-
-    @util.propertycache
-    def otherctx(self):
-        if self._other is None:
-            raise RuntimeError("otherctx accessed but self._other isn't set")
-        return self._repo[self._other]
-
-    def active(self):
-        """Whether mergestate is active.
-
-        Returns True if there appears to be mergestate. This is a rough proxy
-        for "is a merge in progress."
-        """
-        # Check local variables before looking at filesystem for performance
-        # reasons.
-        return bool(self._local) or bool(self._state) or \
-               self._repo.vfs.exists(self.statepathv1) or \
-               self._repo.vfs.exists(self.statepathv2)
-
     def commit(self):
         """Write current state on disk (if necessary)"""
         if self._dirty:
@@ -369,7 +113,7 @@
         f.write(hex(self._local) + '\n')
         for rtype, data in irecords:
             if rtype == 'F':
-                f.write('%s\n' % _droponode(data))
+                f.write('%s\n' % mergestatemod._droponode(data))
         f.close()
 
     def _writerecordsv2(self, records):
@@ -408,39 +152,10 @@
         self._stateextras[fd] = { 'ancestorlinknode' : hex(fca.node()) }
         self._dirty = True
 
-    def __contains__(self, dfile):
-        return dfile in self._state
-
-    def __getitem__(self, dfile):
-        return self._state[dfile][0]
-
-    def __iter__(self):
-        return iter(sorted(self._state))
-
-    def files(self):
-        return self._state.keys()
-
     def mark(self, dfile, state):
         self._state[dfile][0] = state
         self._dirty = True
 
-    def mdstate(self):
-        return self._mdstate
-
-    def unresolved(self):
-        """Obtain the paths of unresolved files."""
-
-        for f, entry in self._state.items():
-            if entry[0] == 'u':
-                yield f
-
-    def driverresolved(self):
-        """Obtain the paths of driver-resolved files."""
-
-        for f, entry in self._state.items():
-            if entry[0] == 'd':
-                yield f
-
     def extras(self, filename):
         return self._stateextras.setdefault(filename, {})
 
@@ -534,34 +249,6 @@
 
         Returns the exit code of the merge."""
         return self._resolve(False, dfile, wctx, labels=labels)[1]
-
-    def counts(self):
-        """return counts for updated, merged and removed files in this
-        session"""
-        updated, merged, removed = 0, 0, 0
-        for r, action in self._results.itervalues():
-            if r is None:
-                updated += 1
-            elif r == 0:
-                if action == 'r':
-                    removed += 1
-                else:
-                    merged += 1
-        return updated, merged, removed
-
-    def unresolvedcount(self):
-        """get unresolved count for this merge (persistent)"""
-        return len([True for f, entry in self._state.iteritems()
-                    if entry[0] == 'u'])
-
-    def actions(self):
-        """return lists of actions to perform on the dirstate"""
-        actions = {'r': [], 'f': [], 'a': [], 'am': [], 'g': []}
-        for f, (r, action) in self._results.iteritems():
-            if action is not None:
-                actions[action].append((f, None, "merge result"))
-        return actions
-
     def recordactions(self):
         """record remove/add/get actions in the dirstate"""
         branchmerge = self._repo.dirstate.p2() != nullid
diff --git a/mercurial/mergestate.py b/mercurial/mergestate.py
new file mode 100644
--- /dev/null
+++ b/mercurial/mergestate.py
@@ -0,0 +1,343 @@
+# mergestate.py - on-disk merge state for Mercurial
+#
+# Copyright 2006, 2007 Matt Mackall <mpm at selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import errno
+import struct
+
+from .i18n import _
+from .node import bin
+from . import (
+    error,
+    util,
+)
+
+_unpack = struct.unpack
+
+def _droponode(data):
+    # used for compatibility for v1
+    bits = data.split('\0')
+    bits = bits[:-2] + bits[-1:]
+    return '\0'.join(bits)
+
+class mergestatereadonly(object):
+    '''track 3-way merge state of individual files
+
+    The merge state is stored on disk when needed. Two files are used: one with
+    an old format (version 1), and one with a new format (version 2). Version 2
+    stores a superset of the data in version 1, including new kinds of records
+    in the future. For more about the new format, see the documentation for
+    `_readrecordsv2`.
+
+    Each record can contain arbitrary content, and has an associated type. This
+    `type` should be a letter. If `type` is uppercase, the record is mandatory:
+    versions of Mercurial that don't support it should abort. If `type` is
+    lowercase, the record can be safely ignored.
+
+    Currently known records:
+
+    L: the node of the "local" part of the merge (hexified version)
+    O: the node of the "other" part of the merge (hexified version)
+    F: a file to be merged entry
+    C: a change/delete or delete/change conflict
+    D: a file that the external merge driver will merge internally
+       (experimental)
+    m: the external merge driver defined for this merge plus its run state
+       (experimental)
+    f: a (filename, dictonary) tuple of optional values for a given file
+    X: unsupported mandatory record type (used in tests)
+    x: unsupported advisory record type (used in tests)
+
+    Merge driver run states (experimental):
+    u: driver-resolved files unmarked -- needs to be run next time we're about
+       to resolve or commit
+    m: driver-resolved files marked -- only needs to be run before commit
+    s: success/skipped -- does not need to be run any more
+
+    '''
+    @staticmethod
+    def read(repo):
+        """Initialize the merge state, reading it from disk."""
+        ms = mergestatereadonly(repo)
+        ms._read()
+        return ms
+
+    def __init__(self, repo):
+        """Initialize the merge state.
+
+        Do not use this directly! Instead call read() or clean()."""
+        self._repo = repo
+        self._dirty = False
+
+    statepathv1 = 'merge/state'
+    statepathv2 = 'merge/state2'
+
+    def _read(self):
+        """Analyse each record content to restore a serialized state from disk
+
+        This function process "record" entry produced by the de-serialization
+        of on disk file.
+        """
+        self._state = {}
+        self._stateextras = {}
+        self._local = None
+        self._other = None
+        for var in ('localctx', 'otherctx'):
+            if var in vars(self):
+                delattr(self, var)
+        self._readmergedriver = None
+        self._mdstate = 's'
+        unsupported = set()
+        records = self._readrecords()
+        for rtype, record in records:
+            if rtype == 'L':
+                self._local = bin(record)
+            elif rtype == 'O':
+                self._other = bin(record)
+            elif rtype == 'm':
+                bits = record.split('\0', 1)
+                mdstate = bits[1]
+                if len(mdstate) != 1 or mdstate not in 'ums':
+                    # the merge driver should be idempotent, so just rerun it
+                    mdstate = 'u'
+
+                self._readmergedriver = bits[0]
+                self._mdstate = mdstate
+            elif rtype in 'FDC':
+                bits = record.split('\0')
+                self._state[bits[0]] = bits[1:]
+            elif rtype == 'f':
+                filename, rawextras = record.split('\0', 1)
+                extraparts = rawextras.split('\0')
+                extras = {}
+                i = 0
+                while i < len(extraparts):
+                    extras[extraparts[i]] = extraparts[i + 1]
+                    i += 2
+
+                self._stateextras[filename] = extras
+            elif not rtype.islower():
+                unsupported.add(rtype)
+        self._results = {}
+        self._dirty = False
+
+        if unsupported:
+            raise error.UnsupportedMergeRecords(unsupported)
+
+    def _readrecords(self):
+        """Read merge state from disk and return a list of record (TYPE, data)
+
+        We read data from both v1 and v2 files and decide which one to use.
+
+        V1 has been used by version prior to 2.9.1 and contains less data than
+        v2. We read both versions and check if no data in v2 contradicts
+        v1. If there is not contradiction we can safely assume that both v1
+        and v2 were written at the same time and use the extract data in v2. If
+        there is contradiction we ignore v2 content as we assume an old version
+        of Mercurial has overwritten the mergestate file and left an old v2
+        file around.
+
+        returns list of record [(TYPE, data), ...]"""
+        v1records = self._readrecordsv1()
+        v2records = self._readrecordsv2()
+        if self._v1v2match(v1records, v2records):
+            return v2records
+        else:
+            # v1 file is newer than v2 file, use it
+            # we have to infer the "other" changeset of the merge
+            # we cannot do better than that with v1 of the format
+            mctx = self._repo[None].parents()[-1]
+            v1records.append(('O', mctx.hex()))
+            # add place holder "other" file node information
+            # nobody is using it yet so we do no need to fetch the data
+            # if mctx was wrong `mctx[bits[-2]]` may fails.
+            for idx, r in enumerate(v1records):
+                if r[0] == 'F':
+                    bits = r[1].split('\0')
+                    bits.insert(-2, '')
+                    v1records[idx] = (r[0], '\0'.join(bits))
+            return v1records
+
+    def _v1v2match(self, v1records, v2records):
+        oldv2 = set() # old format version of v2 record
+        for rec in v2records:
+            if rec[0] == 'L':
+                oldv2.add(rec)
+            elif rec[0] == 'F':
+                # drop the onode data (not contained in v1)
+                oldv2.add(('F', _droponode(rec[1])))
+        for rec in v1records:
+            if rec not in oldv2:
+                return False
+        else:
+            return True
+
+    def _readrecordsv1(self):
+        """read on disk merge state for version 1 file
+
+        returns list of record [(TYPE, data), ...]
+
+        Note: the "F" data from this file are one entry short
+              (no "other file node" entry)
+        """
+        records = []
+        try:
+            f = self._repo.vfs(self.statepathv1)
+            for i, l in enumerate(f):
+                if i == 0:
+                    records.append(('L', l[:-1]))
+                else:
+                    records.append(('F', l[:-1]))
+            f.close()
+        except IOError as err:
+            if err.errno != errno.ENOENT:
+                raise
+        return records
+
+    def _readrecordsv2(self):
+        """read on disk merge state for version 2 file
+
+        This format is a list of arbitrary records of the form:
+
+          [type][length][content]
+
+        `type` is a single character, `length` is a 4 byte integer, and
+        `content` is an arbitrary byte sequence of length `length`.
+
+        Mercurial versions prior to 3.7 have a bug where if there are
+        unsupported mandatory merge records, attempting to clear out the merge
+        state with hg update --clean or similar aborts. The 't' record type
+        works around that by writing out what those versions treat as an
+        advisory record, but later versions interpret as special: the first
+        character is the 'real' record type and everything onwards is the data.
+
+        Returns list of records [(TYPE, data), ...]."""
+        records = []
+        try:
+            f = self._repo.vfs(self.statepathv2)
+            data = f.read()
+            off = 0
+            end = len(data)
+            while off < end:
+                rtype = data[off]
+                off += 1
+                length = _unpack('>I', data[off:(off + 4)])[0]
+                off += 4
+                record = data[off:(off + length)]
+                off += length
+                if rtype == 't':
+                    rtype, record = record[0], record[1:]
+                records.append((rtype, record))
+            f.close()
+        except IOError as err:
+            if err.errno != errno.ENOENT:
+                raise
+        return records
+
+    @util.propertycache
+    def mergedriver(self):
+        # protect against the following:
+        # - A configures a malicious merge driver in their hgrc, then
+        #   pauses the merge
+        # - A edits their hgrc to remove references to the merge driver
+        # - A gives a copy of their entire repo, including .hg, to B
+        # - B inspects .hgrc and finds it to be clean
+        # - B then continues the merge and the malicious merge driver
+        #  gets invoked
+        configmergedriver = self._repo.ui.config('experimental', 'mergedriver')
+        if (self._readmergedriver is not None
+            and self._readmergedriver != configmergedriver):
+            raise error.ConfigError(
+                _("merge driver changed since merge started"),
+                hint=_("revert merge driver change or abort merge"))
+
+        return configmergedriver
+
+    @util.propertycache
+    def localctx(self):
+        if self._local is None:
+            raise RuntimeError("localctx accessed but self._local isn't set")
+        return self._repo[self._local]
+
+    @util.propertycache
+    def otherctx(self):
+        if self._other is None:
+            raise RuntimeError("otherctx accessed but self._other isn't set")
+        return self._repo[self._other]
+
+    def active(self):
+        """Whether mergestate is active.
+
+        Returns True if there appears to be mergestate. This is a rough proxy
+        for "is a merge in progress."
+        """
+        # Check local variables before looking at filesystem for performance
+        # reasons.
+        return bool(self._local) or bool(self._state) or \
+               self._repo.vfs.exists(self.statepathv1) or \
+               self._repo.vfs.exists(self.statepathv2)
+
+    def __contains__(self, dfile):
+        return dfile in self._state
+
+    def __getitem__(self, dfile):
+        return self._state[dfile][0]
+
+    def __iter__(self):
+        return iter(sorted(self._state))
+
+    def files(self):
+        return self._state.keys()
+
+    def mdstate(self):
+        return self._mdstate
+
+    def unresolved(self):
+        """Obtain the paths of unresolved files."""
+
+        for f, entry in self._state.items():
+            if entry[0] == 'u':
+                yield f
+
+    def driverresolved(self):
+        """Obtain the paths of driver-resolved files."""
+
+        for f, entry in self._state.items():
+            if entry[0] == 'd':
+                yield f
+
+    def extras(self, filename):
+        return self._stateextras.get(filename, {})
+
+    def counts(self):
+        """return counts for updated, merged and removed files in this
+        session"""
+        updated, merged, removed = 0, 0, 0
+        for r, action in self._results.itervalues():
+            if r is None:
+                updated += 1
+            elif r == 0:
+                if action == 'r':
+                    removed += 1
+                else:
+                    merged += 1
+        return updated, merged, removed
+
+    def unresolvedcount(self):
+        """get unresolved count for this merge (persistent)"""
+        return len([True for f, entry in self._state.iteritems()
+                    if entry[0] == 'u'])
+
+    def actions(self):
+        """return lists of actions to perform on the dirstate"""
+        actions = {'r': [], 'f': [], 'a': [], 'am': [], 'g': []}
+        for f, (r, action) in self._results.iteritems():
+            if action is not None:
+                actions[action].append((f, None, "merge result"))
+        return actions
+


More information about the Mercurial-devel mailing list