[PATCH 4 of 8 V2] util: declare wire protocol support of compression engines

Gregory Szorc gregory.szorc at gmail.com
Tue Nov 29 01:58:21 EST 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1480395436 28800
#      Mon Nov 28 20:57:16 2016 -0800
# Node ID 52cbc32c11454f5a72ea233ea49fed41eaec6407
# Parent  2540270e3fab858be2f20fc30ee2011600bdbee9
util: declare wire protocol support of compression engines

This patch implements a new compression engine API allowing
compression engines to declare support for the wire protocol.

Support is declared by returning a compression format string
identifier, which will be added to payloads to signal the compression
type of the data that follows, along with the engine's default integer
priorities for the server and the client.

Accessor methods have been added to the compression engine manager
class to facilitate use.

Note that the "none" and "bz2" engines declare wire protocol support
but aren't advertised by default wherever their priority is 0 (both
roles for "bz2"; the server role only for "none"). It is essentially
free from a coding perspective to support these compression formats,
so we do it in case anyone may derive use from it.

diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt
--- a/mercurial/help/internals/wireprotocol.txt
+++ b/mercurial/help/internals/wireprotocol.txt
@@ -269,6 +269,17 @@ The value of the capability is a comma-d
 supported compression formats. The order of the compression formats is in
 server-preferred order, most preferred first.
 
+The identifiers used by the official Mercurial distribution are:
+
+bzip2
+   bzip2
+none
+   uncompressed / raw data
+zlib
+   zlib (no gzip header)
+zstd
+   zstd
+
 This capability was introduced in Mercurial 4.1 (released February 2017).
 
 getbundle
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -2946,6 +2946,9 @@ class ctxmanager(object):
 
 # compression code
 
+SERVERROLE = 'server'
+CLIENTROLE = 'client'
+
 class compressormanager(object):
     """Holds registrations of various compression engines.
 
@@ -2962,6 +2965,8 @@ class compressormanager(object):
         self._bundlenames = {}
         # Internal bundle identifier to engine name.
         self._bundletypes = {}
+        # Wire proto identifier to engine name.
+        self._wiretypes = {}
 
     def __getitem__(self, key):
         return self._engines[key]
@@ -3003,6 +3008,16 @@ class compressormanager(object):
 
             self._bundletypes[bundletype] = name
 
+        wireinfo = engine.wireprotosupport()
+        if wireinfo:
+            wiretype = wireinfo[0]
+            if wiretype in self._wiretypes:
+                raise error.Abort(_('wire protocol compression %s already '
+                                    'registered by %s') %
+                                  (wiretype, self._wiretypes[wiretype]))
+
+            self._wiretypes[wiretype] = name
+
         self._engines[name] = engine
 
     @property
@@ -3039,6 +3054,32 @@ class compressormanager(object):
                               engine.name())
         return engine
 
+    def supportedwireengines(self, role, onlyavailable=True):
+        """Obtain compression engines that support the wire protocol.
+
+        Returns a list of engines in prioritized order, most desired first.
+
+        If ``onlyavailable`` is set, filter out engines that can't be
+        loaded.
+        """
+        assert role in (CLIENTROLE, SERVERROLE)
+
+        engines = [self._engines[e] for e in self._wiretypes.values()]
+        if onlyavailable:
+            engines = [e for e in engines if e.available()]
+
+        idx = 1 if role == SERVERROLE else 2
+
+        return list(sorted(engines, key=lambda e: e.wireprotosupport()[idx],
+                           reverse=True))
+
+    def forwiretype(self, wiretype):
+        engine = self._engines[self._wiretypes[wiretype]]
+        if not engine.available():
+            raise error.Abort(_('compression engine %s could not be loaded') %
+                              engine.name())
+        return engine
+
 compengines = compressormanager()
 
 class compressionengine(object):
@@ -3079,6 +3120,30 @@ class compressionengine(object):
         """
         return None
 
+    def wireprotosupport(self):
+        """Declare support for this compression format on the wire protocol.
+
+        If this compression engine isn't supported for compressing wire
+        protocol payloads, returns None.
+
+        Otherwise, returns a 3-tuple of the following elements:
+
+        * String format identifier
+        * Integer priority for the server
+        * Integer priority for the client
+
+        The integer priorities are used to order the advertisement of format
+        support by server and client. The highest integer is advertised
+        first. Integers with non-positive values aren't advertised.
+
+        The priority values are somewhat arbitrary and only used for default
+        ordering. The relative order can be changed via config options.
+
+        If wire protocol compression is supported, the class must also implement
+        ``compressstream`` and ``decompressorreader``.
+        """
+        return None
+
     def compressstream(self, it, opts=None):
         """Compress an iterator of chunks.
 
@@ -3107,6 +3172,9 @@ class _zlibengine(compressionengine):
     def bundletype(self):
         return 'gzip', 'GZ'
 
+    def wireprotosupport(self):
+        return 'zlib', 20, 20
+
     def compressstream(self, it, opts=None):
         opts = opts or {}
 
@@ -3140,6 +3208,11 @@ class _bz2engine(compressionengine):
     def bundletype(self):
         return 'bzip2', 'BZ'
 
+    # We declare a protocol name but don't advertise by default because
+    # it is slow.
+    def wireprotosupport(self):
+        return 'bzip2', 0, 0
+
     def compressstream(self, it, opts=None):
         opts = opts or {}
         z = bz2.BZ2Compressor(opts.get('level', 9))
@@ -3188,6 +3261,12 @@ class _noopengine(compressionengine):
     def bundletype(self):
         return 'none', 'UN'
 
+    # Clients always support uncompressed payloads. Servers don't because
+    # unless you are on a fast network, uncompressed payloads can easily
+    # saturate your network pipe.
+    def wireprotosupport(self):
+        return 'none', 0, 10
+
     def compressstream(self, it, opts=None):
         return it
 
@@ -3218,6 +3297,9 @@ class _zstdengine(compressionengine):
     def bundletype(self):
         return 'zstd', 'ZS'
 
+    def wireprotosupport(self):
+        return 'zstd', 50, 50
+
     def compressstream(self, it, opts=None):
         opts = opts or {}
         # zstd level 3 is almost always significantly faster than zlib


More information about the Mercurial-devel mailing list