D2851: wireproto: define and implement protocol for issuing requests

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Mon Mar 19 20:01:13 EDT 2018


indygreg updated this revision to Diff 7139.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2851?vs=7047&id=7139

REVISION DETAIL
  https://phab.mercurial-scm.org/D2851

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/help/internals/wireprotocol.txt
  mercurial/wireprotoframing.py
  mercurial/wireprotoserver.py
  tests/test-http-api-httpv2.t

CHANGE DETAILS

diff --git a/tests/test-http-api-httpv2.t b/tests/test-http-api-httpv2.t
--- a/tests/test-http-api-httpv2.t
+++ b/tests/test-http-api-httpv2.t
@@ -1,5 +1,5 @@
   $ HTTPV2=exp-http-v2-0001
-  $ MEDIATYPE=application/mercurial-tbd
+  $ MEDIATYPE=application/mercurial-exp-framing-0001
 
   $ send() {
   >   hg --verbose debugwireproto --peer raw http://$LOCALIP:$HGPORT/
@@ -120,9 +120,9 @@
   s>     Server: testing stub value\r\n
   s>     Date: $HTTP_DATE$\r\n
   s>     Content-Type: text/plain\r\n
-  s>     Content-Length: 72\r\n
+  s>     Content-Length: 85\r\n
   s>     \r\n
-  s>     client MUST specify Accept header with value: application/mercurial-tbd\n
+  s>     client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n
 
 Bad Accept header results in 406
 
@@ -143,9 +143,9 @@
   s>     Server: testing stub value\r\n
   s>     Date: $HTTP_DATE$\r\n
   s>     Content-Type: text/plain\r\n
-  s>     Content-Length: 72\r\n
+  s>     Content-Length: 85\r\n
   s>     \r\n
-  s>     client MUST specify Accept header with value: application/mercurial-tbd\n
+  s>     client MUST specify Accept header with value: application/mercurial-exp-framing-0001\n
 
 Bad Content-Type header results in 415
 
@@ -158,7 +158,7 @@
   using raw connection to peer
   s>     POST /api/exp-http-v2-0001/ro/customreadonly HTTP/1.1\r\n
   s>     Accept-Encoding: identity\r\n
-  s>     accept: application/mercurial-tbd\r\n
+  s>     accept: application/mercurial-exp-framing-0001\r\n
   s>     content-type: badmedia\r\n
   s>     user-agent: test\r\n
   s>     host: $LOCALIP:$HGPORT\r\n (glob)
@@ -168,26 +168,29 @@
   s>     Server: testing stub value\r\n
   s>     Date: $HTTP_DATE$\r\n
   s>     Content-Type: text/plain\r\n
-  s>     Content-Length: 75\r\n
+  s>     Content-Length: 88\r\n
   s>     \r\n
-  s>     client MUST send Content-Type header with value: application/mercurial-tbd\n
+  s>     client MUST send Content-Type header with value: application/mercurial-exp-framing-0001\n
 
 Request to read-only command works out of the box
 
   $ send << EOF
   > httprequest POST api/$HTTPV2/ro/customreadonly
   >     accept: $MEDIATYPE
   >     content-type: $MEDIATYPE
   >     user-agent: test
+  >     frame command-name eos customreadonly
   > EOF
   using raw connection to peer
   s>     POST /api/exp-http-v2-0001/ro/customreadonly HTTP/1.1\r\n
   s>     Accept-Encoding: identity\r\n
-  s>     accept: application/mercurial-tbd\r\n
-  s>     content-type: application/mercurial-tbd\r\n
+  s>     accept: application/mercurial-exp-framing-0001\r\n
+  s>     content-type: application/mercurial-exp-framing-0001\r\n
   s>     user-agent: test\r\n
+  s>     content-length: 18\r\n
   s>     host: $LOCALIP:$HGPORT\r\n (glob)
   s>     \r\n
+  s>     \x0e\x00\x00\x11customreadonly
   s> makefile('rb', None)
   s>     HTTP/1.1 200 OK\r\n
   s>     Server: testing stub value\r\n
@@ -283,15 +286,18 @@
   >     user-agent: test
   >     accept: $MEDIATYPE
   >     content-type: $MEDIATYPE
+  >     frame command-name eos customreadonly
   > EOF
   using raw connection to peer
   s>     POST /api/exp-http-v2-0001/rw/customreadonly HTTP/1.1\r\n
   s>     Accept-Encoding: identity\r\n
-  s>     accept: application/mercurial-tbd\r\n
-  s>     content-type: application/mercurial-tbd\r\n
+  s>     accept: application/mercurial-exp-framing-0001\r\n
+  s>     content-type: application/mercurial-exp-framing-0001\r\n
   s>     user-agent: test\r\n
+  s>     content-length: 18\r\n
   s>     host: $LOCALIP:$HGPORT\r\n (glob)
   s>     \r\n
+  s>     \x0e\x00\x00\x11customreadonly
   s> makefile('rb', None)
   s>     HTTP/1.1 200 OK\r\n
   s>     Server: testing stub value\r\n
@@ -311,7 +317,7 @@
   using raw connection to peer
   s>     POST /api/exp-http-v2-0001/rw/badcommand HTTP/1.1\r\n
   s>     Accept-Encoding: identity\r\n
-  s>     accept: application/mercurial-tbd\r\n
+  s>     accept: application/mercurial-exp-framing-0001\r\n
   s>     user-agent: test\r\n
   s>     host: $LOCALIP:$HGPORT\r\n (glob)
   s>     \r\n
diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -32,7 +32,7 @@
 HGTYPE = 'application/mercurial-0.1'
 HGTYPE2 = 'application/mercurial-0.2'
 HGERRTYPE = 'application/hg-error'
-HTTPV2TYPE = 'application/mercurial-tbd'
+FRAMINGTYPE = b'application/mercurial-exp-framing-0001'
 
 HTTPV2 = wireprototypes.HTTPV2
 SSHV1 = wireprototypes.SSHV1
@@ -336,21 +336,21 @@
         res.setbodybytes(_('invalid wire protocol command: %s') % command)
         return
 
-    if req.headers.get(b'Accept') != HTTPV2TYPE:
+    if req.headers.get(b'Accept') != FRAMINGTYPE:
         res.status = b'406 Not Acceptable'
         res.headers[b'Content-Type'] = b'text/plain'
         res.setbodybytes(_('client MUST specify Accept header with value: %s\n')
-                           % HTTPV2TYPE)
+                           % FRAMINGTYPE)
         return
 
     if (b'Content-Type' in req.headers
-        and req.headers[b'Content-Type'] != HTTPV2TYPE):
+        and req.headers[b'Content-Type'] != FRAMINGTYPE):
         res.status = b'415 Unsupported Media Type'
         # TODO we should send a response with appropriate media type,
         # since client does Accept it.
         res.headers[b'Content-Type'] = b'text/plain'
         res.setbodybytes(_('client MUST send Content-Type header with '
-                           'value: %s\n') % HTTPV2TYPE)
+                           'value: %s\n') % FRAMINGTYPE)
         return
 
     # We don't do anything meaningful yet.
diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py
new file mode 100644
--- /dev/null
+++ b/mercurial/wireprotoframing.py
@@ -0,0 +1,156 @@
+# wireprotoframing.py - unified framing protocol for wire protocol
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc at gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+# This file contains functionality to support the unified frame-based wire
+# protocol. For details about the protocol, see
+# `hg help internals.wireprotocol`.
+
+from __future__ import absolute_import
+
+import struct
+
+from . import (
+    util,
+)
+
+FRAME_HEADER_SIZE = 4  # octets: 24-bit payload length + type/flags octet
+DEFAULT_MAX_FRAME_SIZE = 32768  # read size per command data frame
+
+FRAME_TYPE_COMMAND_NAME = 0x01
+FRAME_TYPE_COMMAND_ARGUMENT = 0x02
+FRAME_TYPE_COMMAND_DATA = 0x03
+
+FRAME_TYPES = {  # names accepted by makeframefromhumanstring()
+    b'command-name': FRAME_TYPE_COMMAND_NAME,
+    b'command-argument': FRAME_TYPE_COMMAND_ARGUMENT,
+    b'command-data': FRAME_TYPE_COMMAND_DATA,
+}
+
+FLAG_COMMAND_NAME_EOS = 0x01
+FLAG_COMMAND_NAME_HAVE_ARGS = 0x02
+FLAG_COMMAND_NAME_HAVE_DATA = 0x04
+
+FLAGS_COMMAND = {  # named flags for command-name frames
+    b'eos': FLAG_COMMAND_NAME_EOS,
+    b'have-args': FLAG_COMMAND_NAME_HAVE_ARGS,
+    b'have-data': FLAG_COMMAND_NAME_HAVE_DATA,
+}
+
+FLAG_COMMAND_ARGUMENT_CONTINUATION = 0x01
+FLAG_COMMAND_ARGUMENT_EOA = 0x02
+
+FLAGS_COMMAND_ARGUMENT = {  # named flags for command-argument frames
+    b'continuation': FLAG_COMMAND_ARGUMENT_CONTINUATION,
+    b'eoa': FLAG_COMMAND_ARGUMENT_EOA,
+}
+
+FLAG_COMMAND_DATA_CONTINUATION = 0x01
+FLAG_COMMAND_DATA_EOS = 0x02
+
+FLAGS_COMMAND_DATA = {  # named flags for command-data frames
+    b'continuation': FLAG_COMMAND_DATA_CONTINUATION,
+    b'eos': FLAG_COMMAND_DATA_EOS,
+}
+
+# Maps frame types to their available flags.
+FRAME_TYPE_FLAGS = {
+    FRAME_TYPE_COMMAND_NAME: FLAGS_COMMAND,
+    FRAME_TYPE_COMMAND_ARGUMENT: FLAGS_COMMAND_ARGUMENT,
+    FRAME_TYPE_COMMAND_DATA: FLAGS_COMMAND_DATA,
+}
+
+ARGUMENT_FRAME_HEADER = struct.Struct(r'<HH')  # name length, value length
+
+def makeframe(frametype, frameflags, payload):
+    """Assemble a frame into a byte array; ValueError on oversized fields."""
+    # 24-bit length, 4-bit type and flags: reject rather than truncate/corrupt.
+    if (len(payload) > 0xffffff or not 0 <= frametype <= 0xf
+            or not 0 <= frameflags <= 0xf):
+        raise ValueError('frame field out of range')
+    frame = bytearray(FRAME_HEADER_SIZE + len(payload))
+    frame[0:3] = struct.pack(r'<I', len(payload))[0:3]
+    frame[3] = (frametype << 4) | frameflags
+    frame[4:] = payload
+    return frame
+
+def makeframefromhumanstring(s):
+    """Given a string of the form: <type> <flags> <payload>, creates a frame.
+
+    This can be used by user-facing applications and tests for creating
+    frames easily without having to type out a bunch of constants.
+
+    Frame type and flags can be specified by integer or named constant.
+    Flags can be delimited by `|` to bitwise OR them together. A numeric
+    type without an entry in FRAME_TYPE_FLAGS is accepted with integer flags.
+    """
+    frametype, frameflags, payload = s.split(b' ', 2)
+
+    # Named types resolve via FRAME_TYPES; anything else must be an integer.
+    frametype = int(FRAME_TYPES.get(frametype, frametype))
+
+    # Unknown numeric types have no named flags, only integer ones.
+    validflags = FRAME_TYPE_FLAGS.get(frametype, {})
+    finalflags = 0
+    for flag in frameflags.split(b'|'):
+        if flag in validflags:
+            finalflags |= validflags[flag]
+        else:
+            finalflags |= int(flag)
+
+    # The payload may contain escape sequences; resolve them before framing.
+    payload = util.unescapestr(payload)
+    return makeframe(frametype, finalflags, payload)
+
+def createcommandframes(cmd, args, datafh=None):
+    """Create frames necessary to transmit a request to run a command.
+
+    This is a generator of bytearrays. Each item represents a frame
+    ready to be sent over the wire to a peer.
+
+    ``cmd`` is the command name, ``args`` maps argument names to values
+    (emitted in sorted key order), and ``datafh`` is an optional object
+    with a ``read(size)`` method whose content is sent as command data.
+    """
+    flags = 0
+    if args:
+        flags |= FLAG_COMMAND_NAME_HAVE_ARGS
+    if datafh:
+        flags |= FLAG_COMMAND_NAME_HAVE_DATA
+
+    # With neither arguments nor data, the name frame ends the request.
+    if not flags:
+        flags |= FLAG_COMMAND_NAME_EOS
+
+    yield makeframe(FRAME_TYPE_COMMAND_NAME, flags, cmd)
+
+    for i, k in enumerate(sorted(args)):
+        v = args[k]
+        last = i == len(args) - 1
+
+        # TODO handle splitting of argument values across frames.
+        # Payload: <name length> <value length> <name> <value>.
+        payload = bytearray(ARGUMENT_FRAME_HEADER.pack(len(k), len(v)))
+        payload += k
+        payload += v
+
+        flags = FLAG_COMMAND_ARGUMENT_EOA if last else 0
+        yield makeframe(FRAME_TYPE_COMMAND_ARGUMENT, flags, payload)
+
+    if datafh:
+        # Read one chunk ahead: only an empty read means end of stream, so
+        # a short read (possible on pipes/sockets) must not be taken as EOS.
+        data = datafh.read(DEFAULT_MAX_FRAME_SIZE)
+        while True:
+            pending = datafh.read(DEFAULT_MAX_FRAME_SIZE)
+            if not pending:
+                break
+
+            yield makeframe(FRAME_TYPE_COMMAND_DATA,
+                            FLAG_COMMAND_DATA_CONTINUATION, data)
+            data = pending
+
+        yield makeframe(FRAME_TYPE_COMMAND_DATA, FLAG_COMMAND_DATA_EOS, data)
diff --git a/mercurial/help/internals/wireprotocol.txt b/mercurial/help/internals/wireprotocol.txt
--- a/mercurial/help/internals/wireprotocol.txt
+++ b/mercurial/help/internals/wireprotocol.txt
@@ -187,12 +187,15 @@
 Requests to unknown commands or URLS result in an HTTP 404.
 TODO formally define response type, how error is communicated, etc.
 
-HTTP request and response bodies use the *TBD Protocol* for media exchange.
+HTTP request and response bodies use the *Unified Frame-Based Protocol*
+(defined below) for media exchange. The entirety of the HTTP message
+body is 0 or more frames as defined by this protocol.
 
 Clients and servers MUST advertise the ``TBD`` media type via the
 ``Content-Type`` request and response headers. In addition, clients MUST
 advertise this media type value in their ``Accept`` request header in all
 requests.
+TODO finalize the media type. For now, it is defined in wireprotoserver.py.
 
 Servers receiving requests without an ``Accept`` header SHOULD respond with
 an HTTP 406.
@@ -429,7 +432,7 @@
 SSH Version 2 Transport
 -----------------------
 
-**Experimental**
+**Experimental and under development**
 
 Version 2 of the SSH transport behaves identically to version 1 of the SSH
 transport with the exception of handshake semantics. See above for how
@@ -451,6 +454,164 @@
 Following capabilities advertisement, the peers communicate using version
 1 of the SSH transport.
 
+Unified Frame-Based Protocol
+============================
+
+**Experimental and under development**
+
+The *Unified Frame-Based Protocol* is a communications protocol between
+Mercurial peers. The protocol aims to be mostly transport agnostic
+(works similarly on HTTP, SSH, etc).
+
+To operate the protocol, a bi-directional, half-duplex pipe supporting
+ordered sends and receives is required. That is, each peer has one pipe
+for sending data and another for receiving.
+
+The protocol is request-response based: the client issues requests to
+the server, which issues replies to those requests. Server-initiated
+messaging is not supported.
+
+All data is read and written in atomic units called *frames*. These
+are conceptually similar to TCP packets. Higher-level functionality
+is built on the exchange and processing of frames.
+
+Frames begin with a 4 octet header followed by a variable length
+payload::
+
+    +-----------------------------------------------+
+    |                 Length (24)                   |
+    +-----------+-----------------------------------+
+    | Type (4)  |
+    +-----------+
+    | Flags (4) |
+    +===========+===================================================|
+    |                     Frame Payload (0...)                    ...
+    +---------------------------------------------------------------+
+
+The length of the frame payload is expressed as an unsigned 24 bit
+little endian integer. Values larger than 65535 MUST NOT be used unless
+given permission by the server as part of the negotiated capabilities
+during the handshake. The frame header is not part of the advertised
+frame length.
+
+The 4-bit ``Type`` field denotes the type of message being sent.
+
+The 4-bit ``Flags`` field defines special, per-type attributes for
+the frame.
+
+The sections below define the frame types and their behavior.
+
+Command Request (``0x01``)
+--------------------------
+
+This frame contains a request to run a command.
+
+The name of the command to run constitutes the entirety of the frame
+payload.
+
+This frame type MUST ONLY be sent from clients to servers: it is illegal
+for a server to send this frame to a client.
+
+The following flag values are defined for this type:
+
+0x01
+   End of command data. When set, the client will not send any command
+   arguments or additional command data. When set, the command has been
+   fully issued and the server has the full context to process the command.
+   The next frame issued by the client is not part of this command.
+0x02
+   Command argument frames expected. When set, the client will send
+   *Command Argument* frames containing command argument data.
+0x04
+   Command data frames expected. When set, the client will send
+   *Command Data* frames containing a raw stream of data for this
+   command.
+
+The ``0x01`` flag is mutually exclusive with both the ``0x02`` and ``0x04``
+flags.
+
+Command Argument (``0x02``)
+---------------------------
+
+This frame contains a named argument for a command.
+
+The frame type MUST ONLY be sent from clients to servers: it is illegal
+for a server to send this frame to a client.
+
+The payload consists of:
+
+* A 16-bit little endian integer denoting the length of the
+  argument name.
+* A 16-bit little endian integer denoting the length of the
+  argument value.
+* N bytes of ASCII data containing the argument name.
+* N bytes of binary data containing the argument value.
+
+The payload MUST hold the entirety of the 32-bit header and the
+argument name. The argument value MAY span multiple frames. If this
+occurs, the appropriate frame flag should be set to indicate this.
+
+The following flag values are defined for this type:
+
+0x01
+   Argument data continuation. When set, the data for this argument did
+   not fit in a single frame and the next frame will contain additional
+   argument data.
+
+0x02
+   End of arguments data. When set, the client will not send any more
+   command arguments for the command this frame is associated with.
+   The next frame issued by the client will be command data or
+   belong to a separate request.
+
+Command Data (``0x03``)
+-----------------------
+
+This frame contains raw data for a command.
+
+Most commands can be executed by specifying arguments. However,
+arguments have an upper bound to their length. Commands that
+accept data beyond this length, or data whose length isn't known
+when the command is initially sent, need to stream arbitrary data
+to the server. This frame type facilitates the sending of this
+data.
+
+The payload of this frame type consists of a stream of raw data to be
+consumed by the command handler on the server. The format of the data
+is command specific.
+
+The following flag values are defined for this type:
+
+0x01
+   Command data continuation. When set, the data for this command
+   continues into a subsequent frame.
+
+0x02
+   End of data. When set, command data has been fully sent to the
+   server. The command has been fully issued and no new data for this
+   command will be sent. The next frame will belong to a new command.
+
+Issuing Commands
+----------------
+
+A client can request that a remote run a command by sending it
+frames defining that command. This logical stream is composed of
+1 ``Command Request`` frame, 0 or more ``Command Argument`` frames,
+and 0 or more ``Command Data`` frames.
+
+Argument frames are the recommended mechanism for transferring fixed
+sets of parameters to a command. Data frames are appropriate for
+transferring variable data. A similar comparison would be to HTTP:
+argument frames are headers and the message body is data frames.
+
+It is recommended for servers to delay the dispatch of a command
+until all argument frames for that command have been received. Servers
+MAY impose limits on the maximum argument size.
+TODO define failure mechanism.
+
+Servers MAY dispatch to commands immediately once argument data
+is available or delay until command data is received in full.
+
 Capabilities
 ============
 
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -78,6 +78,7 @@
     url as urlmod,
     util,
     vfs as vfsmod,
+    wireprotoframing,
     wireprotoserver,
 )
 from .utils import dateutil
@@ -2711,6 +2712,12 @@
         The content of the file defined as the value to this argument will be
         transferred verbatim as the HTTP request body.
 
+    ``frame <type> <flags> <payload>``
+        Send a unified protocol frame as part of the request body.
+
+        All frames will be collected and sent as the body to the HTTP
+        request.
+
     close
     -----
 
@@ -2750,6 +2757,28 @@
     ---------
 
     ``read()`` N bytes from the server's stderr pipe, if available.
+
+    Specifying Unified Frame-Based Protocol Frames
+    ----------------------------------------------
+
+    It is possible to emit *Unified Frame-Based Protocol* frames by using
+    special syntax.
+
+    A frame is composed of a type, flags, and payload. These can be parsed
+    from a string of the form ``<type> <flags> <payload>``. That is, 3
+    space-delimited strings.
+
+    ``payload`` is the simplest: it is evaluated as a Python byte string
+    literal.
+
+    ``type`` can be an integer value for the frame type or the string name
+    of the type. The strings are defined in ``wireprotoframing.py``. e.g.
+    ``command-name``.
+
+    ``flags`` is a ``|`` delimited list of flag components. Each component
+    (and there can be just one) can be an integer or a flag name for the
+    specified frame type. Values are resolved to integers and then bitwise
+    OR'd together.
     """
     opts = pycompat.byteskwargs(opts)
 
@@ -2953,6 +2982,7 @@
             method, httppath = request[1:]
             headers = {}
             body = None
+            frames = []
             for line in lines:
                 line = line.lstrip()
                 m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line)
@@ -2963,11 +2993,20 @@
                 if line.startswith(b'BODYFILE '):
                     with open(line.split(b' ', 1), 'rb') as fh:
                         body = fh.read()
+                elif line.startswith(b'frame '):
+                    frame = wireprotoframing.makeframefromhumanstring(
+                        line[len(b'frame '):])
+
+                    frames.append(frame)
                 else:
                     raise error.Abort(_('unknown argument to httprequest: %s') %
                                       line)
 
             url = path + httppath
+
+            if frames:
+                body = b''.join(bytes(f) for f in frames)
+
             req = urlmod.urlreq.request(pycompat.strurl(url), body, headers)
 
             # urllib.Request insists on using has_data() as a proxy for



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list