[PATCH 7 of 9 RFC] pushkey: support for encoding and decoding raw listkeys dicts

Gregory Szorc gregory.szorc at gmail.com
Sun Aug 14 17:10:06 EDT 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1471207500 25200
#      Sun Aug 14 13:45:00 2016 -0700
# Node ID eb2bc1ac7869ad255965d16004524a95cea83c9d
# Parent  1fe812eb8b9e79d1182c4a6593e7ce8fa2938264
pushkey: support for encoding and decoding raw listkeys dicts

Now that we have support for retrieving raw/binary versions of pushkey
data, the last step before we expose it on the wire protocol is a
method to encode and decode it. This patch implements those functions.

The new listkeys data representation is framed binary data. We simply
have pairs of frames corresponding to keys and values. A 0 length
key signals end of payload.

All binary sequences can be encoded in keys and values (except the
empty key, which is reserved as the end-of-payload marker). Of course,
not all binary values can be used in existing namespaces because they
may not be encoded properly by the existing wire protocol command. But
going forward we can do whatever we want in new namespaces.

diff --git a/mercurial/pushkey.py b/mercurial/pushkey.py
--- a/mercurial/pushkey.py
+++ b/mercurial/pushkey.py
@@ -2,16 +2,18 @@
 #
 # Copyright 2010 Matt Mackall <mpm at selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
+import struct
+
 from . import (
     bookmarks,
     encoding,
     obsolete,
     phases,
 )
 
 def _nslist(repo):
@@ -63,8 +65,62 @@ def encodekeys(keys):
 
 def decodekeys(data):
     """decode the content of a pushkey namespace from exchange over the wire"""
     result = {}
     for l in data.splitlines():
         k, v = l.split('\t')
         result[decode(k)] = decode(v)
     return result
+
+def encodekeysraw(keys):
+    """Encode pushkey namespace keys using a binary encoding.
+
+    ``keys`` is an iterable of ``(key, value)`` pairs of byte strings.
+    The response consists of framed data packets of the form:
+
+        <size> <data>
+
+    Where the ``size`` is a little endian 32-bit unsigned integer.
+
+    Data is emitted in pairs of frames where the first frame is the key
+    name and the second frame is the value. A key frame with size 0
+    indicates end of stream, so keys must be non-empty; an empty key
+    raises ``ValueError``. Values may be empty.
+    """
+    s = struct.Struct('<I')
+
+    chunks = []
+    for k, v in keys:
+        assert not isinstance(k, encoding.localstr)
+        assert not isinstance(v, encoding.localstr)
+        if not k:
+            # A 0 length key frame would be read as the terminator.
+            raise ValueError('cannot encode empty pushkey key')
+        chunks.extend((s.pack(len(k)), k, s.pack(len(v)), v))
+
+    # Size 0 chunk signals end of payload.
+    chunks.append(s.pack(0))
+
+    return ''.join(chunks)
+
+def decodekeysraw(data):
+    """Decode value encoded by ``encodekeysraw``.
+
+    Returns a dict with bytes keys and values. A 0 length key frame
+    marks the end of the payload; anything after it is ignored.
+    """
+    s = struct.Struct('<I')
+    offset = 0
+    result = {}
+    while True:
+        # unpack_from() returns a 1-tuple; [0] extracts the frame size.
+        l = s.unpack_from(data, offset)[0]
+        offset += s.size
+        if l == 0:
+            break
+        key = data[offset:offset + l]
+        offset += l
+        l = s.unpack_from(data, offset)[0]
+        offset += s.size
+        result[key] = data[offset:offset + l]
+        offset += l
+    return result


More information about the Mercurial-devel mailing list