Could we use an unrepr module?

Martin Geisler mg at aragost.com
Thu May 5 10:17:30 CDT 2011


Hi guys,

I needed a way to serialize data for the lock extension, so I wrote a
small module that reverses the normal repr function in Python. It is
like eval, but does not execute anything.

I think we could use such a module here and there in Mercurial. As an
example, I happened to look at the code that writes the merge state:

  # write state:
  f.write(hex(self._local) + "\n")
  for d, v in self._state.iteritems():
      f.write("\0".join([d] + v) + "\n")

  # read state:
  for i, l in enumerate(f):
      if i == 0:
          self._local = bin(l[:-1])
      else:
          bits = l[:-1].split("\0")
          self._state[bits[0]] = bits[1:]

That seems to be a place where

  # write state
  f.write(repr((self._local, self._state.items())))

  # read state
  x, y = unrepr(f.read())
  self._local = x
  self._state = dict(y)

is much easier. I'm sure there are other places where we could use such a
module, so I'm including it below:


# safe implementation of eval, undoes repr
#
# Copyright 2011 aragost Trifork
#
# This software may be used and distributed according to the terms of
# the GNU General Public License version 2 or any later version.

def unrepr(s):
    """Parse a string produced by repr in simple cases.

    Supported values are integers (including negative ones), quoted
    strings, True, False, None, and arbitrarily nested lists and
    tuples of those. Nothing in the input is ever evaluated.

    Leading and trailing whitespace around the whole value is ignored.
    A parenthesized single value without a trailing comma, such as
    "(1)", yields the value itself rather than a tuple.

    Raises ValueError if the input is malformed, truncated (missing a
    closing bracket or quote), or followed by trailing garbage.

    >>> unrepr("123")
    123
    >>> unrepr("-42")
    -42
    >>> unrepr("'foo\\nbar'")
    'foo\\nbar'
    >>> unrepr("[]")
    []
    >>> unrepr("['foo', 'bar']")
    ['foo', 'bar']
    >>> unrepr("()")
    ()
    >>> unrepr("(1,)")
    (1,)
    >>> unrepr("('foo', 'bar')")
    ('foo', 'bar')
    >>> unrepr("['foo', 123, []]")
    ['foo', 123, []]
    >>> unrepr('True')
    True
    >>> unrepr('[True, False, None]')
    [True, False, None]
    >>> unrepr("[1, 2")
    Traceback (most recent call last):
        ...
    ValueError: parse error "expected ',', found end of input at pos 5" while parsing [1, 2
    """
    def skipspaces(s, i):
        # return the index of the first non-space character at or after i
        while i < len(s) and s[i] == ' ':
            i += 1
        return i

    def advance(s, i, expected):
        # consume the literal text 'expected' at position i
        if not s.startswith(expected, i):
            if i >= len(s):
                # s[i] would raise IndexError here, report it properly
                raise ValueError('expected %r, found end of input at pos %d'
                                 % (expected, i))
            raise ValueError('expected %r, found %r at pos %d'
                             % (expected, s[i], i))
        return i + len(expected)

    def unescape(body):
        # undo the backslash escapes that repr put into a string
        if hasattr(body, 'decode'):
            # Python 2 byte string
            return body.decode('string-escape')
        # Python 3 str has no decode(): go through bytes. Characters
        # outside latin-1 are turned into \uXXXX escapes first so that
        # unicode_escape restores them unchanged.
        return body.encode('latin-1', 'backslashreplace').decode(
            'unicode_escape')

    def parse(s, i):
        # parse one value starting at s[i], return (value, next index)
        if i >= len(s):
            raise ValueError('nothing left to parse')
        if s[i] == '[':
            return parselist(s, i)
        if s[i] == '(':
            return parsetuple(s, i)
        if s[i] == '"' or s[i] == "'":
            return parsestr(s, i)
        if s[i].isdigit() or s[i] == '-':
            return parseint(s, i)
        if s.startswith('True', i):
            return True, i+4
        if s.startswith('False', i):
            return False, i+5
        if s.startswith('None', i):
            return None, i+4
        raise ValueError('cannot parse %r' % s)

    def parseitems(s, i, closer):
        # Parse comma separated values up to the closing bracket.
        # s[i] is the opening bracket. Returns (items, trailingcomma,
        # next index) where trailingcomma tells if the last thing seen
        # before the closing bracket was a comma.
        items = []
        trailingcomma = False
        i += 1 # advance past the opening bracket
        while True:
            i = skipspaces(s, i)
            if s.startswith(closer, i):
                return items, trailingcomma, i+1
            if items and not trailingcomma:
                # an element was just parsed, a comma must follow;
                # this also catches input ending before the closer
                i = advance(s, i, ',')
                trailingcomma = True
            else:
                # raises 'nothing left to parse' on truncated input
                elem, i = parse(s, i)
                items.append(elem)
                trailingcomma = False

    def parselist(s, i):
        items, trailingcomma, i = parseitems(s, i, ']')
        return items, i

    def parsetuple(s, i):
        items, trailingcomma, i = parseitems(s, i, ')')
        if len(items) == 1 and not trailingcomma:
            # "(x)" is not really a tuple after all
            return items[0], i
        return tuple(items), i

    def parsestr(s, i):
        quote = s[i]
        j = i+1
        while j < len(s) and s[j] != quote:
            if s[j] == '\\':
                j += 2 # skip the escaped character, it may be a quote
            else:
                j += 1
        if j >= len(s):
            raise ValueError('unterminated string starting at pos %d' % i)
        return unescape(s[i+1:j]), j+1

    def parseint(s, i):
        j = i
        if s[j] == '-':
            j += 1
        firstdigit = j
        while j < len(s) and s[j].isdigit():
            j += 1
        if j == firstdigit:
            raise ValueError('expected digit at pos %d' % j)
        return int(s[i:j]), j

    s = s.strip()
    try:
        result, i = parse(s, 0)
    except ValueError as e:
        raise ValueError('parse error "%s" while parsing %s' % (e, s))

    if i < len(s):
        raise ValueError('could not parse %r' % s[i:])
    return result


-- 
Martin Geisler

aragost Trifork
Professional Mercurial support
http://mercurial.aragost.com/kick-start/


More information about the Mercurial-devel mailing list