[PATCH] pure: write a really lazy version of pure indexObject. On PyPy this version performs

Augie Fackler raf at durin42.com
Mon Apr 25 14:35:17 EDT 2016


On Sun, Apr 24, 2016 at 02:32:33PM +0300, Maciej Fijalkowski wrote:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall at gmail.com>
> # Date 1461496898 -10800
> #      Sun Apr 24 14:21:38 2016 +0300
> # Branch stable
> # Node ID de5eed1fbf88ed5a514eecdeeba83ccb26ec5a1e
> # Parent  2d3837a4bded5362f26f91033c0a83376c207593
> pure: write a really lazy version of pure indexObject. On PyPy this version performs
> reasonably well compared to C version. There is potential for improvements by storing
> extra as a condensed struct too.

Modulo smf's comments about commit message style, I'm a big fan. Can I
get you to mail a v2 sometime around May 2nd once our freeze ends?

Thanks!

>
> diff -r 2d3837a4bded -r de5eed1fbf88 mercurial/pure/parsers.py
> --- a/mercurial/pure/parsers.py	Thu Mar 24 22:55:56 2016 +0900
> +++ b/mercurial/pure/parsers.py	Sun Apr 24 14:21:38 2016 +0300
> @@ -25,49 +25,101 @@
>      # x is a tuple
>      return x
>
> +INDEXFORMATNG = ">Qiiiiii20s12x"
> +INDEX_FIRST = struct.calcsize('Q')
> +SIZE_INT = struct.calcsize('i')
> +INDEX_SIZE = struct.calcsize(INDEXFORMATNG)
> +
> +def gettype(q):
> +    return int(q & 0xFFFF)
> +
> +def offset_type(offset, type):
> +    return long(long(offset) << 16 | type)
> +
> +class BaseIndexObject(object):
> +    def __len__(self):
> +        return self._lgt + len(self._extra) + 1
> +
> +    def insert(self, i, tup):
> +        assert i == -1
> +        self._extra.append(tup)
> +
> +    def _fix_index(self, i):
> +        if not isinstance(i, int):
> +            raise TypeError("expecting int indexes")
> +        if i < 0:
> +            i = len(self) + i
> +        if i < 0 or i >= len(self):
> +            raise IndexError
> +        return i
> +
> +    def __getitem__(self, i):
> +        i = self._fix_index(i)
> +        if i == len(self) - 1:
> +            return (0, 0, 0, -1, -1, -1, -1, nullid)
> +        if i >= self._lgt:
> +            return self._extra[i - self._lgt]
> +        index = self._calculate_index(i)
> +        r = struct.unpack(INDEXFORMATNG, self._data[index:index + INDEX_SIZE])
> +        if i == 0:
> +            e = list(r)
> +            type = gettype(e[0])
> +            e[0] = offset_type(0, type)
> +            return tuple(e)
> +        return r
> +
> +class IndexObject(BaseIndexObject):
> +    def __init__(self, data):
> +        assert len(data) % INDEX_SIZE == 0
> +        self._data = data
> +        self._lgt = len(data) // INDEX_SIZE
> +        self._extra = []
> +
> +    def _calculate_index(self, i):
> +        return i * INDEX_SIZE
> +
> +class InlinedIndexObject(BaseIndexObject):
> +    def __init__(self, data, inline=0):
> +        self._data = data
> +        self._lgt = self._inline_scan(None)
> +        self._inline_scan(self._lgt)
> +        self._extra = []
> +
> +    def _inline_scan(self, lgt):
> +        off = 0
> +        if lgt is not None:
> +            self._offsets = [0] * lgt
> +        count = 0
> +        while off <= len(self._data) - INDEX_SIZE:
> +            s, = struct.unpack('>i',
> +                self._data[off + INDEX_FIRST:off + SIZE_INT + INDEX_FIRST])
> +            if lgt is not None:
> +                self._offsets[count] = off
> +            count += 1
> +            off += INDEX_SIZE + s
> +        if off != len(self._data):
> +            raise ValueError("corrupted data")
> +        return count
> +
> +    def __delitem__(self, i):
> +        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
> +            raise ValueError("deleting slices only supports a:-1 with step 1")
> +        i = self._fix_index(i.start)
> +        if i < self._lgt:
> +            self._offsets = self._offsets[:i]
> +            self._lgt = i
> +            self._extra = []
> +        else:
> +            self._extra = self._extra[:i - self._lgt]
> +
> +    def _calculate_index(self, i):
> +        return self._offsets[i]
> +
> +
>  def parse_index2(data, inline):
> -    def gettype(q):
> -        return int(q & 0xFFFF)
> -
> -    def offset_type(offset, type):
> -        return long(long(offset) << 16 | type)
> -
> -    indexformatng = ">Qiiiiii20s12x"
> -
> -    s = struct.calcsize(indexformatng)
> -    index = []
> -    cache = None
> -    off = 0
> -
> -    l = len(data) - s
> -    append = index.append
> -    if inline:
> -        cache = (0, data)
> -        while off <= l:
> -            e = _unpack(indexformatng, data[off:off + s])
> -            append(e)
> -            if e[1] < 0:
> -                break
> -            off += e[1] + s
> -    else:
> -        while off <= l:
> -            e = _unpack(indexformatng, data[off:off + s])
> -            append(e)
> -            off += s
> -
> -    if off != len(data):
> -        raise ValueError('corrupt index file')
> -
> -    if index:
> -        e = list(index[0])
> -        type = gettype(e[0])
> -        e[0] = offset_type(0, type)
> -        index[0] = tuple(e)
> -
> -    # add the magic null revision at -1
> -    index.append((0, 0, 0, -1, -1, -1, -1, nullid))
> -
> -    return index, cache
> +    if not inline:
> +        return IndexObject(data), None
> +    return InlinedIndexObject(data, inline), (0, data)
>
>  def parse_dirstate(dmap, copymap, st):
>      parents = [st[:20], st[20: 40]]
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list