[PATCH v2] pure: write a really lazy version of pure indexObject

Augie Fackler raf at durin42.com
Tue May 10 21:35:51 EDT 2016


On Tue, May 10, 2016 at 10:41:39AM +0200, Maciej Fijalkowski wrote:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall at gmail.com>
> # Date 1461496898 -10800
> #      Sun Apr 24 14:21:38 2016 +0300
> # Branch stable
> # Node ID a404d575cabf159786d75ee49c834234721e1f53
> # Parent  2d3837a4bded5362f26f91033c0a83376c207593
> pure: write a really lazy version of pure indexObject.
>
> On PyPy this version performs reasonably well compared to the C version.
> An example command is "hg id", which gets faster depending on details
> of your operating system and hard drive (it's mostly bottlenecked on stat).
> There is potential for further improvement by storing extra as a condensed struct too.

Queued this, many thanks. Nice to see pure code getting some expert attention!

>
> diff -r 2d3837a4bded -r a404d575cabf mercurial/pure/parsers.py
> --- a/mercurial/pure/parsers.py	Thu Mar 24 22:55:56 2016 +0900
> +++ b/mercurial/pure/parsers.py	Sun Apr 24 14:21:38 2016 +0300
> @@ -25,49 +25,112 @@
>      # x is a tuple
>      return x
>
> +indexformatng = ">Qiiiiii20s12x"
> +indexfirst = struct.calcsize('Q')
> +sizeint = struct.calcsize('i')
> +indexsize = struct.calcsize(indexformatng)
> +
> +def gettype(q):
> +    return int(q & 0xFFFF)
> +
> +def offset_type(offset, type):
> +    return long(long(offset) << 16 | type)
> +
> +class BaseIndexObject(object):
> +    def __len__(self):
> +        return self._lgt + len(self._extra) + 1
> +
> +    def insert(self, i, tup):
> +        assert i == -1
> +        self._extra.append(tup)
> +
> +    def _fix_index(self, i):
> +        if not isinstance(i, int):
> +            raise TypeError("expecting int indexes")
> +        if i < 0:
> +            i = len(self) + i
> +        if i < 0 or i >= len(self):
> +            raise IndexError
> +        return i
> +
> +    def __getitem__(self, i):
> +        i = self._fix_index(i)
> +        if i == len(self) - 1:
> +            return (0, 0, 0, -1, -1, -1, -1, nullid)
> +        if i >= self._lgt:
> +            return self._extra[i - self._lgt]
> +        index = self._calculate_index(i)
> +        r = struct.unpack(indexformatng, self._data[index:index + indexsize])
> +        if i == 0:
> +            e = list(r)
> +            type = gettype(e[0])
> +            e[0] = offset_type(0, type)
> +            return tuple(e)
> +        return r
> +
> +class IndexObject(BaseIndexObject):
> +    def __init__(self, data):
> +        assert len(data) % indexsize == 0
> +        self._data = data
> +        self._lgt = len(data) // indexsize
> +        self._extra = []
> +
> +    def _calculate_index(self, i):
> +        return i * indexsize
> +
> +    def __delitem__(self, i):
> +        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
> +            raise ValueError("deleting slices only supports a:-1 with step 1")
> +        i = self._fix_index(i.start)
> +        if i < self._lgt:
> +            self._data = self._data[:i * indexsize]
> +            self._lgt = i
> +            self._extra = []
> +        else:
> +            self._extra = self._extra[:i - self._lgt]
> +
> +class InlinedIndexObject(BaseIndexObject):
> +    def __init__(self, data, inline=0):
> +        self._data = data
> +        self._lgt = self._inline_scan(None)
> +        self._inline_scan(self._lgt)
> +        self._extra = []
> +
> +    def _inline_scan(self, lgt):
> +        off = 0
> +        if lgt is not None:
> +            self._offsets = [0] * lgt
> +        count = 0
> +        while off <= len(self._data) - indexsize:
> +            s, = struct.unpack('>i',
> +                self._data[off + indexfirst:off + sizeint + indexfirst])
> +            if lgt is not None:
> +                self._offsets[count] = off
> +            count += 1
> +            off += indexsize + s
> +        if off != len(self._data):
> +            raise ValueError("corrupted data")
> +        return count
> +
> +    def __delitem__(self, i):
> +        if not isinstance(i, slice) or not i.stop == -1 or not i.step is None:
> +            raise ValueError("deleting slices only supports a:-1 with step 1")
> +        i = self._fix_index(i.start)
> +        if i < self._lgt:
> +            self._offsets = self._offsets[:i]
> +            self._lgt = i
> +            self._extra = []
> +        else:
> +            self._extra = self._extra[:i - self._lgt]
> +
> +    def _calculate_index(self, i):
> +        return self._offsets[i]
> +
> +
>  def parse_index2(data, inline):
> -    def gettype(q):
> -        return int(q & 0xFFFF)
> -
> -    def offset_type(offset, type):
> -        return long(long(offset) << 16 | type)
> -
> -    indexformatng = ">Qiiiiii20s12x"
> -
> -    s = struct.calcsize(indexformatng)
> -    index = []
> -    cache = None
> -    off = 0
> -
> -    l = len(data) - s
> -    append = index.append
> -    if inline:
> -        cache = (0, data)
> -        while off <= l:
> -            e = _unpack(indexformatng, data[off:off + s])
> -            append(e)
> -            if e[1] < 0:
> -                break
> -            off += e[1] + s
> -    else:
> -        while off <= l:
> -            e = _unpack(indexformatng, data[off:off + s])
> -            append(e)
> -            off += s
> -
> -    if off != len(data):
> -        raise ValueError('corrupt index file')
> -
> -    if index:
> -        e = list(index[0])
> -        type = gettype(e[0])
> -        e[0] = offset_type(0, type)
> -        index[0] = tuple(e)
> -
> -    # add the magic null revision at -1
> -    index.append((0, 0, 0, -1, -1, -1, -1, nullid))
> -
> -    return index, cache
> +    if not inline:
> +        return IndexObject(data), None
> +    return InlinedIndexObject(data, inline), (0, data)
>
>  def parse_dirstate(dmap, copymap, st):
>      parents = [st[:20], st[20: 40]]
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


More information about the Mercurial-devel mailing list