[PATCH 07 of 10 lazy-changelog-parse] changelog: lazily parse files

Gregory Szorc gregory.szorc at gmail.com
Fri Mar 11 13:04:01 EST 2016


On Mon, Mar 7, 2016 at 9:44 PM, Martin von Zweigbergk <martinvonz at google.com> wrote:

> What do you think about folding in the following? Seems correct? Would
> it be noticeably slower with the extra array slicing?
>
> diff -r 18f6505b3b79 mercurial/changelog.py
> --- a/mercurial/changelog.py    Sun Mar 06 14:31:06 2016 -0800
> +++ b/mercurial/changelog.py    Mon Mar 07 21:37:24 2016 -0800
> @@ -196,12 +196,7 @@ class changelogrevision(object):
>          nl3 = text.index('\n', nl2 + 1)
>          self._rawdateextra = text[nl2 + 1:nl3]
>
> -        # The list of files may be empty. Which means nl3 is the first of the
> -        # double newline that precedes the description.
> -        if nl3 == doublenl:
> -            self._rawfiles = None
> -        else:
> -            self._rawfiles = text[nl3 + 1:doublenl]
> +        self._rawfiles = text[nl3:doublenl]
>
>          return self
>
> @@ -247,10 +242,7 @@ class changelogrevision(object):
>
>      @property
>      def files(self):
> -        if self._rawfiles is None:
> -            return []
> -
> -        return self._rawfiles.split('\n')
> +        return self._rawfiles.split('\n')[1:]
>
>      @property
>      def description(self):
>
>
I like the shorter code. If you measure this to have no performance impact,
I'm fine with it. I suppose we could always do it as a follow-up.


>
>
> On Sun, Mar 6, 2016 at 3:58 PM, Gregory Szorc <gregory.szorc at gmail.com>
> wrote:
> > # HG changeset patch
> > # User Gregory Szorc <gregory.szorc at gmail.com>
> > # Date 1457303466 28800
> > #      Sun Mar 06 14:31:06 2016 -0800
> > # Node ID e37bac9adf9215f82850e7b105e3e47e1bee3d7a
> > # Parent  7559d2bcdaeb320212bf8d37e0e5e2075dec6d18
> > changelog: lazily parse files
> >
> > More of the same.
> >
> > Again, modest revset performance wins:
> >
> > author(mpm)
> > 0.896565
> > 0.822961
> > 0.805156
> >
> > desc(bug)
> > 0.887169
> > 0.847054
> > 0.798101
> >
> > date(2015)
> > 0.878797
> > 0.811613
> > 0.786689
> >
> > extra(rebase_source)
> > 0.865446
> > 0.797756
> > 0.777408
> >
> >  author(mpm) or author(greg)
> > 1.801832
> > 1.668172
> > 1.626547
> >
> > author(mpm) or desc(bug)
> > 1.812438
> > 1.677608
> > 1.613941
> >
> > date(2015) or branch(default)
> > 0.968276
> > 0.896032
> > 0.869017
> >
> > diff --git a/mercurial/changelog.py b/mercurial/changelog.py
> > --- a/mercurial/changelog.py
> > +++ b/mercurial/changelog.py
> > @@ -148,17 +148,17 @@ class changelogrevision(object):
> >      Changelog revisions consist of multiple pieces of data, including
> >      the manifest node, user, and date. This object exposes a view into
> >      the parsed object.
> >      """
> >
> >      __slots__ = (
> >          '_rawdateextra',
> >          '_rawdesc',
> > -        'files',
> > +        '_rawfiles',
> >          '_rawmanifest',
> >          '_rawuser',
> >      )
> >
> >      def __new__(cls, text):
> >          if not text:
> >              return _changelogrevision(
> >                  manifest=nullid,
> > @@ -191,18 +191,22 @@ class changelogrevision(object):
> >          self._rawmanifest = text[0:nl1]
> >
> >          nl2 = text.index('\n', nl1 + 1)
> >          self._rawuser = text[nl1 + 1:nl2]
> >
> >          nl3 = text.index('\n', nl2 + 1)
> >          self._rawdateextra = text[nl2 + 1:nl3]
> >
> > -        l = text[:doublenl].split('\n')
> > -        self.files = l[3:]
> > +        # The list of files may be empty. Which means nl3 is the first of the
> > +        # double newline that precedes the description.
> > +        if nl3 == doublenl:
> > +            self._rawfiles = None
> > +        else:
> > +            self._rawfiles = text[nl3 + 1:doublenl]
> >
> >          return self
> >
> >      @property
> >      def manifest(self):
> >          return bin(self._rawmanifest)
> >
> >      @property
> > @@ -237,16 +241,23 @@ class changelogrevision(object):
> >      def extra(self):
> >          raw = self._rawextra
> >          if raw is None:
> >              return _defaultextra
> >
> >          return decodeextra(raw)
> >
> >      @property
> > +    def files(self):
> > +        if self._rawfiles is None:
> > +            return []
> > +
> > +        return self._rawfiles.split('\n')
> > +
> > +    @property
> >      def description(self):
> >          return encoding.tolocal(self._rawdesc)
> >
> >  class changelog(revlog.revlog):
> >      def __init__(self, opener):
> >          revlog.revlog.__init__(self, opener, "00changelog.i")
> >          if self._initempty:
> >              # changelogs don't benefit from generaldelta
> > _______________________________________________
> > Mercurial-devel mailing list
> > Mercurial-devel at mercurial-scm.org
> > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.mercurial-scm.org/pipermail/mercurial-devel/attachments/20160311/6eb21d92/attachment.html>


More information about the Mercurial-devel mailing list