[PATCH 07 of 10 lazy-changelog-parse] changelog: lazily parse files
Martin von Zweigbergk
martinvonz at google.com
Tue Mar 8 00:44:41 EST 2016
What do you think about folding in the following? Does it seem correct?
Would it be noticeably slower with the extra list slicing?
diff -r 18f6505b3b79 mercurial/changelog.py
--- a/mercurial/changelog.py Sun Mar 06 14:31:06 2016 -0800
+++ b/mercurial/changelog.py Mon Mar 07 21:37:24 2016 -0800
@@ -196,12 +196,7 @@ class changelogrevision(object):
nl3 = text.index('\n', nl2 + 1)
self._rawdateextra = text[nl2 + 1:nl3]
- # The list of files may be empty. Which means nl3 is the first of the
- # double newline that precedes the description.
- if nl3 == doublenl:
- self._rawfiles = None
- else:
- self._rawfiles = text[nl3 + 1:doublenl]
+ self._rawfiles = text[nl3:doublenl]
return self
@@ -247,10 +242,7 @@ class changelogrevision(object):
@property
def files(self):
- if self._rawfiles is None:
- return []
-
- return self._rawfiles.split('\n')
+ return self._rawfiles.split('\n')[1:]
@property
def description(self):
On Sun, Mar 6, 2016 at 3:58 PM, Gregory Szorc <gregory.szorc at gmail.com> wrote:
> # HG changeset patch
> # User Gregory Szorc <gregory.szorc at gmail.com>
> # Date 1457303466 28800
> # Sun Mar 06 14:31:06 2016 -0800
> # Node ID e37bac9adf9215f82850e7b105e3e47e1bee3d7a
> # Parent 7559d2bcdaeb320212bf8d37e0e5e2075dec6d18
> changelog: lazily parse files
>
> More of the same.
>
> Again, modest revset performance wins:
>
> author(mpm)
> 0.896565
> 0.822961
> 0.805156
>
> desc(bug)
> 0.887169
> 0.847054
> 0.798101
>
> date(2015)
> 0.878797
> 0.811613
> 0.786689
>
> extra(rebase_source)
> 0.865446
> 0.797756
> 0.777408
>
> author(mpm) or author(greg)
> 1.801832
> 1.668172
> 1.626547
>
> author(mpm) or desc(bug)
> 1.812438
> 1.677608
> 1.613941
>
> date(2015) or branch(default)
> 0.968276
> 0.896032
> 0.869017
>
> diff --git a/mercurial/changelog.py b/mercurial/changelog.py
> --- a/mercurial/changelog.py
> +++ b/mercurial/changelog.py
> @@ -148,17 +148,17 @@ class changelogrevision(object):
> Changelog revisions consist of multiple pieces of data, including
> the manifest node, user, and date. This object exposes a view into
> the parsed object.
> """
>
> __slots__ = (
> '_rawdateextra',
> '_rawdesc',
> - 'files',
> + '_rawfiles',
> '_rawmanifest',
> '_rawuser',
> )
>
> def __new__(cls, text):
> if not text:
> return _changelogrevision(
> manifest=nullid,
> @@ -191,18 +191,22 @@ class changelogrevision(object):
> self._rawmanifest = text[0:nl1]
>
> nl2 = text.index('\n', nl1 + 1)
> self._rawuser = text[nl1 + 1:nl2]
>
> nl3 = text.index('\n', nl2 + 1)
> self._rawdateextra = text[nl2 + 1:nl3]
>
> - l = text[:doublenl].split('\n')
> - self.files = l[3:]
> + # The list of files may be empty. Which means nl3 is the first of the
> + # double newline that precedes the description.
> + if nl3 == doublenl:
> + self._rawfiles = None
> + else:
> + self._rawfiles = text[nl3 + 1:doublenl]
>
> return self
>
> @property
> def manifest(self):
> return bin(self._rawmanifest)
>
> @property
> @@ -237,16 +241,23 @@ class changelogrevision(object):
> def extra(self):
> raw = self._rawextra
> if raw is None:
> return _defaultextra
>
> return decodeextra(raw)
>
> @property
> + def files(self):
> + if self._rawfiles is None:
> + return []
> +
> + return self._rawfiles.split('\n')
> +
> + @property
> def description(self):
> return encoding.tolocal(self._rawdesc)
>
> class changelog(revlog.revlog):
> def __init__(self, opener):
> revlog.revlog.__init__(self, opener, "00changelog.i")
> if self._initempty:
> # changelogs don't benefit from generaldelta
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
More information about the Mercurial-devel mailing list