[PATCH 10 of 10 lazy-changelog-parse] changelog: return raw user and description values

Gregory Szorc gregory.szorc at gmail.com
Sun Mar 6 18:58:56 EST 2016


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1457308188 28800
#      Sun Mar 06 15:49:48 2016 -0800
# Node ID 342631fb10a19f4a2a132975fc5a1cdf728335e8
# Parent  d1a19a190175f7fd17a37a2a97e5969e8cd8b9a4
changelog: return raw user and description values

Previously, we'd return a localstr for these attributes. I
think we should return the raw bytes for a few reasons.

First, not all callers may want the localstr. If feels inefficient
to force the creation of the intermediate localstr if you
want access to the original bytes.

Second, if we ever want to implement changelog parsing in C,
calling encoding.tolocal() (which is implemented in Python)
from C feels clumsy. It feels better/simpler to have a future
C implementation expose the raw bytes and for the Python consumer
to handle the localstr encoding, if they want it.

This change isn't marked as API breaking (even though it
technically is) because the only consumer of this API was
introduced in this series. So nobody should be impacted by the
API change.

diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -199,17 +199,17 @@ class changelogrevision(object):
 
     @property
     def manifest(self):
         return bin(self._text[0:self._offsets[0]])
 
     @property
     def user(self):
         off = self._offsets
-        return encoding.tolocal(self._text[off[0] + 1:off[1]])
+        return self._text[off[0] + 1:off[1]]
 
     @property
     def _rawdate(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1:off[2]]
         return dateextra.split(' ', 2)[0:2]
 
     @property
@@ -247,17 +247,17 @@ class changelogrevision(object):
         off = self._offsets
         if off[2] == off[3]:
             return []
 
         return self._text[off[2] + 1:off[3]].split('\n')
 
     @property
     def description(self):
-        return encoding.tolocal(self._text[self._offsets[3] + 2:])
+        return self._text[self._offsets[3] + 2:]
 
 class changelog(revlog.revlog):
     def __init__(self, opener):
         revlog.revlog.__init__(self, opener, "00changelog.i")
         if self._initempty:
             # changelogs don't benefit from generaldelta
             self.version &= ~revlog.REVLOGGENERALDELTA
             self._generaldelta = False
@@ -454,20 +454,20 @@ class changelog(revlog.revlog):
 
         Unless you need to access all fields, consider calling
         ``changelogrevision`` instead, as it is faster for partial object
         access.
         """
         c = changelogrevision(self.revision(node))
         return (
             c.manifest,
-            c.user,
+            encoding.tolocal(c.user),
             c.date,
             c.files,
-            c.description,
+            encoding.tolocal(c.description),
             c.extra
         )
 
     def changelogrevision(self, nodeorrev):
         """Obtain a ``changelogrevision`` for a node or revision."""
         return changelogrevision(self.revision(nodeorrev))
 
     def readfiles(self, node):
diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -541,33 +541,33 @@ class changectx(basectx):
         if p2 == nullrev:
             return [changectx(repo, p1)]
         return [changectx(repo, p1), changectx(repo, p2)]
 
     def changeset(self):
         c = self._changeset
         return (
             c.manifest,
-            c.user,
+            encoding.tolocal(c.user),
             c.date,
             c.files,
-            c.description,
+            encoding.tolocal(c.description),
             c.extra,
         )
     def manifestnode(self):
         return self._changeset.manifest
 
     def user(self):
-        return self._changeset.user
+        return encoding.tolocal(self._changeset.user)
     def date(self):
         return self._changeset.date
     def files(self):
         return self._changeset.files
     def description(self):
-        return self._changeset.description
+        return encoding.tolocal(self._changeset.description)
     def branch(self):
         return encoding.tolocal(self._changeset.extra.get("branch"))
     def closesbranch(self):
         return 'close' in self._changeset.extra
     def extra(self):
         return self._changeset.extra
     def tags(self):
         return self._repo.nodetags(self._node)


More information about the Mercurial-devel mailing list