D4753: storageutil: new module for storage primitives (API)

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Wed Sep 26 18:30:03 UTC 2018


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  There will exist common code between storage backends. It would
  be nice to have a central place to put that code.
  
  This commit attempts to create that place by creating the
  "storageutil" module.
  
  The first thing we move is revlog.hash(), which is the function for
  computing the SHA-1 hash of revision fulltext and parents.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4753

AFFECTED FILES
  mercurial/revlog.py
  mercurial/utils/storageutil.py
  tests/simplestorerepo.py

CHANGE DETAILS

diff --git a/tests/simplestorerepo.py b/tests/simplestorerepo.py
--- a/tests/simplestorerepo.py
+++ b/tests/simplestorerepo.py
@@ -40,6 +40,7 @@
 )
 from mercurial.utils import (
     interfaceutil,
+    storageutil,
 )
 
 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
@@ -285,7 +286,7 @@
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         if p1 is None and p2 is None:
             p1, p2 = self.parents(node)
-        if node != revlog.hash(text, p1, p2):
+        if node != storageutil.hashrevisionsha1(text, p1, p2):
             raise simplestoreerror(_("integrity check failed on %s") %
                 self._path)
 
@@ -342,7 +343,7 @@
 
         p1, p2 = self.parents(node)
 
-        if revlog.hash(t, p1, p2) == node:
+        if storageutil.hashrevisionsha1(t, p1, p2) == node:
             return False
 
         if self.iscensored(self.rev(node)):
@@ -420,11 +421,11 @@
         validatenode(p2)
 
         if flags:
-            node = node or revlog.hash(text, p1, p2)
+            node = node or storageutil.hashrevisionsha1(text, p1, p2)
 
         rawtext, validatehash = self._processflags(text, flags, 'write')
 
-        node = node or revlog.hash(text, p1, p2)
+        node = node or storageutil.hashrevisionsha1(text, p1, p2)
 
         if node in self._indexbynode:
             return node
diff --git a/mercurial/utils/storageutil.py b/mercurial/utils/storageutil.py
new file mode 100644
--- /dev/null
+++ b/mercurial/utils/storageutil.py
@@ -0,0 +1,41 @@
+# storageutil.py - Storage functionality agnostic of backend implementation.
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc at gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import hashlib
+
+from ..node import (
+    nullid,
+)
+
+_nullhash = hashlib.sha1(nullid)
+
+def hashrevisionsha1(text, p1, p2):
+    """Compute the SHA-1 for revision data and its parents.
+
+    This hash combines both the current file contents and its history
+    in a manner that makes it easy to distinguish nodes with the same
+    content in the revision graph.
+    """
+    # As of now, if one of the parent node is null, p2 is null
+    if p2 == nullid:
+        # deep copy of a hash is faster than creating one
+        s = _nullhash.copy()
+        s.update(p1)
+    else:
+        # none of the parent nodes are nullid
+        if p1 < p2:
+            a = p1
+            b = p2
+        else:
+            a = p2
+            b = p1
+        s = hashlib.sha1(a)
+        s.update(b)
+    s.update(text)
+    return s.digest()
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -16,7 +16,6 @@
 import collections
 import contextlib
 import errno
-import hashlib
 import os
 import re
 import struct
@@ -74,6 +73,7 @@
 )
 from .utils import (
     interfaceutil,
+    storageutil,
     stringutil,
 )
 
@@ -197,33 +197,6 @@
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
-_nullhash = hashlib.sha1(nullid)
-
-def hash(text, p1, p2):
-    """generate a hash from the given text and its parent hashes
-
-    This hash combines both the current file contents and its history
-    in a manner that makes it easy to distinguish nodes with the same
-    content in the revision graph.
-    """
-    # As of now, if one of the parent node is null, p2 is null
-    if p2 == nullid:
-        # deep copy of a hash is faster than creating one
-        s = _nullhash.copy()
-        s.update(p1)
-    else:
-        # none of the parent nodes are nullid
-        if p1 < p2:
-            a = p1
-            b = p2
-        else:
-            a = p2
-            b = p1
-        s = hashlib.sha1(a)
-        s.update(b)
-    s.update(text)
-    return s.digest()
-
 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
@@ -1383,7 +1356,7 @@
         returns True if text is different than what is stored.
         """
         p1, p2 = self.parents(node)
-        return hash(text, p1, p2) != node
+        return storageutil.hashrevisionsha1(text, p1, p2) != node
 
     def _cachesegment(self, offset, data):
         """Add a segment to the revlog cache.
@@ -1672,7 +1645,7 @@
         Available as a function so that subclasses can replace the hash
         as needed.
         """
-        return hash(text, p1, p2)
+        return storageutil.hashrevisionsha1(text, p1, p2)
 
     def _processflags(self, text, flags, operation, raw=False):
         """Inspect revision data flags and applies transforms defined by



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list