[PATCH 3 of 5] chgserver: add utilities to calculate mtimehash

Jun Wu quark at fb.com
Wed Feb 24 17:30:14 EST 2016


# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1456346533 0
#      Wed Feb 24 20:42:13 2016 +0000
# Node ID 66e1c30b8f82615f6154d7ca59dc9b30c230c257
# Parent  0f9e0de14b62ea174bf0af8284a17ef6801ca2ae
chgserver: add utilities to calculate mtimehash

mtimehash is designed to detect file changes. These files include:
- single file extensions (__init__.py for complex extensions)
- mercurial/__version__.py
- python (sys.executable)

mtimehash only uses stat to check files so it's fast but not 100% accurate.
However it should be good enough for our use case.

For chgserver, once mtimehash changes, the server is considered outdated
immediately and should no longer provide service.

diff --git a/hgext/chgserver.py b/hgext/chgserver.py
--- a/hgext/chgserver.py
+++ b/hgext/chgserver.py
@@ -31,10 +31,12 @@
 import SocketServer
 import errno
 import hashlib
+import inspect
 import os
 import re
 import signal
 import struct
+import sys
 import threading
 import time
 import traceback
@@ -42,11 +44,13 @@
 from mercurial.i18n import _
 
 from mercurial import (
+    __version__,
     cmdutil,
     commands,
     commandserver,
     dispatch,
     error,
+    extensions,
     osutil,
     util,
 )
@@ -89,6 +93,46 @@
     envhash = _hashlist(sorted(envitems))
     return sectionhash[:6] + envhash[:6]
 
+def _getmtimepaths(ui):
+    """get a sorted list of unique paths to check for detecting change
+
+    The list will include:
+    - extensions (will not cover all files for complex extensions)
+    - mercurial.__version__
+    - python binary
+    modules without a file (e.g. built-in ones) are skipped.
+    """
+    modules = [m for name, m in extensions.extensions(ui)]
+    modules += [__version__]
+    files = [sys.executable]
+    for m in modules:
+        try:
+            files.append(inspect.getabsfile(m))  # never returns None
+        except TypeError:  # raised for modules that have no file
+            pass
+    return sorted(set(files))
+
+def _mtimehash(paths):
+    """return a quick hash for detecting file changes
+
+    mtimehash calls stat on given paths and calculates a hash based on size and
+    mtime of each file. a missing path (ENOENT) contributes None, and any
+    other stat error is raised. mtimehash does not read file content because
+    reading is expensive, so it's not 100% reliable for detecting content
+    changes: it may return different hashes for the same file contents, and
+    the same hash for different contents in some carefully crafted situations.
+
+    for chgserver, it is designed that once mtimehash changes, the server is
+    considered outdated immediately and should no longer provide service.
+    """
+    def trystat(path):
+        try:
+            st = os.stat(path)
+            return (st.st_mtime, st.st_size)
+        except OSError as exc:
+            if exc.errno != errno.ENOENT:
+                raise
+    return _hashlist(map(trystat, paths))[:12]
+
 # copied from hgext/pager.py:uisetup()
 def _setuppagercmd(ui, options, cmd):
     if not ui.formatted():


More information about the Mercurial-devel mailing list