[PATCH 2 of 5 V2] chgserver: add utilities to calculate mtimehash

Jun Wu quark at fb.com
Fri Feb 26 10:36:21 EST 2016


# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1456498779 0
#      Fri Feb 26 14:59:39 2016 +0000
# Node ID 3172be958b9c1244a98f9833235cd7d9cfdf2373
# Parent  b1fb9799559b4e12435b9580626ae08ce708463b
chgserver: add utilities to calculate mtimehash

mtimehash is designed to detect file changes. These files include:
- single file extensions (__init__.py for complex extensions)
- hg (util.gethgcmd())
- python (sys.executable)

mtimehash only uses stat to check files, so it is fast but not 100% accurate.
However, it should be good enough for our use case.

For chgserver, once mtimehash changes, the server is considered outdated
immediately and should no longer provide service.

diff --git a/hgext/chgserver.py b/hgext/chgserver.py
--- a/hgext/chgserver.py
+++ b/hgext/chgserver.py
@@ -30,10 +30,12 @@
 
 import SocketServer
 import errno
+import inspect
 import os
 import re
 import signal
 import struct
+import sys
 import threading
 import time
 import traceback
@@ -46,6 +48,7 @@
     commandserver,
     dispatch,
     error,
+    extensions,
     osutil,
     util,
 )
@@ -97,6 +100,45 @@
     envhash = _hashlist(sorted(envitems))
     return sectionhash[:6] + envhash[:6]
 
+def _getmtimepaths(ui):
+    """get a sorted list of unique paths to check for file changes
+
+    The list includes the python binary, the hg binary and the source file
+    of each extension (complex extensions are not covered completely).
+    """
+    # NOTE(review): assumes util.gethgcmd() returns one path string - confirm
+    files = [sys.executable, util.gethgcmd()]
+    for name, module in extensions.extensions(ui):
+        try:
+            files.append(inspect.getsourcefile(module))
+        except TypeError:
+            # raised for built-in modules, which have no source file
+            pass
+    # getsourcefile may return None; drop empty entries before sorting
+    return sorted(set(f for f in files if f))
+
+def _mtimehash(paths):
+    """compute a cheap fingerprint of the given files from stat data
+
+    Each path is stat-ed and its (mtime, size) pair is passed to _hashlist; a
+    path that cannot be stat-ed contributes None. File contents are never
+    read because reading is expensive, so the result is not 100% reliable:
+    unchanged contents may yield a different hash, and carefully crafted
+    changes may yield the same hash.
+
+    for chgserver, it is designed that once mtimehash changes, the server is
+    considered outdated immediately and should no longer provide service.
+    """
+    def statinfo(path):
+        try:
+            result = os.stat(path)
+        except OSError:
+            # e.g. ENOENT or EPERM; an unreadable path is never fatal
+            return None
+        return (result.st_mtime, result.st_size)
+    digest = _hashlist(map(statinfo, paths))
+    return digest[:12]
+
 # copied from hgext/pager.py:uisetup()
 def _setuppagercmd(ui, options, cmd):
     if not ui.formatted():


More information about the Mercurial-devel mailing list