[PATCH 3 of 4 hgweb-thread-isolation] hg: establish a cache for localrepository instances

Gregory Szorc gregory.szorc at gmail.com
Wed Sep 9 18:36:32 CDT 2015


# HG changeset patch
# User Gregory Szorc <gregory.szorc at gmail.com>
# Date 1440294874 25200
#      Sat Aug 22 18:54:34 2015 -0700
# Node ID f4ebdbb128db525cfb5adf60b5e88f43fa9bc962
# Parent  193680ff40214a4ea0a6c07ac118832f320d624e
hg: establish a cache for localrepository instances

hgweb contained code for determining whether a cached localrepository
instance was up to date. This code was way too low-level to be in
hgweb.

This functionality has been moved to a new "cachedlocalrepo" class
in hg.py. The code has been changed slightly to facilitate use
inside a class. hgweb has been refactored to use the new API.

As part of this refactor, hgweb.repo no longer exists! We're very close
to using a distinct repo instance per thread.

The new cache records the repository state when it is created.
Previously hgweb started with an empty cached state, so the first
request always looked like a change and created an extra
localrepository. That no longer happens, which is why some redundant
output from test-extension.t has gone away.

diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -822,4 +822,73 @@ def remoteui(src, opts):
     elif v:
         dst.setconfig('web', 'cacerts', util.expandpath(v), 'copied')
 
     return dst
+
+# Files of interest
+# Used to check if the repository has changed by looking at mtime and size of
+# these files.
+foi = [('spath', '00changelog.i'),
+       ('spath', 'phaseroots'), # ! phase can change content at the same size
+       ('spath', 'obsstore'),
+       ('path', 'bookmarks'), # ! bookmark can change content at the same size
+      ]
+
+class cachedlocalrepo(object):
+    """Holds a localrepository that can be cached and reused."""
+
+    def __init__(self, repo):
+        """Create a new cached repo from an existing repo.
+
+        We assume the passed in repo was recently created. If the
+        repo has changed between when it was created and when it was
+        turned into a cache, it may not refresh properly.
+        """
+        assert isinstance(repo, localrepo.localrepository)
+        self._repo = repo
+        self._state, self.mtime = self._repostate()
+
+    def fetch(self):
+        """Refresh (if necessary) and return a repository.
+
+        If the cached instance is out of date, it will be recreated
+        automatically and returned.
+
+        Returns a tuple of the repo and a boolean indicating whether a new
+        repo instance was created.
+        """
+        # We compare the mtimes and sizes of some well-known files to
+        # determine if the repo changed. This is not precise, as mtimes
+        # are susceptible to clock skew and imprecise filesystems and
+        # file content can change while maintaining the same size.
+
+        state, mtime = self._repostate()
+        if state == self._state:
+            return self._repo, False
+
+        self._repo = repository(self._repo.baseui, self._repo.url())
+        self._state = state
+        self.mtime = mtime
+
+        return self._repo, True
+
+    def _repostate(self):
+        state = []
+        maxmtime = -1
+        for attr, fname in foi:
+            prefix = getattr(self._repo, attr)
+            p = os.path.join(prefix, fname)
+            try:
+                st = os.stat(p)
+            except OSError:
+                st = os.stat(prefix)
+            state.append((st.st_mtime, st.st_size))
+            maxmtime = max(maxmtime, st.st_mtime)
+
+        return tuple(state), maxmtime
+
+    def copy(self):
+        """Obtain a copy of this class instance."""
+        c = cachedlocalrepo(self._repo)
+        c._state = self._state
+        c.mtime = self.mtime
+        return c
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -24,17 +24,8 @@ perms = {
     'unbundle': 'push',
     'pushkey': 'push',
 }
 
-## Files of interest
-# Used to check if the repository has changed looking at mtime and size of
-# theses files. This should probably be relocated a bit higher in core.
-foi = [('spath', '00changelog.i'),
-       ('spath', 'phaseroots'), # ! phase can change content at the same size
-       ('spath', 'obsstore'),
-       ('path', 'bookmarks'), # ! bookmark can change content at the same size
-      ]
-
 def makebreadcrumb(url, prefix=''):
     '''Return a 'URL breadcrumb' list
 
     A 'URL breadcrumb' is a list of URL-name pairs,
@@ -65,10 +56,10 @@ class requestcontext(object):
     Servers can be multi-threaded. Holding state on the WSGI application
     is prone to race conditions. Instances of this class exist to hold
     mutable and race-free state for requests.
     """
-    def __init__(self, app):
-        self.repo = app.repo
+    def __init__(self, app, repo):
+        self.repo = repo
         self.reponame = app.reponame
 
         self.archives = ('zip', 'gz', 'bz2')
 
@@ -216,38 +207,24 @@ class hgweb(object):
         # displaying bundling progress bar while serving feel wrong and may
         # break some wsgi implementation.
         r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
         r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
-        self.repo = self._webifyrepo(r)
+        self._repo = hg.cachedlocalrepo(self._webifyrepo(r))
         hook.redirect(True)
-        self.repostate = None
-        self.mtime = -1
         self.reponame = name
 
     def _webifyrepo(self, repo):
         repo = getwebview(repo)
         self.websubtable = webutil.getwebsubs(repo)
         return repo
 
-    def refresh(self):
-        repostate = []
-        mtime = 0
-        # file of interrests mtime and size
-        for meth, fname in foi:
-            prefix = getattr(self.repo, meth)
-            st = get_stat(prefix, fname)
-            repostate.append((st.st_mtime, st.st_size))
-            mtime = max(mtime, st.st_mtime)
-        repostate = tuple(repostate)
-        # we need to compare file size in addition to mtime to catch
-        # changes made less than a second ago
-        if repostate != self.repostate:
-            r = hg.repository(self.repo.baseui, self.repo.url())
-            self.repo = self._webifyrepo(r)
-            # update these last to avoid threads seeing empty settings
-            self.repostate = repostate
-            # mtime is needed for ETag
-            self.mtime = mtime
+    def _getrepo(self):
+        r, created = self._repo.fetch()
+        if created:
+            r = self._webifyrepo(r)
+
+        self.mtime = self._repo.mtime
+        return r
 
     def run(self):
         """Start a server from CGI environment.
 
@@ -273,10 +250,10 @@ class hgweb(object):
 
         This is typically only called by Mercurial. External consumers
         should be using instances of this class as the WSGI application.
         """
-        self.refresh()
-        rctx = requestcontext(self)
+        repo = self._getrepo()
+        rctx = requestcontext(self, repo)
 
         # This state is global across all threads.
         encoding.encoding = rctx.config('web', 'encoding', encoding.encoding)
         rctx.repo.ui.environ = req.env
diff --git a/tests/test-extension.t b/tests/test-extension.t
--- a/tests/test-extension.t
+++ b/tests/test-extension.t
@@ -114,10 +114,8 @@ Check hgweb's load order:
   3) foo extsetup
   3) bar extsetup
   4) foo reposetup
   4) bar reposetup
-  4) foo reposetup
-  4) bar reposetup
 
   $ echo 'foo = !' >> $HGRCPATH
   $ echo 'bar = !' >> $HGRCPATH
 
diff --git a/tests/test-hgweb-non-interactive.t b/tests/test-hgweb-non-interactive.t
--- a/tests/test-hgweb-non-interactive.t
+++ b/tests/test-hgweb-non-interactive.t
@@ -63,9 +63,9 @@ by the WSGI standard and strictly implem
   > print errors.getvalue()
   > print '---- OS.ENVIRON wsgi variables'
   > print sorted([x for x in os.environ if x.startswith('wsgi')])
   > print '---- request.ENVIRON wsgi variables'
-  > print sorted([x for x in i.repo.ui.environ if x.startswith('wsgi')])
+  > print sorted([x for x in i._getrepo().ui.environ if x.startswith('wsgi')])
   > EOF
   $ python request.py
   ---- STATUS
   200 Script output follows


More information about the Mercurial-devel mailing list