D2794: hgweb: refactor 304 handling code

indygreg (Gregory Szorc) phabricator at mercurial-scm.org
Sun Mar 11 05:24:52 UTC 2018


indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  We had generic code in wsgirequest for handling HTTP 304 responses.
  We also had a special case for it in the catch-all exception handler
  in the WSGI application.
  
  We only ever raise 304 in one place. So, we don't need to treat it
  specially in the catch-all exception handler.
  
  But it is useful to validate the behavior of 304 responses. We port
  the code that sends a 304 to use the new response API. We then move
  the code that sanity-checks 304 responses into the new response API.
  
  As part of doing so, we discovered that we would send
  Content-Length: 0 on 304 responses. This is not allowed. So, we fix
  our response code to not emit that header on 304 responses with an
  empty body.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2794

AFFECTED FILES
  mercurial/hgweb/hgweb_mod.py
  mercurial/hgweb/request.py

CHANGE DETAILS

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -15,7 +15,6 @@
 
 from .common import (
     ErrorResponse,
-    HTTP_NOT_MODIFIED,
     statusmessage,
 )
 
@@ -361,7 +360,10 @@
             raise error.ProgrammingError('cannot define body multiple times')
 
     def setbodybytes(self, b):
-        """Define the response body as static bytes."""
+        """Define the response body as static bytes.
+
+        The empty string signals that there is no response body.
+        """
         self._verifybody()
         self._bodybytes = b
         self.headers['Content-Length'] = '%d' % len(b)
@@ -408,6 +410,35 @@
             and not self._bodywillwrite):
             raise error.ProgrammingError('response body not defined')
 
+        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
+        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
+        # and SHOULD NOT generate other headers unless they could be used
+        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
+        # states that no response body can be issued. Content-Length can
+        # be sent. But if it is present, it should be the size of the response
+        # that wasn't transferred.
+        if self.status.startswith('304 '):
+            # setbodybytes('') will set C-L to 0. This doesn't conform with the
+            # spec. So remove it.
+            if self.headers.get('Content-Length') == '0':
+                del self.headers['Content-Length']
+
+            # Strictly speaking, this is too strict. But until it causes
+            # problems, let's be strict.
+            badheaders = {k for k in self.headers.keys()
+                          if k.lower() not in ('date', 'etag', 'expires',
+                                               'cache-control',
+                                               'content-location',
+                                               'vary')}
+            if badheaders:
+                raise error.ProgrammingError(
+                    'illegal header on 304 response: %s' %
+                    ', '.join(sorted(badheaders)))
+
+            if self._bodygen is not None or self._bodywillwrite:
+                raise error.ProgrammingError("must use setbodybytes('') with "
+                                             "304 responses")
+
         # Various HTTP clients (notably httplib) won't read the HTTP response
         # until the HTTP request has been sent in full. If servers (us) send a
         # response before the HTTP request has been fully sent, the connection
@@ -539,13 +570,6 @@
 
             if isinstance(status, ErrorResponse):
                 self.headers.extend(status.headers)
-                if status.code == HTTP_NOT_MODIFIED:
-                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
-                    # it MUST NOT include any headers other than these and no
-                    # body
-                    self.headers = [(k, v) for (k, v) in self.headers if
-                                    k in ('Date', 'ETag', 'Expires',
-                                          'Cache-Control', 'Vary')]
                 status = statusmessage(status.code, pycompat.bytestr(status))
             elif status == 200:
                 status = '200 Script output follows'
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -15,7 +15,6 @@
     ErrorResponse,
     HTTP_BAD_REQUEST,
     HTTP_NOT_FOUND,
-    HTTP_NOT_MODIFIED,
     HTTP_OK,
     HTTP_SERVER_ERROR,
     cspvalues,
@@ -391,7 +390,10 @@
             if rctx.configbool('web', 'cache') and not rctx.nonce:
                 tag = 'W/"%d"' % self.mtime
                 if req.headers.get('If-None-Match') == tag:
-                    raise ErrorResponse(HTTP_NOT_MODIFIED)
+                    res.status = '304 Not Modified'
+                    # Response body not allowed on 304.
+                    res.setbodybytes('')
+                    return res.sendresponse()
 
                 wsgireq.headers.append((r'ETag', pycompat.sysstr(tag)))
                 res.headers['ETag'] = tag
@@ -426,9 +428,6 @@
             return tmpl('error', error=pycompat.bytestr(inst))
         except ErrorResponse as inst:
             wsgireq.respond(inst, ctype)
-            if inst.code == HTTP_NOT_MODIFIED:
-                # Not allowed to return a body on a 304
-                return ['']
             return tmpl('error', error=pycompat.bytestr(inst))
 
     def check_perm(self, rctx, req, op):



To: indygreg, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list