D2854: hgweb: str/bytes followups after indygreg's refactoring

durin42 (Augie Fackler) phabricator at mercurial-scm.org
Wed Mar 14 03:31:02 UTC 2018


durin42 created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  As of this change, we're mostly principled about using native strings
  (so bytes on 2, unicode on 3) for accessing HTTP headers. This
  restores test-getbundle.t (probably among others) to passing on Python
  3.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2854

AFFECTED FILES
  mercurial/hgweb/common.py
  mercurial/hgweb/hgweb_mod.py
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  mercurial/hgweb/server.py
  mercurial/wireprotoserver.py

CHANGE DETAILS

diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -29,9 +29,9 @@
 
 HTTP_OK = 200
 
-HGTYPE = 'application/mercurial-0.1'
-HGTYPE2 = 'application/mercurial-0.2'
-HGERRTYPE = 'application/hg-error'
+HGTYPE = r'application/mercurial-0.1'
+HGTYPE2 = r'application/mercurial-0.2'
+HGERRTYPE = r'application/hg-error'
 
 SSHV1 = wireprototypes.SSHV1
 SSHV2 = wireprototypes.SSHV2
@@ -44,7 +44,7 @@
     chunks = []
     i = 1
     while True:
-        v = req.headers.get(b'%s-%d' % (headerprefix, i))
+        v = req.headers.get(r'%s-%d' % (headerprefix, i))
         if v is None:
             break
         chunks.append(pycompat.bytesurl(v))
@@ -79,23 +79,23 @@
 
     def _args(self):
         args = self._req.qsparams.asdictoflists()
-        postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
+        postlen = int(self._req.headers.get(r'X-HgArgs-Post', 0))
         if postlen:
             args.update(urlreq.parseqs(
                 self._req.bodyfh.read(postlen), keep_blank_values=True))
             return args
 
-        argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
+        argvalue = decodevaluefromheaders(self._req, r'X-HgArg')
         args.update(urlreq.parseqs(argvalue, keep_blank_values=True))
         return args
 
     def forwardpayload(self, fp):
         # Existing clients *always* send Content-Length.
-        length = int(self._req.headers[b'Content-Length'])
+        length = int(self._req.headers[r'Content-Length'])
 
         # If httppostargs is used, we need to read Content-Length
         # minus the amount that was consumed by args.
-        length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
+        length -= int(self._req.headers.get(r'X-HgArgs-Post', 0))
         for s in util.filechunkiter(self._req.bodyfh, limit=length):
             fp.write(s)
 
@@ -189,7 +189,7 @@
     # in this case. We send an HTTP 404 for backwards compatibility reasons.
     if req.dispatchpath:
         res.status = hgwebcommon.statusmessage(404)
-        res.headers['Content-Type'] = HGTYPE
+        res.headers[r'Content-Type'] = HGTYPE
         # TODO This is not a good response to issue for this request. This
         # is mostly for BC for now.
         res.setbodybytes('0\n%s\n' % b'Not Found')
@@ -207,7 +207,7 @@
     except hgwebcommon.ErrorResponse as e:
         for k, v in e.headers:
             res.headers[k] = v
-        res.status = hgwebcommon.statusmessage(e.code, pycompat.bytestr(e))
+        res.status = hgwebcommon.statusmessage(e.code, e)
         # TODO This response body assumes the failed command was
         # "unbundle." That assumption is not always valid.
         res.setbodybytes('0\n%s\n' % pycompat.bytestr(e))
@@ -271,11 +271,11 @@
 
     def setresponse(code, contenttype, bodybytes=None, bodygen=None):
         if code == HTTP_OK:
-            res.status = '200 Script output follows'
+            res.status = r'200 Script output follows'
         else:
             res.status = hgwebcommon.statusmessage(code)
 
-        res.headers['Content-Type'] = contenttype
+        res.headers[r'Content-Type'] = pycompat.strurl(contenttype)
 
         if bodybytes is not None:
             res.setbodybytes(bodybytes)
diff --git a/mercurial/hgweb/server.py b/mercurial/hgweb/server.py
--- a/mercurial/hgweb/server.py
+++ b/mercurial/hgweb/server.py
@@ -101,7 +101,7 @@
         try:
             self.do_write()
         except Exception:
-            self._start_response("500 Internal Server Error", [])
+            self._start_response(r"500 Internal Server Error", [])
             self._write("Internal Server Error")
             self._done()
             tb = r"".join(traceback.format_exception(*sys.exc_info()))
@@ -136,26 +136,26 @@
                 env[r'CONTENT_TYPE'] = self.headers.get_default_type()
             else:
                 env[r'CONTENT_TYPE'] = self.headers.get_content_type()
-            length = self.headers.get('content-length')
+            length = self.headers.get(r'content-length')
         else:
             if self.headers.typeheader is None:
                 env[r'CONTENT_TYPE'] = self.headers.type
             else:
                 env[r'CONTENT_TYPE'] = self.headers.typeheader
-            length = self.headers.getheader('content-length')
+            length = self.headers.getheader(r'content-length')
         if length:
             env[r'CONTENT_LENGTH'] = length
         for header in [h for h in self.headers.keys()
-                       if h not in ('content-type', 'content-length')]:
+                       if h not in (r'content-type', r'content-length')]:
             hkey = r'HTTP_' + header.replace(r'-', r'_').upper()
             hval = self.headers.get(header)
             hval = hval.replace(r'\n', r'').strip()
             if hval:
                 env[hkey] = hval
         env[r'SERVER_PROTOCOL'] = self.request_version
         env[r'wsgi.version'] = (1, 0)
         env[r'wsgi.url_scheme'] = pycompat.sysstr(self.url_scheme)
-        if env.get(r'HTTP_EXPECT', '').lower() == '100-continue':
+        if env.get(r'HTTP_EXPECT', '').lower() == r'100-continue':
             self.rfile = common.continuereader(self.rfile, self.wfile.write)
 
         env[r'wsgi.input'] = self.rfile
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -316,18 +316,20 @@
         if k.startswith('HTTP_'):
             headers.append((k[len('HTTP_'):].replace('_', '-'), v))
 
-    headers = wsgiheaders.Headers(headers)
+
+    headers = wsgiheaders.Headers([(pycompat.strurl(k), pycompat.strurl(v))
+                                   for k, v in headers])
 
     # This is kind of a lie because the HTTP header wasn't explicitly
     # sent. But for all intents and purposes it should be OK to lie about
     # this, since a consumer will either either value to determine how many
     # bytes are available to read.
     if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
-        headers['Content-Length'] = env['CONTENT_LENGTH']
+        headers[r'Content-Length'] = pycompat.strurl(env['CONTENT_LENGTH'])
 
     bodyfh = env['wsgi.input']
-    if 'Content-Length' in headers:
-        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+    if r'Content-Length' in headers:
+        bodyfh = util.cappedreader(bodyfh, int(headers[r'Content-Length']))
 
     return parsedrequest(method=env['REQUEST_METHOD'],
                          url=fullurl, baseurl=baseurl,
@@ -422,7 +424,7 @@
         """
         self._verifybody()
         self._bodybytes = b
-        self.headers['Content-Length'] = '%d' % len(b)
+        self.headers[r'Content-Length'] = r'%d' % len(b)
 
     def setbodygen(self, gen):
         """Define the response body as a generator of bytes."""
@@ -473,19 +475,19 @@
         # states that no response body can be issued. Content-Length can
         # be sent. But if it is present, it should be the size of the response
         # that wasn't transferred.
-        if self.status.startswith('304 '):
+        if self.status.startswith(r'304 '):
             # setbodybytes('') will set C-L to 0. This doesn't conform with the
             # spec. So remove it.
-            if self.headers.get('Content-Length') == '0':
-                del self.headers['Content-Length']
+            if self.headers.get(r'Content-Length') == r'0':
+                del self.headers[r'Content-Length']
 
             # Strictly speaking, this is too strict. But until it causes
             # problems, let's be strict.
             badheaders = {k for k in self.headers.keys()
-                          if k.lower() not in ('date', 'etag', 'expires',
-                                               'cache-control',
-                                               'content-location',
-                                               'vary')}
+                          if k.lower() not in (r'date', r'etag', r'expires',
+                                               r'cache-control',
+                                               r'content-location',
+                                               r'vary')}
             if badheaders:
                 raise error.ProgrammingError(
                     'illegal header on 304 response: %s' %
@@ -508,7 +510,7 @@
         # If the client sent Expect: 100-continue, we assume it is smart enough
         # to deal with the server sending a response before reading the request.
         # (httplib doesn't do this.)
-        if self._req.headers.get('Expect', '').lower() == '100-continue':
+        if self._req.headers.get(r'Expect', r'').lower() == r'100-continue':
             pass
         # Only tend to request methods that have bodies. Strictly speaking,
         # we should sniff for a body. But this is fine for our existing
diff --git a/mercurial/hgweb/hgwebdir_mod.py b/mercurial/hgweb/hgwebdir_mod.py
--- a/mercurial/hgweb/hgwebdir_mod.py
+++ b/mercurial/hgweb/hgwebdir_mod.py
@@ -371,11 +371,11 @@
             virtual = req.dispatchpath.strip('/')
             tmpl = self.templater(req, nonce)
             ctype = tmpl('mimetype', encoding=encoding.encoding)
-            ctype = templateutil.stringify(ctype)
+            ctype = pycompat.strurl(templateutil.stringify(ctype))
 
             # Global defaults. These can be overridden by any handler.
-            res.status = '200 Script output follows'
-            res.headers['Content-Type'] = ctype
+            res.status = r'200 Script output follows'
+            res.headers[r'Content-Type'] = ctype
 
             # a static file
             if virtual.startswith('static/') or 'static' in req.qsparams:
@@ -423,8 +423,10 @@
                 if real:
                     # Re-parse the WSGI environment to take into account our
                     # repository path component.
+                    strenv = {pycompat.strurl(k): pycompat.strurl(v) for k, v in
+                              req.rawenv.iteritems()}
                     req = requestmod.parserequestfromenv(
-                        req.rawenv, reponame=virtualrepo,
+                        strenv, reponame=virtualrepo,
                         altbaseurl=self.ui.config('web', 'baseurl'))
                     try:
                         # ensure caller gets private copy of ui
@@ -442,12 +444,12 @@
                 return self.makeindex(req, res, tmpl, subdir)
 
             # prefixes not found
-            res.status = '404 Not Found'
+            res.status = r'404 Not Found'
             res.setbodygen(tmpl('notfound', repo=virtual))
             return res.sendresponse()
 
         except ErrorResponse as e:
-            res.status = statusmessage(e.code, pycompat.bytestr(e))
+            res.status = statusmessage(e.code, e)
             res.setbodygen(tmpl('error', error=e.message or ''))
             return res.sendresponse()
         finally:
diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py
--- a/mercurial/hgweb/hgweb_mod.py
+++ b/mercurial/hgweb/hgweb_mod.py
@@ -360,7 +360,7 @@
                     for a in args:
                         req.qsparams.add('file', a)
 
-            ua = req.headers.get('User-Agent', '')
+            ua = pycompat.bytesurl(req.headers.get(r'User-Agent', r''))
             if cmd == 'rev' and 'mercurial' in ua:
                 req.qsparams['style'] = 'raw'
 
@@ -379,7 +379,7 @@
         try:
             rctx.tmpl = rctx.templater(req)
             ctype = rctx.tmpl('mimetype', encoding=encoding.encoding)
-            ctype = templateutil.stringify(ctype)
+            ctype = pycompat.strurl(templateutil.stringify(ctype))
 
             # check read permissions non-static content
             if cmd != 'static':
@@ -392,41 +392,41 @@
             # Don't enable caching if using a CSP nonce because then it wouldn't
             # be a nonce.
             if rctx.configbool('web', 'cache') and not rctx.nonce:
-                tag = 'W/"%d"' % self.mtime
-                if req.headers.get('If-None-Match') == tag:
-                    res.status = '304 Not Modified'
+                tag = r'W/"%d"' % self.mtime
+                if req.headers.get(r'If-None-Match') == tag:
+                    res.status = r'304 Not Modified'
                     # Response body not allowed on 304.
                     res.setbodybytes('')
                     return res.sendresponse()
 
-                res.headers['ETag'] = tag
+                res.headers[r'ETag'] = tag
 
             if cmd not in webcommands.__all__:
                 msg = 'no such method: %s' % cmd
                 raise ErrorResponse(HTTP_BAD_REQUEST, msg)
             else:
                 # Set some globals appropriate for web handlers. Commands can
                 # override easily enough.
-                res.status = '200 Script output follows'
-                res.headers['Content-Type'] = ctype
+                res.status = r'200 Script output follows'
+                res.headers[r'Content-Type'] = ctype
                 return getattr(webcommands, cmd)(rctx)
 
         except (error.LookupError, error.RepoLookupError) as err:
             msg = pycompat.bytestr(err)
             if (util.safehasattr(err, 'name') and
                 not isinstance(err,  error.ManifestLookupError)):
                 msg = 'revision not found: %s' % err.name
 
-            res.status = '404 Not Found'
-            res.headers['Content-Type'] = ctype
+            res.status = r'404 Not Found'
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=msg)
         except (error.RepoError, error.RevlogError) as e:
-            res.status = '500 Internal Server Error'
-            res.headers['Content-Type'] = ctype
+            res.status = r'500 Internal Server Error'
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=pycompat.bytestr(e))
         except ErrorResponse as e:
-            res.status = statusmessage(e.code, pycompat.bytestr(e))
-            res.headers['Content-Type'] = ctype
+            res.status = statusmessage(e.code, e)
+            res.headers[r'Content-Type'] = ctype
             return rctx.sendtemplate('error', error=pycompat.bytestr(e))
 
     def check_perm(self, rctx, req, op):
diff --git a/mercurial/hgweb/common.py b/mercurial/hgweb/common.py
--- a/mercurial/hgweb/common.py
+++ b/mercurial/hgweb/common.py
@@ -128,7 +128,7 @@
     return responses.get(code, ('Error', 'Unknown error'))[0]
 
 def statusmessage(code, message=None):
-    return '%d %s' % (code, message or _statusmessage(code))
+    return r'%d %s' % (code, message or _statusmessage(code))
 
 def get_stat(spath, fn):
     """stat fn if it exists, spath otherwise"""
@@ -181,7 +181,7 @@
         res.setbodybytes(data)
         return res
     except TypeError:
-        raise ErrorResponse(HTTP_SERVER_ERROR, 'illegal filename')
+        raise ErrorResponse(HTTP_SERVER_ERROR, r'illegal filename')
     except OSError as err:
         if err.errno == errno.ENOENT:
             raise ErrorResponse(HTTP_NOT_FOUND)



To: durin42, #hg-reviewers
Cc: mercurial-devel


More information about the Mercurial-devel mailing list