[PATCH] url: be stricter about detecting schemes

Brodie Rao brodie at bitheap.org
Thu Mar 31 19:38:06 CDT 2011


# HG changeset patch
# User Brodie Rao <brodie at bitheap.org>
# Date 1301618253 25200
# Node ID 5b53df7ac5338c863aa3f898b58be964d15e62c7
# Parent  d69c9510d648321a30af673cbb113972e74fa284
url: be stricter about detecting schemes

While the URL parser is very forgiving about what characters are
allowed in each component, it's useful to be strict about the scheme
so we don't accidentally interpret local paths with colons as URLs.

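This restricts schemes to alphanumeric characters, dashes, pluses, and
dots (the character set specified in RFC 2396). Note that the RFC also
requires a scheme to begin with a letter, which this pattern does not
enforce.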

diff --git a/mercurial/url.py b/mercurial/url.py
--- a/mercurial/url.py
+++ b/mercurial/url.py
@@ -7,7 +7,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import urllib, urllib2, httplib, os, socket, cStringIO
+import urllib, urllib2, httplib, os, socket, cStringIO, re
 import __builtin__
 from i18n import _
 import keepalive, util
@@ -64,6 +64,7 @@ class url(object):
 
     _safechars = "!~*'()+"
     _safepchars = "/!~*'()+"
+    _matchscheme = re.compile(r'^[a-zA-Z0-9+.\-]+:').match
 
     def __init__(self, path, parsequery=True, parsefragment=True):
         # We slowly chomp away at path until we have only the path left
@@ -88,7 +89,7 @@ class url(object):
             self.path = path
             return
 
-        if not path.startswith('/') and ':' in path:
+        if self._matchscheme(path):
             parts = path.split(':', 1)
             if parts[0]:
                 self.scheme, path = parts
diff --git a/tests/test-url.py b/tests/test-url.py
--- a/tests/test-url.py
+++ b/tests/test-url.py
@@ -157,6 +157,12 @@ def test_url():
     <url path: 'a/b/c/d.g.f'>
     >>> url('/x///z/y/')
     <url path: '/x///z/y/'>
+    >>> url('/foo:bar')
+    <url path: '/foo:bar'>
+    >>> url('\\\\foo:bar')
+    <url path: '\\\\foo:bar'>
+    >>> url('./foo:bar')
+    <url path: './foo:bar'>
 
     Non-localhost file URL:
 


More information about the Mercurial-devel mailing list