[PATCH] introduce auxencode repositories (issue793)

Adrian Buehlmann adrian at cadifra.com
Sat Jun 28 11:10:20 CDT 2008


# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1214654461 -7200
# Node ID e2b78b52e0a1b496238d3c18dd39ed8c76420163
# Parent  39319a457ddac02e209461eb3d0ed2145ccc0106
introduce auxencode repositories (issue793)

This change adds a new entry 'auxencode' in the .hg/requires file
for new repositories.

'auxencode' encodes repository path components that
* begin with a Windows reserved filename followed by a period, or
* are equal to a reserved filename

A reserved name is escaped by tilde-encoding its third letter. This extends the
current tilde character encoding scheme to the full range of characters
('a' -> '~61').

A new decoding function util.decodefilename_fullrange is provided, which can
decode the full range of encoded chars (~00..~ff). 'auxencode' repositories
must be decoded with this function; it is also exposed as util.auxdecode,
the counterpart of util.auxencode.

Encoding examples:
* 'aux'     -> 'au~78'
* 'foo.aux' -> 'foo.aux'    # doesn't need to be encoded
* 'aux.foo' -> 'au~78.foo'
* '\0aux'   -> '~00aux'     # same as current encoding
* 'Aux'     -> '_aux'       # same as current encoding
* '~aux'    -> '~7eaux'     # same as current encoding
* 'aux/com1/nul/prn/lpt1.txt' -> 'au~78/co~6d1/nu~6c/pr~6e/lp~741.txt'
* 'lorem/ipsum/dolor.txt'     -> 'lorem/ipsum/dolor.txt'

This encoding ensures that repositories containing tracked files
with path components consisting of Windows reserved file names
can be pulled to repositories on Windows.

Note that revisions containing reserved filenames still cannot be checked
out on Windows: this change only affects how paths are encoded in the store.

Older versions of Mercurial accessing an 'auxencode' repository will
abort with "abort: requirement 'auxencode' not supported!".

diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -15,7 +15,7 @@
 
 class localrepository(repo.repository):
     capabilities = util.set(('lookup', 'changegroupsubset'))
-    supported = ('revlogv1', 'store')
+    supported = ('revlogv1', 'store', 'auxencode')
 
     def __init__(self, parentui, path=None, create=0):
         repo.repository.__init__(self)
@@ -34,6 +34,7 @@
                 if parentui.configbool('format', 'usestore', True):
                     os.mkdir(os.path.join(self.path, "store"))
                     requirements.append("store")
+                    requirements.append("auxencode")
                     # create an invalid changelog
                     self.opener("00changelog.i", "a").write(
                         '\0\0\0\2' # represents revlogv2
@@ -62,8 +63,12 @@
 
         # setup store
         if "store" in requirements:
-            self.encodefn = util.encodefilename
-            self.decodefn = util.decodefilename
+            if "auxencode" in requirements:
+                self.encodefn = util.auxencode
+                self.decodefn = util.auxdecode
+            else:
+                self.encodefn = util.encodefilename
+                self.decodefn = util.decodefilename
             self.spath = os.path.join(self.path, "store")
         else:
             self.encodefn = lambda x: x
diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py
--- a/mercurial/statichttprepo.py
+++ b/mercurial/statichttprepo.py
@@ -55,8 +55,12 @@
 
         # setup store
         if "store" in requirements:
-            self.encodefn = util.encodefilename
-            self.decodefn = util.decodefilename
+            if "auxencode" in requirements:
+                self.encodefn = util.auxencode
+                self.decodefn = util.auxdecode
+            else:
+                self.encodefn = util.encodefilename
+                self.decodefn = util.decodefilename
             self.spath = self.path + "/store"
         else:
             self.encodefn = lambda x: x
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -15,7 +15,7 @@
 from i18n import _
 import cStringIO, errno, getpass, re, shutil, sys, tempfile
 import os, stat, threading, time, calendar, ConfigParser, locale, glob, osutil
-import imp, urlparse
+import imp, urlparse, string
 
 # Python compatibility
 
@@ -1345,9 +1345,10 @@
         return name
     return find_in_path(name, os.environ.get('PATH', ''), default=default)
 
+_windows_reserved_chars = '\\:*?"<>|'
 def _buildencodefun():
     e = '_'
-    win_reserved = [ord(x) for x in '\\:*?"<>|']
+    win_reserved = [ord(x) for x in _windows_reserved_chars]
     cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
     for x in (range(32) + range(126, 256) + win_reserved):
         cmap[chr(x)] = "~%02x" % x
@@ -1372,6 +1373,66 @@
             lambda s: "".join(list(decode(s))))
 
 encodefilename, decodefilename = _buildencodefun()
+
+def decodefilename_fullrange(s):
+    # same as decodefilename, but allows the full range for coded chars (~00..~ff)
+    state = 'n'
+    # possible states:
+    #  n  normal
+    #  ~  encoded char
+    #  _  encoded uppercase
+    res = ''
+    for c in s:
+        if (ord(c) < 32) or (ord(c) > 126) or (c in _windows_reserved_chars):
+            raise KeyError
+        if state == 'n':      # normal
+            if c == '_':
+                state = '_'
+            elif c == '~':
+                state = '~'
+                digits = ''
+            else:
+                res += c
+        elif state == '_':    # encoded uppercase
+            if c == '_':      
+                res += '_'
+            elif c == '~' or c == '/':
+                raise KeyError
+            else:
+                res += c.upper()
+            state = 'n'
+        elif state == '~':    # ~ encoded char
+            if not c in string.hexdigits:
+                raise KeyError
+            digits += c
+            if len(digits) == 2:
+                res += chr(int(digits, 16))
+                state = 'n'
+        else:                 # unknown state
+            raise KeyError
+    if state != 'n':
+        raise KeyError
+    return res
+
+_windows_reserved_filenames = '''con prn aux nul
+    com1 com2 com3 com4 com5 com6 com7 com8 com9
+    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
+def auxencode(path):
+    '''encodefilename + encode windows reserved filenames
+    encodes the third letter ('aux' -> 'au~78')'''
+    res = []
+    for n in encodefilename(path).split('/'):
+        if n:
+            base = n.split('.')[0]
+            if base and (base in _windows_reserved_filenames):
+                # uppercase variants are already encoded
+                # (e.g. 'Aux' -> '_aux', 'aUx' -> 'a_ux')
+                ec = "~%02x" % ord(n[2])
+                n = n[0:2] + ec + n[3:]
+        res.append(n)
+    return '/'.join(res)
+
+auxdecode = decodefilename_fullrange
 
 def encodedopener(openerfn, fn):
     def o(path, *args, **kw):
diff --git a/tests/test-auxencode.py b/tests/test-auxencode.py
new file mode 100644
--- /dev/null
+++ b/tests/test-auxencode.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+from mercurial import util
+enc = util.auxencode
+dec = util.auxdecode
+
+def encdec(s):
+    e = enc(s)
+    d = dec(e)
+    print repr(s), '->', repr(e), '->', repr(d)
+    if s != d:
+        print 'roundtrip failed for', repr(s)
+
+def decenc(s):
+    print repr(s), '->', repr(dec(s)), '->', repr(enc(dec(s)))
+
+encdec('simple')
+encdec('simple with space')
+encdec('nul char \x00')
+encdec('windows reserved \\:*?"<>|')
+encdec('cAsE Handling')
+encdec('escape sequence _e and ~ff')
+
+print
+decenc('~3a ~61')
+
+print
+encdec('aux')
+encdec('aux.')
+encdec('aux.txt')
+
+print
+print "% foo.aux needs no encoding"
+encdec('foo.aux')
+
+print
+encdec('/nul')
+encdec('/nul.')
+encdec('/nul.txt')
+encdec('/con/lpt1.a')
+encdec('/con.b/foo/prn')
+encdec('/not-a-prn.txt')
+encdec('/context/unchanged')
diff --git a/tests/test-auxencode.py.out b/tests/test-auxencode.py.out
new file mode 100644
--- /dev/null
+++ b/tests/test-auxencode.py.out
@@ -0,0 +1,23 @@
+'simple' -> 'simple' -> 'simple'
+'simple with space' -> 'simple with space' -> 'simple with space'
+'nul char \x00' -> 'nul char ~00' -> 'nul char \x00'
+'windows reserved \\:*?"<>|' -> 'windows reserved ~5c~3a~2a~3f~22~3c~3e~7c' -> 'windows reserved \\:*?"<>|'
+'cAsE Handling' -> 'c_as_e _handling' -> 'cAsE Handling'
+'escape sequence _e and ~ff' -> 'escape sequence __e and ~7eff' -> 'escape sequence _e and ~ff'
+
+'~3a ~61' -> ': a' -> '~3a a'
+
+'aux' -> 'au~78' -> 'aux'
+'aux.' -> 'au~78.' -> 'aux.'
+'aux.txt' -> 'au~78.txt' -> 'aux.txt'
+
+% foo.aux needs no encoding
+'foo.aux' -> 'foo.aux' -> 'foo.aux'
+
+'/nul' -> '/nu~6c' -> '/nul'
+'/nul.' -> '/nu~6c.' -> '/nul.'
+'/nul.txt' -> '/nu~6c.txt' -> '/nul.txt'
+'/con/lpt1.a' -> '/co~6e/lp~741.a' -> '/con/lpt1.a'
+'/con.b/foo/prn' -> '/co~6e.b/foo/pr~6e' -> '/con.b/foo/prn'
+'/not-a-prn.txt' -> '/not-a-prn.txt' -> '/not-a-prn.txt'
+'/context/unchanged' -> '/context/unchanged' -> '/context/unchanged'
diff --git a/tests/test-init.out b/tests/test-init.out
--- a/tests/test-init.out
+++ b/tests/test-init.out
@@ -3,6 +3,7 @@
 00changelog.i created
 revlogv1
 store
+auxencode
 adding foo
 # creating repo with old format
 revlogv1


More information about the Mercurial-devel mailing list