[PATCH v2] store: rewrite fncache path mangling code in C

Adrian Buehlmann adrian at cadifra.com
Fri Aug 31 17:35:40 CDT 2012


On 2012-08-28 19:43, Bryan O'Sullivan wrote:
> # HG changeset patch
> # User Bryan O'Sullivan <bryano at fb.com>
> # Date 1346175814 25200
> # Node ID 91f70954e9d681a35130aa24f66aaa7148d8ee1b
> # Parent  99a2a4ae35e2180b7f825ef2677c36d538eac4ba
> store: rewrite fncache path mangling code in C
> 
> The Python path mangling code used by fncache

Applying the following change to the test script:


diff --git a/tests/test-hybridencode.py b/tests/test-hybridencode.py
--- a/tests/test-hybridencode.py
+++ b/tests/test-hybridencode.py
@@ -1,9 +1,10 @@
-from mercurial import store
+from mercurial import store, parsers
 
-auxencode = lambda f: store._auxencode(f, True)
-hybridencode = lambda f: store._hybridencode(f, auxencode)
+enc = getattr(parsers, 'pathencode', False)
 
-enc = hybridencode # used for 'dotencode' repo format
+if not enc:
+    auxencode = lambda f: store._auxencode(f, True)
+    enc = lambda f: store._hybridencode(f, auxencode)
 
 def show(s):
     print "A = '%s'" % s.encode("string_escape")



currently already reveals the following deviations from the expected output
(please ignore the \x00 test case; I'll remove it):



--- C:\Users\adi\hgrepos\hg-main\tests\test-hybridencode.py.out
+++ C:\Users\adi\hgrepos\hg-main\tests\test-hybridencode.py.err
@@ -15,11 +15,11 @@

 characters in ASCII code range 0..31
 A = 'data/\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
-B = 'data/~00~01~02~03~04~05~06~07~08~09~0a~0b~0c~0d~0e~0f~10~11~12~13~14~15~16~17~18~19~1a~1b~1c~1d~1e~1f'
+B = 'data/\x00~01~02~03~04~05~06~07~08~09~0a~0b~0c~0d~0e~0f~10~11~12~13~14~15~16~17~18~19~1a~1b~1c~1d~1e~1f'

 characters in ASCII code range 126..255 (only partially tested)
 A = 'data/~ \x7f \x80 \x81 \x82 \x83 .. \xfd \xfe \xff'
-B = 'data/~7e ~7f ~80 ~81 ~82 ~83 .. ~fd ~fe ~ff'
+B = 'data/~7e ~7f ~80 ~81 ~82 _\xa3 .. ~fd ~fe ~ff'

 Windows reserved characters
 A = 'data/less <, greater >, colon :, double-quote ", backslash \\, pipe |, question-mark ?, asterisk *'
@@ -41,7 +41,7 @@

 plain .hg, .i and .d directories have the leading dot encoded
 A = 'data/.hg/.i/.d/foo'
-B = 'data/~2ehg.hg/~2ei.hg/~2ed.hg/foo'
+B = 'data/~2ehg/~2ei/~2ed/foo'

 A = 'data/aux.bla/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c.i'
 B = 'data/au~78.bla/bla.aux/pr~6e/_p_r_n/lpt/co~6d3/nu~6c/coma/foo._n_u_l/normal.c.i'
@@ -56,10 +56,10 @@
 B = 'dh/au~78.the-quick-brown-fox-ju~3amps-over-the-lazy-dog-the-quick-brown-fox-jud4dcadd033000ab2b26eb66bae1906bcb15d4a70.i'

 A = 'data/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
-B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'
+B = 'dh/project /resource/anotherl/followed/andanoth/andthenanextremelylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'

 A = 'data/Project.Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
-B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'
+B = 'dh/project./resource/anotherl/followed/andanoth/andthenanextremelylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'

 A = 'data/foo.../foo   / /a./_. /__/.x../    bla/.FOO/something.i'
 B = 'data/foo..~2e/foo  ~20/~20/a~2e/__.~20/____/~2ex.~2e/~20   bla/~2e_f_o_o/something.i'
@@ -124,9 +124,9 @@

 shortest hashed path
 A = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-123456'
-B = 'dh/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxe9c55002b50bf5181e7a6fc1f60b126e2a6fcf71'
+B = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-123456'

 changing one char in part that's hashed away produces a different hash
 A = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxy-123456789-123456'
-B = 'dh/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxd24fa4455faf8a94350c18e5eace7c2bb17af706'
+B = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxy-123456789-123456'



More information about the Mercurial-devel mailing list