[PATCH v2] store: rewrite fncache path mangling code in C

Adrian Buehlmann adrian at cadifra.com
Wed Sep 5 12:41:42 CDT 2012


On 2012-09-01 00:35, Adrian Buehlmann wrote:
> On 2012-08-28 19:43, Bryan O'Sullivan wrote:
>> # HG changeset patch
>> # User Bryan O'Sullivan <bryano at fb.com>
>> # Date 1346175814 25200
>> # Node ID 91f70954e9d681a35130aa24f66aaa7148d8ee1b
>> # Parent  99a2a4ae35e2180b7f825ef2677c36d538eac4ba
>> store: rewrite fncache path mangling code in C
>>
>> The Python path mangling code used by fncache
> 
> Applying:
> 
> 
> diff --git a/tests/test-hybridencode.py b/tests/test-hybridencode.py
> --- a/tests/test-hybridencode.py
> +++ b/tests/test-hybridencode.py
> @@ -1,9 +1,10 @@
> -from mercurial import store
> +from mercurial import store, parsers
>  
> -auxencode = lambda f: store._auxencode(f, True)
> -hybridencode = lambda f: store._hybridencode(f, auxencode)
> +enc = getattr(parsers, 'pathencode', False)
>  
> -enc = hybridencode # used for 'dotencode' repo format
> +if not enc:
> +    auxencode = lambda f: store._auxencode(f, True)
> +    enc = lambda f: store._hybridencode(f, auxencode)
>  
>  def show(s):
>      print "A = '%s'" % s.encode("string_escape")

with e5422a9ffe9d it now fails with:

--- C:\Users\adi\hgrepos\hg-main\tests\test-hybridencode.py.out
+++ C:\Users\adi\hgrepos\hg-main\tests\test-hybridencode.py.err
@@ -19,10 +19,10 @@

 characters in ASCII code range 126..255
 A = 'data/~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f'
-B = 'data/~7e~7f~80~81~82~83~84~85~86~87~88~89~8a~8b~8c~8d~8e~8f~90~91~92~93~94~95~96~97~98~99~9a~9b~9c~9d~9e~9f'
+B = 'data/~7e~7f~80~81~82_\xa3_\xa4_\xa5~86~87_\xa8~89~8a_\xab_\xac_\xad~8e~8f_\xb0~91~92~93~94_\xb5_\xb6~97~98_\xb9~9a~9b~9c_\xbd_\xbe~9f'

 A = 'data/\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf'
-B = 'data/~a0~a1~a2~a3~a4~a5~a6~a7~a8~a9~aa~ab~ac~ad~ae~af~b0~b1~b2~b3~b4~b5~b6~b7~b8~b9~ba~bb~bc~bd~be~bf'
+B = 'data/_\xc0_\xc1~a2~a3~a4_\xc5_\xc6~a7~a8~a9_\xca~ab~ac_\xcd_\xce~af_\xd0~b1_\xd2~b3~b4_\xd5_\xd6~b7~b8_\xd9_\xda~bb~bc_\xdd_\xde~bf'

 A = 'data/\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf'
 B = 'data/~c0~c1~c2~c3~c4~c5~c6~c7~c8~c9~ca~cb~cc~cd~ce~cf~d0~d1~d2~d3~d4~d5~d6~d7~d8~d9~da~db~dc~dd~de~df'
@@ -50,7 +50,7 @@

 plain .hg, .i and .d directories have the leading dot encoded
 A = 'data/.hg/.i/.d/foo'
-B = 'data/~2ehg.hg/~2ei.hg/~2ed.hg/foo'
+B = 'data/~2ehg/~2ei/~2ed/foo'

 A = 'data/aux.bla/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c.i'
 B = 'data/au~78.bla/bla.aux/pr~6e/_p_r_n/lpt/co~6d3/nu~6c/coma/foo._n_u_l/normal.c.i'
@@ -65,10 +65,10 @@
 B = 'dh/au~78.the-quick-brown-fox-ju~3amps-over-the-lazy-dog-the-quick-brown-fox-jud4dcadd033000ab2b26eb66bae1906bcb15d4a70.i'

 A = 'data/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
-B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'
+B = 'dh/project /resource/anotherl/followed/andanoth/andthenanextremelylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'

 A = 'data/Project.Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
-B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'
+B = 'dh/project./resource/anotherl/followed/andanoth/andthenanextremelylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'

 A = 'data/foo.../foo   / /a./_. /__/.x../    bla/.FOO/something.i'
 B = 'data/foo..~2e/foo  ~20/~20/a~2e/__.~20/____/~2ex.~2e/~20   bla/~2e_f_o_o/something.i'
@@ -133,18 +133,18 @@

 shortest hashed path
 A = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-123456'
-B = 'dh/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxe9c55002b50bf5181e7a6fc1f60b126e2a6fcf71'
+B = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-123456'

 changing one char in part that's hashed away produces a different hash
 A = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxy-123456789-123456'
-B = 'dh/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxd24fa4455faf8a94350c18e5eace7c2bb17af706'
+B = 'data/123456789-123456789-123456789-123456789-123456789-hashed----xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxy-123456789-123456'

 uppercase hitting length limit due to encoding
 A = 'data/A23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/a23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxcbbc657029b41b94ed510d05feb6716a5c03bc6b'
+B = 'dh/cbbc657029b41b94ed510d05feb6716a5c03bc6b'

 A = 'data/Z23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'

+B = 'dh/938f32a725c89512833fb96b6602dd9ebff51ddd'

 compare with lowercase not hitting limit
 A = 'data/a23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
@@ -159,103 +159,103 @@

 underbar hitting length limit due to encoding
 A = 'data/_23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/_23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxx9921a01af50feeabc060ce00eee4cba6efc31d2b'
+B = 'dh/9921a01af50feeabc060ce00eee4cba6efc31d2b'

 tilde hitting length limit due to encoding
 A = 'data/~23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~7e23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx9cec6f97d569c10995f785720044ea2e4227481b'
+B = 'dh/9cec6f97d569c10995f785720044ea2e4227481b'

 Windows reserved characters hitting length limit
 A = 'data/<23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~3c23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxee67d8f275876ca1ef2500fc542e63c885c4e62d'
+B = 'dh/ee67d8f275876ca1ef2500fc542e63c885c4e62d'

 A = 'data/>23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~3e23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx387a85a5b1547cc9136310c974df716818458ddb'
+B = 'dh/387a85a5b1547cc9136310c974df716818458ddb'

 A = 'data/:23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~3a23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx2e4154fb571d13d22399c58cc4ef4858e4b75999'
+B = 'dh/2e4154fb571d13d22399c58cc4ef4858e4b75999'

 A = 'data/"23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~2223456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxfc7e3ec7b0687ee06ed8c32fef0eb0c1980259f5'
+B = 'dh/fc7e3ec7b0687ee06ed8c32fef0eb0c1980259f5'

 A = 'data/\\23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~5c23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx944e1f2b7110687e116e0d151328ac648b06ab4a'
+B = 'dh/944e1f2b7110687e116e0d151328ac648b06ab4a'

 A = 'data/|23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~7c23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx28b23dd3fd0242946334126ab62bcd772aac32f4'
+B = 'dh/28b23dd3fd0242946334126ab62bcd772aac32f4'

 A = 'data/?23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~3f23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxa263022d3994d2143d98f94f431eef8b5e7e0f8a'
+B = 'dh/a263022d3994d2143d98f94f431eef8b5e7e0f8a'

 A = 'data/*23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~2a23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx0e7e6020e3c00ba7bb7893d84ca2966fbf53e140'
+B = 'dh/0e7e6020e3c00ba7bb7893d84ca2966fbf53e140'

 initial space hitting length limit
 A = 'data/ 23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~2023456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx92acbc78ef8c0b796111629a02601f07d8aec4ea'
+B = 'dh/92acbc78ef8c0b796111629a02601f07d8aec4ea'

 initial dot hitting length limit
 A = 'data/.23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~2e23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxdbe19cc6505b3515ab9228cebf877ad07075168f'
+B = 'dh/dbe19cc6505b3515ab9228cebf877ad07075168f'

 trailing space in filename hitting length limit
 A = 'data/123456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-1234 '
-B = 'dh/123456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxx0025dc73e04f97426db4893e3bf67d581dc6d066'
+B = 'dh/0025dc73e04f97426db4893e3bf67d581dc6d066'

 trailing dot in filename hitting length limit
 A = 'data/123456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-1234.'
-B = 'dh/123456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxx85a16cf03ee7feba8a5abc626f1ba9886d01e89d'
+B = 'dh/85a16cf03ee7feba8a5abc626f1ba9886d01e89d'

 initial space in directory hitting length limit
 A = 'data/ x/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~20x/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx1b3a3b712b2ac00d6af14ae8b4c14fdbf904f516'
+B = 'dh/~20x/1b3a3b712b2ac00d6af14ae8b4c14fdbf904f516'

 initial dot in directory hitting length limit
 A = 'data/.x/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/~2ex/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx39dbc4c193a5643a8936fc69c3363cd7ac91ab14'
+B = 'dh/~2ex/39dbc4c193a5643a8936fc69c3363cd7ac91ab14'

 trailing space in directory hitting length limit
 A = 'data/x /456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/x~20/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx2253c341df0b5290790ad312cd8499850f2273e5'
+B = 'dh/x~20/2253c341df0b5290790ad312cd8499850f2273e5'

 trailing dot in directory hitting length limit
 A = 'data/x./456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/x~2e/456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxcc0324d696d34562b44b5138db08ee1594ccc583'
+B = 'dh/x~2e/cc0324d696d34562b44b5138db08ee1594ccc583'

 with directories that need direncoding, hitting length limit
 A = 'data/x.i/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/x.i.hg/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxa4c4399bdf81c67dbbbb7060aa0124d8dea94f74'
+B = 'dh/x.i.hg/eb36e11a2dd25d73a4dfc3eac6b85adbd1a23eec'

 A = 'data/x.d/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/x.d.hg/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxx1303fa90473b230615f5b3ea7b660e881ae5270a'
+B = 'dh/x.d.hg/532463c397fd825590d4c068807c16a15b52acc7'

 A = 'data/x.hg/5789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/x.hg.hg/5789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxx26d724a8af68e7a4e4455e6602ea9adbd0eb801f'
+B = 'dh/x.hg.hg/26d724a8af68e7a4e4455e6602ea9adbd0eb801f'

 Windows reserved filenames, hitting length limit
 A = 'data/con/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/co~6e/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxc0794d4f4c605a2617900eb2563d7113cf6ea7d3'
+B = 'dh/co~6e/c0794d4f4c605a2617900eb2563d7113cf6ea7d3'

 A = 'data/prn/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/pr~6e/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx64db876e1a9730e27236cb9b167aff942240e932'
+B = 'dh/pr~6e/64db876e1a9730e27236cb9b167aff942240e932'

 A = 'data/aux/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/au~78/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx8a178558405ca6fb4bbd75446dfa186f06751a0d'
+B = 'dh/au~78/8a178558405ca6fb4bbd75446dfa186f06751a0d'

 A = 'data/nul/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/nu~6c/56789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxc5e51b6fec1bd07bd243b053a0c3f7209855b886'
+B = 'dh/nu~6c/c5e51b6fec1bd07bd243b053a0c3f7209855b886'

 A = 'data/com1/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/co~6d1/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx32f5f44ece3bb62b9327369ca84cc19c86259fcd'
+B = 'dh/co~6d1/32f5f44ece3bb62b9327369ca84cc19c86259fcd'

 A = 'data/com9/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/co~6d9/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxx734360b28c66a3230f55849fe8926206d229f990'
+B = 'dh/co~6d9/734360b28c66a3230f55849fe8926206d229f990'

 A = 'data/lpt1/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/lp~741/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxe6f16ab4b6b0637676b2842b3345c9836df46ef7'
+B = 'dh/lp~741/e6f16ab4b6b0637676b2842b3345c9836df46ef7'

 A = 'data/lpt9/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
-B = 'dh/lp~749/6789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxa475814c51acead3e44f2ff801f0c4903f986157'
+B = 'dh/lp~749/a475814c51acead3e44f2ff801f0c4903f986157'

 non-reserved names, just not hitting limit
 A = 'data/123456789-123456789-123456789-123456789-123456789-/com/com0/lpt/lpt0/-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'



More information about the Mercurial-devel mailing list