[PATCH alternative-version] pathencode: for long paths, strip first 5 chars, not first dir

Martin von Zweigbergk martinvonz at google.com
Fri May 8 19:59:05 UTC 2015


# HG changeset patch
# User Martin von Zweigbergk <martinvonz at google.com>
# Date 1430953094 25200
#      Wed May 06 15:58:14 2015 -0700
# Node ID baa1fb9f49617b0d554c6c47af9838922baf9a35
# Parent  8179af513aebf96c4902ba3e5e3cf710d49501e4
pathencode: for long paths, strip first 5 chars, not first dir

When encoding long paths, the pure Python code strips the first
directory from the path, while the native code currently strips the
first 5 characters. This discrepancy has not been a problem so far,
since we have not stored anything in directories other than
data/. However, we will soon be storing submanifest revlogs in
metadata/, so the discrepancy will have to go [1]. Since file
collisions are avoided by the hashing alone (which is done on the full
unencoded path), it doesn't really matter whether we drop the first
dir, the first 5 characters, or special-case non-data/. To avoid
touching the C code, let's always strip the first 5 characters.

 [1] Or maybe elsewhere, but the discrepancy is ugly either way.

diff -r 8179af513aeb -r baa1fb9f4961 mercurial/store.py
--- a/mercurial/store.py	Thu May 07 16:43:58 2015 -0700
+++ b/mercurial/store.py	Wed May 06 15:58:14 2015 -0700
@@ -187,7 +187,7 @@
 
 def _hashencode(path, dotencode):
     digest = _sha(path).hexdigest()
-    le = lowerencode(path).split('/')[1:]
+    le = lowerencode(path[5:]).split('/')
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
diff -r 8179af513aeb -r baa1fb9f4961 tests/test-hybridencode.py
--- a/tests/test-hybridencode.py	Thu May 07 16:43:58 2015 -0700
+++ b/tests/test-hybridencode.py	Wed May 06 15:58:14 2015 -0700
@@ -460,3 +460,9 @@
           'VWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxx'
           'xxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwww'
           'wwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i')
+
+print "paths outside data/ can be encoded"
+show('metadata/dir/00manifest.i')
+show('metadata/12345678/12345678/12345678/12345678/12345678/'
+          '12345678/12345678/12345678/12345678/12345678/12345678/'
+          '12345678/12345678/00manifest.i')
diff -r 8179af513aeb -r baa1fb9f4961 tests/test-hybridencode.py.out
--- a/tests/test-hybridencode.py.out	Thu May 07 16:43:58 2015 -0700
+++ b/tests/test-hybridencode.py.out	Wed May 06 15:58:14 2015 -0700
@@ -491,3 +491,10 @@
 A = 'data/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPRSTUVWXYZ-1234567890-xxxxxxxxx-xxxxxxxxx-xxxxxxxx-xxxxxxxxx-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww-wwwwwwwww.i'
 B = 'dh/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/-xxxxx93352aa50377751d9e5ebdf52da1e6e69a6887a6.i'
 
+paths outside data/ can be encoded
+A = 'metadata/dir/00manifest.i'
+B = 'metadata/dir/00manifest.i'
+
+A = 'metadata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manifest.i'
+B = 'dh/ata/12345678/12345678/12345678/12345678/12345678/12345678/12345678/00manife0a4da1f89aa2aa9eb0896eb451288419049781b4.i'
+


More information about the Mercurial-devel mailing list