[PATCH 1 of 1 STABLE V2] store: add auto detection for hardlink count blindness (issue1866)

Adrian Buehlmann adrian at cadifra.com
Sun Oct 31 08:34:14 CDT 2010


# HG changeset patch
# User Adrian Buehlmann <adrian at cadifra.com>
# Date 1288531120 -3600
# Branch stable
# Node ID a3b8f1b7d3962a15c40ffc9d3894dfe697135b37
# Parent  15ca4bfecfe343cbf53210b19c081676b2a35d3f
store: add auto detection for hardlink count blindness (issue1866)

The Linux CIFS kernel driver (even in 2.6.36) suffers from a hardlink
count blindness bug (lstat() returning 1 in st_nlink when it is expected
to return >1), which causes repository corruption if Mercurial running
on Linux pushes or commits to a hardlinked repository stored on a windows
share, if that share is mounted using the CIFS driver.

This patch fixes issue1866 and improves the workaround done in
50523b4407f6 to fix issue761, by teaching the store to tell its opener
to lazily execute a runtime test for hardlink count blindness in the
opener's base directory.

Hardlink count blindness means: util.nlinks() returns a count less than
two (file not hardlinked) when it is expected to return two or more
(file hardlinked).

If the test (new function 'testnlinks') reveals that hardlinks can't be
detected, then the store's opener will do a unconditional full file copy
before every single write to a file, making sure any potential preexisting
hardlinks are broken up, so that repository corruption is prevented when
pushing or committing to hardlinked clones located on Windows shares.

The test (function 'testnlinks') is done by creating a hardlinked pair of
testfiles in the base directory of the opener and checking if a hardlink
count of 2 can be seen on them. The test also verifies that the testfiles
were really hardlinked by writing a pattern into one of the testfiles.
If the same pattern can be read back from the other testfile, then the
file pair is proven to be hardlinked (the exact effect that must be
prevented when pushing or committing to hardlinked clones).

diff --git a/mercurial/store.py b/mercurial/store.py
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -175,6 +175,7 @@ class basicstore(object):
         self.createmode = _calcmode(path)
         op = opener(self.path)
         op.createmode = self.createmode
+        op.linktest = True
         self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
 
     def join(self, f):
@@ -220,6 +221,7 @@ class encodedstore(basicstore):
         self.createmode = _calcmode(self.path)
         op = opener(self.path)
         op.createmode = self.createmode
+        op.linktest = True
         self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)
 
     def datafiles(self):
@@ -291,6 +293,7 @@ class fncachestore(basicstore):
         self.createmode = _calcmode(self.path)
         op = opener(self.path)
         op.createmode = self.createmode
+        op.linktest = True
         fnc = fncache(op)
         self.fncache = fnc
 
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -827,6 +827,44 @@ def makedirs(name, mode=None):
     makedirs(parent, mode)
     makedirs(name, mode)
 
+def testnlinks(base):
+    '''return True if hardlink detection in base works ok'''
+
+    # create a hardlinked pair of testfiles in base
+    f = os.path.join(base, 'hg-linktest-')
+    fa = f + 'a'
+    fb = f + 'b'
+    try:
+        posixfile(fa, 'w').close()
+        try:
+            os_link(fa, fb)
+        except (IOError, OSError):
+            return True # file system doesn't support creating hardlinks
+
+        # examine the testfile pair
+        if nlinks(fa) != 2 or nlinks(fb) != 2:
+            # write a testpattern to fa
+            testpattern = "53fb244cc540 " + base
+            fp = posixfile(fa, 'w')
+            fp.write(testpattern)
+            fp.close()
+            # if we can read back the pattern from fb, then the files
+            # are hardlinked, but we can't detect hardlinks (issue761)
+            fp = posixfile(fb, 'r')
+            blind = fp.read() == testpattern
+        else:
+            # examine pair again, but this time with an open file (issue1866)
+            fp = posixfile(fa, 'r')
+            blind = nlinks(fa) != 2 or nlinks(fb) != 2
+        fp.close()
+        return not blind
+    finally:
+        for f in (fa, fb):
+            try:
+                os.unlink(f)
+            except OSError:
+                pass
+
 class opener(object):
     """Open files relative to a base directory
 
@@ -840,6 +878,8 @@ class opener(object):
         else:
             self.auditor = always
         self.createmode = None
+        self.linktest = False
+        self._forcecopy = False
 
     @propertycache
     def _can_symlink(self):
@@ -859,6 +899,9 @@ class opener(object):
 
         nlink = -1
         if mode not in ("r", "rb"):
+            if self.linktest:
+                self.linktest = False
+                self._forcecopy = not testnlinks(self.base)
             try:
                 nlink = nlinks(f)
             except OSError:
@@ -870,7 +913,7 @@ class opener(object):
                     makedirs(dirname, self.createmode)
             if atomictemp:
                 return atomictempfile(f, mode, self.createmode)
-            if nlink > 1:
+            if nlink > 1 or self._forcecopy:
                 rename(mktempcopy(f), f)
         fp = posixfile(f, mode)
         if nlink == 0:
diff --git a/mercurial/win32.py b/mercurial/win32.py
--- a/mercurial/win32.py
+++ b/mercurial/win32.py
@@ -43,17 +43,7 @@ def _getfileinfo(pathname):
 
 def nlinks(pathname):
     """Return number of hardlinks for the given file."""
-    links = _getfileinfo(pathname)[7]
-    if links < 2:
-        # Known to be wrong for most network drives
-        dirname = os.path.dirname(pathname)
-        if not dirname:
-            dirname = '.'
-        dt = win32file.GetDriveType(dirname + '\\')
-        if dt == 4 or dt == 1:
-            # Fake hardlink to force COW for network drives
-            links = 2
-    return links
+    return _getfileinfo(pathname)[7]
 
 def samefile(fpath1, fpath2):
     """Returns whether fpath1 and fpath2 refer to the same file. This is only


More information about the Mercurial-devel mailing list