[PATCH] Clone via hardlinks for Windows

Stephen Darnell sdarnell at esmertec.com
Sun Aug 28 07:58:09 CDT 2005


Attached is an updated patch for cloning via hardlinks on Windows.

I also ran the test suite on debian and it fails the same as the
current latest (unrelated to my changes).
- test-copy2.err (time diffs?)
- test-filebranch.err (whitespace change)

Unix should be unaffected, although if you ran HG on a filesystem
that didn't support hardlinks it would fall back to doing a copy.
That is, at the first link failure, it drops back to a copy.

On win32 it doesn't seem to be possible to tell whether linking will
be supported without trying it.

Performance on trivial depots isn't much different (the update
is a large portion of the time anyway).  On larger depots, the
difference is much more noticeable.
Cloning Xen (5000 files) with -U dropped from about 95s to 15s.

Regards,
  Stephen




-------------- next part --------------
# HG changeset patch
# User Stephen Darnell
# Node ID c5cc6e9ac216ec6139d1d28ee675d04368d80cee
# Parent  a33a7a543803c7383a6918b19797bf9fd6b83e8e
Add support for cloning with hardlinks on windows.

In order to use hardlinks, the win32file module is needed, and this is
present in ActivePython.  If it isn't present, or hardlinks are not supported
on the underlying filesystem, a regular copy is used.

When using hardlinks the biggest benefit is probably the saving in space,
but cloning can be much quicker.  For example cloning the Xen tree 
(non trivial) without an update goes from about 95s to 15s.

Unix-like platforms should be unaffected, although should be more tolerant on
filesystems that don't support hard links.

diff -r a33a7a543803 -r c5cc6e9ac216 mercurial/commands.py
--- a/mercurial/commands.py	Sun Aug 28 06:45:27 2005
+++ b/mercurial/commands.py	Sun Aug 28 12:48:01 2005
@@ -572,15 +572,12 @@
 
     if other.dev() != -1:
         abspath = os.path.abspath(source)
-        copyfile = (os.stat(dest).st_dev == other.dev()
-                    and getattr(os, 'link', None) or shutil.copy2)
-        if copyfile is not shutil.copy2:
-            ui.note("cloning by hardlink\n")
         # we use a lock here because because we're not nicely ordered
         l = lock.lock(os.path.join(source, ".hg", "lock"))
 
-        util.copytree(os.path.join(source, ".hg"), os.path.join(dest, ".hg"),
-                      copyfile)
+        if util.copytree(os.path.join(source, ".hg"),
+                         os.path.join(dest, ".hg")):
+            ui.note("cloned with hardlinks\n")
 
         for fn in "dirstate", "lock":
             try:
diff -r a33a7a543803 -r c5cc6e9ac216 mercurial/util.py
--- a/mercurial/util.py	Sun Aug 28 06:45:27 2005
+++ b/mercurial/util.py	Sun Aug 28 12:48:01 2005
@@ -12,7 +12,7 @@
 
 import os, errno
 from demandload import *
-demandload(globals(), "re")
+demandload(globals(), "re shutil")
 
 def binary(s):
     """return true if a string is binary data using diff's heuristic"""
@@ -217,20 +217,29 @@
         os.unlink(dst)
         os.rename(src, dst)
 
-def copytree(src, dst, copyfile):
-    """Copy a directory tree, files are copied using 'copyfile'."""
-    names = os.listdir(src)
+def copytree(src, dst, hardlink=None):
+    """Copy a directory tree using hardlinks if possible."""
     os.mkdir(dst)
-
-    for name in names:
+    if hardlink is None:
+        hardlink = (os.stat(src).st_dev == os.stat(dst).st_dev)
+
+    for name in os.listdir(src):
         srcname = os.path.join(src, name)
         dstname = os.path.join(dst, name)
         if os.path.isdir(srcname):
-            copytree(srcname, dstname, copyfile)
+            hardlink = copytree(srcname, dstname, hardlink)
         elif os.path.isfile(srcname):
-            copyfile(srcname, dstname)
+            if hardlink:
+                try:
+                    os_link(srcname, dstname)
+                except:
+                    hardlink = False
+                    shutil.copy2(srcname, dstname)
+            else:
+                shutil.copy2(srcname, dstname)
         else:
-            pass
+            pass # silently ignore symlinks etc. (e.g. lock file)
+    return hardlink
 
 def opener(base):
     """
@@ -247,13 +256,13 @@
 
         if mode[0] != "r":
             try:
-                s = os.stat(f)
+                nlink = nlinks(f)
             except OSError:
                 d = os.path.dirname(f)
                 if not os.path.isdir(d):
                     os.makedirs(d)
             else:
-                if s.st_nlink > 1:
+                if nlink > 1:
                     file(f + ".tmp", "wb").write(file(f, "rb").read())
                     rename(f+".tmp", f)
 
@@ -269,9 +278,40 @@
 def _readlock_file(pathname):
     return file(pathname).read()
 
+def nlinks(pathname):
+    """Return number of hardlinks for the given file."""
+    return os.stat(pathname).st_nlink
+
+if hasattr(os, 'link'):
+    os_link = os.link
+else:
+    def os_link(src, dst):
+        raise OSError(0, "Hardlinks not supported")
+
 # Platform specific variants
 if os.name == 'nt':
     nulldev = 'NUL:'
+
+    try: # ActivePython can create hard links using win32file module
+        import win32file
+
+        def os_link(src, dst): # NB will only succeed on NTFS
+            win32file.CreateHardLink(dst, src)
+
+        def nlinks(pathname):
+            """Return number of hardlinks for the given file."""
+            try:
+                fh = win32file.CreateFile(pathname,
+                    win32file.GENERIC_READ, win32file.FILE_SHARE_READ,
+                    None, win32file.OPEN_EXISTING, 0, None)
+                res = win32file.GetFileInformationByHandle(fh)
+                fh.Close()
+                return res[7]
+            except:
+                return os.stat(pathname).st_nlink
+
+    except ImportError:
+        pass
 
     def is_exec(f, last):
         return last


More information about the Mercurial mailing list