[PATCH 6 of 6] largefiles: PoC: replace largefile copies in working directory with symlinks
Mads Kiilerich
mads at kiilerich.com
Fri Oct 7 19:26:10 EDT 2016
# HG changeset patch
# User Mads Kiilerich <madski at unity3d.com>
# Date 1475882684 -7200
# Sat Oct 08 01:24:44 2016 +0200
# Node ID 36e7e42f266e5cc70535479e50445395402c7400
# Parent 2c25dc4a5a556abbebe09fe3c4eb0ff4c8fa0cd4
largefiles: PoC: replace largefile copies in working directory with symlinks
Updating between distant revisions can be expensive when working on multiple
divergent branches. The basic working directory update might be efficient but
copying the largefiles and hashing them will take a lot of bandwidth and thus
take time.
Usually, largefiles are copied from the store to the working directory to make
sure that any in-place modification of largefiles in the working directory
doesn't corrupt the store.
In a build farm, there will often be a lot of switching between branches, and
the risk of someone doing in-place modification of largefiles is quite small.
It might thus make sense to make a different trade-off.
The idea here is to make a symlink pointing at the storage file instead of
copying it. In some cases, this can make working copy updates NaN times faster.
To reduce the risk of corruption, storage files are made read-only.
To make sure that time-stamp based build systems manage a symlink change
correctly, storage files are touched when they are linked to.
This should also work on Windows IF running as admin or if ordinary users have
been authorized to make symlinks (which is probably not a good idea in
general).
This is a proof of concept and doesn't contain or pass any tests. It should
perhaps turn into an extension.
diff --git a/hgext/largefiles/lfutil.py b/hgext/largefiles/lfutil.py
--- a/hgext/largefiles/lfutil.py
+++ b/hgext/largefiles/lfutil.py
@@ -10,6 +10,7 @@
from __future__ import absolute_import
import copy
+import errno
import hashlib
import os
import platform
@@ -32,6 +33,20 @@ shortnameslash = shortname + '/'
longname = 'largefiles'
filechunkitersize = 128 * 1024
+if os.name == 'nt':
+ import ctypes
+ FILESYMLINK = 0
+ _kernel32 = ctypes.windll.kernel32
+ def symlink(src, dst):
+ """Create a symbolic link pointing to src named dst.
+ Requires running as Admin ... or enabling in:
+ Computer configuration > Windows Settings > Security Settings >
+ Local Policies > User Rights Assignment > Create symbolic links
+ """
+ _kernel32.CreateSymbolicLinkA(dst, os.path.normpath(src), FILESYMLINK)
+else:
+ symlink = os.symlink
+
# -- Private worker functions ------------------------------------------
def getminsize(ui, assumelfiles, opt, default=10):
@@ -227,18 +242,26 @@ def copyfromcache(repo, hash, filename):
path = findfile(repo, hash)
if path is None:
return False
+ # make storage file read-only before pointing at it - reduce risk of
+ # corruption
+ try:
+ os.chmod(path, os.stat(path).st_mode &
+ ~(stat.S_IWUSR|stat.S_IWGRP|stat.S_IWOTH))
+ except OSError as e:
+ if e.errno != errno.EPERM:
+ raise
+ # no chmod access probably means that it is safe (but touch will fail)
+ repo.ui.debug("can't chmod %s\n" % path)
wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
- # The write may fail before the file is fully written, but we
- # don't use atomic writes in the working copy.
- with open(path, 'rb') as srcfd:
- with wvfs(filename, 'wb') as destfd:
- gothash = copyandhash(
- util.filechunkiter(srcfd, filechunkitersize), destfd)
- if gothash != hash:
- repo.ui.warn(_('%s: data corruption in %s with hash %s\n')
- % (filename, path, gothash))
- wvfs.unlink(filename)
- return False
+ symlink(path, wvfs.join(filename))
+ # touch the storage file so build systems see the file as modified
+ try:
+ os.utime(path, None) # touch
+ except OSError as e:
+ if e.errno != errno.EACCES:
+ raise
+ repo.ui.warn(_("can't touch %s - that might confuse build systems\n")
+ % path)
return True
def copytostore(repo, rev, file, uploaded=False):
diff --git a/hgext/largefiles/overrides.py b/hgext/largefiles/overrides.py
--- a/hgext/largefiles/overrides.py
+++ b/hgext/largefiles/overrides.py
@@ -1392,6 +1392,8 @@ def mergeupdate(orig, repo, node, branch
lfileabs = repo.wvfs.join(lfile)
if not repo.wvfs.exists(lfileabs):
continue
+ if repo.wvfs.islink(lfile):
+ continue # never update standins from symlinks
lfhash = lfutil.hashrepofile(repo, lfile)
standin = lfutil.standin(lfile)
lfutil.writestandin(repo, standin, lfhash,
diff --git a/hgext/largefiles/reposetup.py b/hgext/largefiles/reposetup.py
--- a/hgext/largefiles/reposetup.py
+++ b/hgext/largefiles/reposetup.py
@@ -172,7 +172,8 @@ def reposetup(ui, repo):
if standin not in ctx1:
# from second parent
modified.append(lfile)
- elif ctx1[standin].data().strip() \
+ elif not self.wvfs.islink(lfile) and \
+ ctx1[standin].data().strip() \
!= lfutil.hashfile(self.wjoin(lfile)):
modified.append(lfile)
else:
More information about the Mercurial-devel
mailing list