[PATCH] Issue919: add a standard extension to recreate hardlinks between repositories

Jesse Glick jesse.glick at sun.com
Tue Dec 16 13:31:49 CST 2008


# HG changeset patch
# User Jesse Glick <jesse.glick at sun.com>
# Date 1229454096 18000
# Node ID c4f6f45c5804659a75295e20eb8e65ace1bfdce7
# Parent  4acf5f24912be9dedecafbbd721b1b534b06b5e8
Issue919: add a standard extension to recreate hardlinks between repositories.
Having to run a standalone Python script from the contrib dir is a nuisance.
Should also update: http://www.selenic.com/mercurial/wiki/index.cgi/RecreateHardlinksBetweenRepositories

diff --git a/contrib/hg-relink b/hgext/hardlink.py
rename from contrib/hg-relink
rename to hgext/hardlink.py
--- a/contrib/hg-relink
+++ b/hgext/hardlink.py
@@ -1,29 +1,43 @@
-#!/usr/bin/env python
+# Mercurial extension to provide 'hg hardlink' command
  #
  # Copyright (C) 2007 Brendan Cully <brendan at kublai.com>
  #
  # This software may be used and distributed according to the terms
  # of the GNU General Public License, incorporated herein by reference.

-import os, sys
+"""recreating hardlinks between repository clones"""

-class ConfigError(Exception): pass
+from mercurial import commands, util
+from mercurial.i18n import _
+import os

-def usage():
-    print """relink <source> <destination>
-    Recreate hard links between source and destination repositories"""
+def hardlink(ui, source, dest, **opts):
+    """recreate hardlinks between two repositories

-class Config:
-    def __init__(self, args):
-        if len(args) != 3:
-            raise ConfigError("wrong number of arguments")
-        self.src = os.path.abspath(args[1])
-        self.dst = os.path.abspath(args[2])
-        for d in (self.src, self.dst):
-            if not os.path.exists(os.path.join(d, '.hg')):
-                raise ConfigError("%s: not a mercurial repository" % d)
+    When repositories are cloned locally, their data files will be hardlinked
+    so that they only use the space of a single repository.

-def collect(src):
+    Unfortunately, subsequent pulls into either repository will break hardlinks
+    for any files touched by the new changesets, even if both repositories end
+    up pulling the same changes.
+
+    Similarly, passing --pull (or --rev) to "hg clone" will fail to use
+    any hardlinks, falling back to a complete copy of the source repository.
+
+    This command lets you recreate those hardlinks and reclaim that wasted
+    space.
+    """
+    for d in (source, dest):
+        if not os.path.exists(os.path.join(d, '.hg')):
+            raise util.Abort(_('%s: not a mercurial repository') %
+                             os.path.abspath(d))
+    src = os.path.join(source, '.hg')
+    dst = os.path.join(dest, '.hg')
+    candidates = collect(src, ui)
+    targets = prune(candidates, dst, ui)
+    relink(src, dst, targets, ui)
+
+def collect(src, ui):
      seplen = len(os.path.sep)
      candidates = []
      for dirpath, dirnames, filenames in os.walk(src):
@@ -34,9 +48,10 @@
              st = os.stat(os.path.join(dirpath, filename))
              candidates.append((os.path.join(relpath, filename), st))

+    ui.status(_('Collected %d candidate *.i files\n') % len(candidates))
      return candidates

-def prune(candidates, dst):
+def prune(candidates, dst, ui):
      def getdatafile(path):
          if not path.endswith('.i'):
              return None, None
@@ -57,9 +72,9 @@
              return False
          if st.st_dev != ts.st_dev:
              # No point in continuing
-            raise Exception('Source and destination are on different devices')
+            raise util.Abort(
+                _('Source and destination are on different devices'))
          if st.st_size != ts.st_size:
-            # TODO: compare revlog heads
              return False
          return st

@@ -74,9 +89,10 @@
          if df:
              targets.append((fn[:-1] + 'd', ts.st_size))

+    ui.status(_('Pruned down to %d probably relinkable files\n') % len(targets))
      return targets

-def relink(src, dst, files):
+def relink(src, dst, files, ui):
      def relinkfile(src, dst):
          bak = dst + '.bak'
          os.rename(dst, bak)
@@ -106,23 +122,20 @@
              continue
          try:
              relinkfile(source, tgt)
-            print 'Relinked %s' % f
+            ui.status(_('Relinked %s\n') % f)
              relinked += 1
              savedbytes += sz
          except OSError, inst:
-            print '%s: %s' % (tgt, str(inst))
+            ui.warn(_('%s: %s\n') % (tgt, str(inst)))

-    print 'Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes)
+    ui.status(_('Relinked %d files (%d bytes reclaimed)\n') %
+              (relinked, savedbytes))

-try:
-    cfg = Config(sys.argv)
-except ConfigError, inst:
-    print str(inst)
-    usage()
-    sys.exit(1)
-
-src = os.path.join(cfg.src, '.hg')
-dst = os.path.join(cfg.dst, '.hg')
-candidates = collect(src)
-targets = prune(candidates, dst)
-relink(src, dst, targets)
+commands.norepo += ' hardlink'
+cmdtable = {
+    'hardlink': (
+        hardlink,
+        [],
+        _('SOURCE DEST')
+    )
+}



More information about the Mercurial-devel mailing list