[PATCH 4 of 4] hgext: add an lz4revlog extension

Bryan O'Sullivan bos at serpentine.com
Mon Jun 25 16:58:34 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1340661485 25200
# Node ID dd1fd4ec9070bd601bb5201de4b8fdace0dbc903
# Parent  e6054ad06d599fe5dca3172e9f9cd577208ff1b9
hgext: add an lz4revlog extension

This trades space (25-30% increase) for decompress performance (35
times faster).  Since revlog decompression is a major bottleneck,
this improves the performance of many operations.

A couple of examples from a kernel tree: update time improves by
about 35%, as does annotate time.

lz4 makes a bigger difference for repos with larger quantities of
compressed data.  Reconstructing a large manifest improves from 0.8
seconds with zlib to 0.1 with lz4, for instance.

diff --git a/hgext/lz4revlog.py b/hgext/lz4revlog.py
new file mode 100644
--- /dev/null
+++ b/hgext/lz4revlog.py
@@ -0,0 +1,99 @@
+# lz4revlog.py - lz4 delta compression for mercurial
+#
+# Copyright 2012 Facebook
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+'''store revlog deltas using lz4 compression
+
+This extension uses the lz4 compression algorithm to store deltas,
+rather than Mercurial's default of zlib compression.  lz4 offers much
+faster decompression than zlib, at a cost of about 30% more disk
+space.  The improvement in decompression speed leads to speedups in
+many common operations, such as update and history traversal.
+
+To use lz4 compression, a repository can be created from scratch or
+converted from an existing repository, for example using :hg:`clone
+--pull`.
+
+The behaviour of Mercurial in an existing zlib-compressed repository
+will not be affected by this extension.
+
+To avoid use of lz4 when cloning or creating a new repository, use
+:hg:`--config format.uselz4=no`.
+
+Interop with other Mercurial repositories is generally not affected by
+this extension.
+'''
+
+from mercurial import error, extensions, localrepo, revlog, util
+from mercurial.i18n import _
+
+try:
+    # import lz4 inside the try block so that a missing module is caught
+    # below instead of aborting the load of the extension
+    import lz4
+    _compress = lz4.compressHC
+    _decompress = lz4.decompress
+    # don't crash horribly if invoked on an incompatible hg
+    usable = localrepo.localrepository.openerreqs
+except (AttributeError, ImportError):
+    def lz4missing(eek):
+        '''stand-in for the lz4 codec functions; always aborts'''
+        raise util.Abort(_('the lz4revlog extension requires lz4 support'))
+    _compress = _decompress = lz4missing
+    usable = False
+
+def decompress(orig, bin):
+    '''wrapper for revlog.decompress that understands lz4 chunks
+
+    orig is the wrapped function, to which anything that is not
+    tagged as lz4 data is passed unchanged.  A chunk written by this
+    extension starts with a '4' marker byte followed by the lz4 data.
+    '''
+    if not bin:
+        return bin
+    t = bin[0]
+    if t == '4':
+        return _decompress(bin[1:])
+    return orig(bin)
+
+if usable:
+    @extensions.replaceclass(localrepo, 'localrepository')
+    class lz4repo(localrepo.localrepository):
+        '''localrepository that adds the lz4revlog requirement on create'''
+        def _baserequirements(self, create):
+            reqs = super(lz4repo, self)._baserequirements(create)
+            # format.uselz4 (default: on) only matters at creation time
+            if create and self.ui.configbool('format', 'uselz4', True):
+                reqs.append('lz4revlog')
+            return reqs
+
+    @extensions.replaceclass(revlog, 'revlog')
+    class lz4revlog(revlog.revlog):
+        '''revlog that compresses new chunks with lz4 when enabled'''
+        def __init__(self, opener, indexfile):
+            super(lz4revlog, self).__init__(opener, indexfile)
+            opts = getattr(opener, 'options', None)
+            # always a real boolean, even if the opener has no options
+            self._lz4 = bool(opts) and 'lz4revlog' in opts
+
+        def compress(self, text):
+            '''return a (header, data) pair for storing text'''
+            if not self._lz4:
+                return super(lz4revlog, self).compress(text)
+            if not text:
+                # nothing to compress; decompress() returns it unchanged
+                return ('', text)
+            c = _compress(text)
+            if len(text) <= len(c):
+                # lz4 did not help, so store the text uncompressed
+                return ('u', text)
+            return ('', '4' + c)
+
+    extensions.wrapfunction(revlog, 'decompress', decompress)
+    # register lz4revlog as a known requirement on localrepository
+    cls = localrepo.localrepository
+    for reqs in 'supportedformats supported openerreqs'.split():
+        getattr(cls, reqs).add('lz4revlog')
diff --git a/tests/hghave.py b/tests/hghave.py
--- a/tests/hghave.py
+++ b/tests/hghave.py
@@ -267,6 +267,15 @@ def has_tic():
 def has_msys():
     return os.getenv('MSYSTEM')
 
+def has_lz4():
+    '''return lz4.compressHC (truthy) if lz4 is usable, else False'''
+    try:
+        import lz4
+        compresshc = lz4.compressHC
+    except (ImportError, AttributeError):
+        return False
+    return compresshc
+
 checks = {
     "true": (lambda: True, "yak shaving"),
     "false": (lambda: False, "nail clipper"),
@@ -287,6 +296,7 @@ checks = {
     "icasefs": (has_icasefs, "case insensitive file system"),
     "inotify": (has_inotify, "inotify extension support"),
     "lsprof": (has_lsprof, "python lsprof module"),
+    "lz4": (has_lz4, "python lz4 module"),
     "mtn": (has_mtn, "monotone client (>= 1.0)"),
     "outer-repo": (has_outer_repo, "outer repo"),
     "p4": (has_p4, "Perforce server and client"),
diff --git a/tests/test-lz4revlog.t b/tests/test-lz4revlog.t
new file mode 100644
--- /dev/null
+++ b/tests/test-lz4revlog.t
@@ -0,0 +1,125 @@
+  $ "$TESTDIR/hghave" lz4 || exit 80
+
+  $ hg init a
+  $ cd a
+  $ echo a>a
+  $ hg ci -q -A -m 0
+
+  $ echo "[extensions]" >> $HGRCPATH
+  $ echo "lz4revlog=" >> $HGRCPATH
+
+having lz4revlog enabled should not affect an existing repo
+
+  $ for i in 0 1 2 3 4 5 6 7 8 9; do
+  >   echo qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqquuuuuuuuuuuuuuuuuuuuqqqq$i >> a
+  > done
+  $ hg ci -q -m 1
+  $ hg verify -q
+
+  $ cd ..
+
+regular clone of an existing zlib repo should still use zlib
+
+  $ hg clone a b
+  updating to branch default
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ sort b/.hg/requires
+  dotencode
+  fncache
+  revlogv1
+  store
+
+pulled clone of zlib should use lz4
+
+  $ hg clone -q --pull a alz4
+  $ sort alz4/.hg/requires
+  dotencode
+  fncache
+  lz4revlog
+  revlogv1
+  store
+
+disable lz4, then clone
+
+  $ hg --config format.uselz4=False clone --pull a w
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 1 files
+  updating to branch default
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ sort w/.hg/requires
+  dotencode
+  fncache
+  revlogv1
+  store
+
+attempt to disable lz4 should be ignored for hardlinked clone
+
+  $ hg --config format.uselz4=False clone alz4 azlib
+  updating to branch default
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ sort azlib/.hg/requires
+  dotencode
+  fncache
+  lz4revlog
+  revlogv1
+  store
+
+a new repo should use lz4 by default
+
+  $ hg init lz
+  $ cd lz
+  $ echo a>a
+  $ hg ci -q -A -m lz0
+  $ for i in 0 1 2 3 4 5 6 7 8 9; do
+  >   echo qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqquuuuuuuuuuuuuuuuuuuu$i >> a
+  > done
+  $ hg ci -q -m lz1
+  $ hg verify -q
+  $ hg tip
+  changeset:   1:186d32280905
+  tag:         tip
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     lz1
+  
+  $ sort .hg/requires
+  dotencode
+  fncache
+  lz4revlog
+  revlogv1
+  store
+
+vanilla hg should bail in an lz4 repo
+
+  $ hg --config 'extensions.lz4revlog=!' tip
+  abort: unknown repository format: requires features 'lz4revlog' (upgrade Mercurial)!
+  [255]
+
+start a server
+
+  $ "$TESTDIR/hghave" serve || exit 80
+
+  $ hg --config server.uncompressed=True serve -p $HGPORT -d --pid-file=../hg1.pid -E ../error.log
+  $ cat ../hg1.pid >> $DAEMON_PIDS
+
+uncompressed clone from lz4 to lz4 should be fine
+
+  $ cd ..
+  $ hg clone --uncompressed http://localhost:$HGPORT/ happy
+  streaming all changes
+  3 files to transfer, 665 bytes of data
+  transferred 665 bytes in * seconds (*/sec) (glob)
+  updating to branch default
+  1 files updated, 0 files merged, 0 files removed, 0 files unresolved
+
+uncompressed clone from lz4 to non-lz4 should fall back to pull
+
+  $ hg --config 'extensions.lz4revlog=!' clone -U --uncompressed http://localhost:$HGPORT/ nonesuch
+  requesting all changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 2 changes to 1 files


More information about the Mercurial-devel mailing list