[PATCH 2 of 3 RFC] lfs: convert threshold testing to use a matcher
Jun Wu
quark at fb.com
Sat Dec 30 17:51:47 EST 2017
Excerpts from Jun Wu's message of 2017-12-30 14:36:01 -0800:
> FWIW, I had an internal patch to make the filter more flexible while still
> maintaining good performance. It was accepted but didn't land because the
> feature was not urgently needed and there was concern about conflicting with
> the fileset syntax.
>
> # HG changeset patch
> # User Jun Wu <quark at fb.com>
> # Date 1493767928 25200
> # Tue May 02 16:32:08 2017 -0700
> # Node ID bc949f9b315e3c3967853f209b7d8f8a5e81a6ce
> # Parent c95721badfb48141e376d428d0621784563688b4
The follow-up to that patch is:
# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1493996741 25200
# Fri May 05 08:05:41 2017 -0700
# Node ID 7bf8ccfed3499da4bc1394c2c5f367e14015667c
# Parent bc949f9b315e3c3967853f209b7d8f8a5e81a6ce
lfs: migrate file filtering from threshold to custom filter
Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618 so
people can specify which files are stored in LFS with more flexibility.
diff --git a/hgext3rd/lfs/__init__.py b/hgext3rd/lfs/__init__.py
--- a/hgext3rd/lfs/__init__.py
+++ b/hgext3rd/lfs/__init__.py
@@ -14,6 +14,15 @@ Configs::
url = https://example.com/lfs
- # size of a file to make it use LFS
- threshold = 10M
+ # Which files to track in LFS. It could be a combination of ".extname",
+ # ">size", "/under/some/directory" with logic operations "|" (or), "&"
+ # (and), "!" (not) and parentheses. Some examples:
+ # - always # everything
+ # - >20MB # larger than 20MB
+ # - !.txt # except for .txt files
+ # - .zip | .tar.gz | .7z # some types of compressed files
+ # - /bin # files under "bin" in the project root
+ # - (.php & >2MB) | (.js & >5MB) | .tar.gz | (/bin & !/bin/README) | >1GB
+ # (default: !always (never))
+ track = >10M
"""
@@ -35,4 +44,5 @@ from mercurial.i18n import _
from . import (
blobstore,
+ filterlang,
wrapper,
)
@@ -46,7 +56,12 @@ def reposetup(ui, repo):
return
- threshold = repo.ui.configbytes('lfs', 'threshold', None)
+ trackspec = repo.ui.config('lfs', 'track', '!always')
- repo.svfs.options['lfsthreshold'] = threshold
+ # deprecated config: lfs.threshold
+ threshold = repo.ui.configbytes('lfs', 'threshold', None)
+ if threshold:
+ trackspec = "(%s) | >%s" % (trackspec, threshold)
+
+ repo.svfs.options['lfstrack'] = filterlang.compile(trackspec)
repo.svfs.lfslocalblobstore = blobstore.local(repo)
repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
@@ -58,4 +73,5 @@ def wrapfilelog(filelog):
wrapfunction = extensions.wrapfunction
+ wrapfunction(filelog, '__init__', wrapper.fileloginit)
wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
diff --git a/hgext3rd/lfs/wrapper.py b/hgext3rd/lfs/wrapper.py
--- a/hgext3rd/lfs/wrapper.py
+++ b/hgext3rd/lfs/wrapper.py
@@ -103,8 +103,12 @@ def _islfs(rlog, node=None, rev=None):
return bool(flags & revlog.REVIDX_EXTSTORED)
+def fileloginit(orig, self, opener, path, *args, **kwargs):
+ # record filename so it can be tested in addrevision
+ self.filename = path
+ orig(self, opener, path, *args, **kwargs)
+
def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
cachedelta=None, node=None,
flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
- threshold = self.opener.options['lfsthreshold']
textlen = len(text)
# exclude hg rename meta from file size
@@ -113,5 +117,6 @@ def filelogaddrevision(orig, self, text,
textlen -= offset
- if threshold and textlen > threshold:
+ lfstrack = self.opener.options['lfstrack']
+ if lfstrack(self.filename, textlen):
flags |= revlog.REVIDX_EXTSTORED
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -15,5 +15,5 @@ Require lfs-test-server (https://github.
> [lfs]
> url=http://foo:bar@$LFS_HOST/
- > threshold=1
+ > track=always
> EOF
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -120,5 +120,5 @@
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> EOF
@@ -173,5 +173,5 @@
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=30B
+ > track=>30B
> EOF
@@ -202,5 +202,5 @@
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> EOF
@@ -282,5 +282,5 @@
$ cat >> .hg/hgrc << EOF
> [lfs]
- > threshold=10B
+ > track=>10B
> [diff]
> git=1
@@ -405,8 +405,6 @@
$ cd repo10
$ cat >> .hg/hgrc << EOF
- > [extensions]
- > lfs=$TESTDIR/../hgext3rd/lfs/
> [lfs]
- > threshold=1
+ > track=always
> EOF
$ $PYTHON <<'EOF'
@@ -434,4 +432,44 @@
$ cd ..
+# Test filter
+
+ $ hg init repo11
+ $ cd repo11
+ $ cat >> .hg/hgrc << EOF
+ > [lfs]
+ > track=(.a & >5B) | (.b & !>5B) | (.c & /d & !/d/c.c) | >10B
+ > EOF
+
+ $ mkdir a
+ $ echo aaaaaa > a/1.a
+ $ echo a > a/2.a
+ $ echo aaaaaa > 1.b
+ $ echo a > 2.b
+ $ echo a > 1.c
+ $ mkdir d
+ $ echo a > d/c.c
+ $ echo a > d/d.c
+ $ echo aaaaaaaaaaaa > x
+ $ hg add . -q
+ $ hg commit -m files
+
+ $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+ > if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+ > echo "${p}: is lfs"
+ > else
+ > echo "${p}: not lfs"
+ > fi
+ > done
+ a/1.a: is lfs
+ a/2.a: not lfs
+ 1.b: not lfs
+ 2.b: is lfs
+ 1.c: not lfs
+ d/c.c: not lfs
+ d/d.c: is lfs
+ x: is lfs
+
+ $ cd ..
+
# Verify the repos
diff --git a/tests/test-p4fastimport-import-lfs.t b/tests/test-p4fastimport-import-lfs.t
--- a/tests/test-p4fastimport-import-lfs.t
+++ b/tests/test-p4fastimport-import-lfs.t
@@ -11,6 +11,6 @@
$ echo "lfsmetadata=lfs.sql" >> $HGRCPATH
$ echo "[lfs]" >> $HGRCPATH
- $ echo "threshold=10" >> $HGRCPATH
- $ echo "remoteurl=https://dewey-lfs.vip.facebook.com/lfs" >> $HGRCPATH
+ $ echo "track=>10" >> $HGRCPATH
+ $ echo "url=https://dewey-lfs.vip.facebook.com/lfs" >> $HGRCPATH
create p4 depot
diff --git a/tests/test-remotefilelog-lfs.t b/tests/test-remotefilelog-lfs.t
--- a/tests/test-remotefilelog-lfs.t
+++ b/tests/test-remotefilelog-lfs.t
@@ -8,5 +8,5 @@
> lfs=$TESTDIR/../hgext3rd/lfs/
> [lfs]
- > threshold=10B
+ > track=>10B
> url=file:$TESTTMP/dummy-remote/
> [diff]
More information about the Mercurial-devel
mailing list