[PATCH 2 of 2] lfs: migrate most file filtering from threshold to custom filter

Matt Harbison mharbison72 at gmail.com
Sat Jan 13 02:23:16 EST 2018


# HG changeset patch
# User Matt Harbison <matt_harbison at yahoo.com>
# Date 1514706889 18000
#      Sun Dec 31 02:54:49 2017 -0500
# Node ID 66976f55793cced57929dedc8993204be340c717
# Parent  868cc63bfe9d7d7f5b40bc8cd70175cf1a608a95
lfs: migrate most file filtering from threshold to custom filter

Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618, so
that people can specify which files are stored in LFS with more flexibility.

This patch was authored by Jun Wu for the fb-experimental repo; it avoids using
a matcher, for efficiency[1].  All I've changed here is to register the new
'lfs.track' default so that the tests run cleanly, and to adapt to the
subsequent language changes.  Migrating the remaining uses of 'lfs.threshold'
can be done separately, since there's a fallback in place.

[1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-December/109388.html

diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -19,8 +19,23 @@
     # (default: unset)
     url = https://example.com/lfs
 
-    # size of a file to make it use LFS
-    threshold = 10M
+    # Which files to track in LFS.  Path tests are "**.extname" for file
+    # extensions, and "path:under/some/directory" for path prefix.  Both
+    # are relative to the repository root, and the latter must be quoted.
+    # File size can be tested with the "size()" fileset, and tests can be
+    # joined with fileset operators.  (See "hg help filesets.operators".)
+    #
+    # Some examples:
+    # - all()                       # everything
+    # - none()                      # nothing
+    # - size(">20MB")               # larger than 20MB
+    # - !**.txt                     # anything not a *.txt file
+    # - **.zip | **.tar.gz | **.7z  # some types of compressed files
+    # - "path:bin"                  # files under "bin" in the project root
+    # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
+    #     | ("path:bin" & !"path:/bin/README") | size(">1GB")
+    # (default: none())
+    track = size(">10M")
 
     # how many times to retry before giving up on transferring an object
     retry = 5
@@ -43,6 +58,7 @@
     filelog,
     hg,
     localrepo,
+    minifileset,
     node,
     registrar,
     revlog,
@@ -76,9 +92,13 @@
 configitem('lfs', 'usercache',
     default=None,
 )
+# Deprecated
 configitem('lfs', 'threshold',
     default=None,
 )
+configitem('lfs', 'track',
+    default='none()',
+)
 configitem('lfs', 'retry',
     default=5,
 )
@@ -100,9 +120,14 @@
     if not repo.local():
         return
 
-    threshold = repo.ui.configbytes('lfs', 'threshold')
+    trackspec = repo.ui.config('lfs', 'track')
 
-    repo.svfs.options['lfsthreshold'] = threshold
+    # deprecated config: lfs.threshold
+    threshold = repo.ui.configbytes('lfs', 'threshold')
+    if threshold:
+        trackspec = "(%s) | size('>%s')" % (trackspec, threshold)
+
+    repo.svfs.options['lfstrack'] = minifileset.compile(trackspec)
     repo.svfs.lfslocalblobstore = blobstore.local(repo)
     repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
 
diff --git a/hgext/lfs/wrapper.py b/hgext/lfs/wrapper.py
--- a/hgext/lfs/wrapper.py
+++ b/hgext/lfs/wrapper.py
@@ -123,14 +123,14 @@
 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
-    threshold = self.opener.options['lfsthreshold']
     textlen = len(text)
     # exclude hg rename meta from file size
     meta, offset = filelog.parsemeta(text)
     if offset:
         textlen -= offset
 
-    if threshold and textlen > threshold:
+    lfstrack = self.opener.options['lfstrack']
+    if lfstrack(self.filename, textlen):
         flags |= revlog.REVIDX_EXTSTORED
 
     return orig(self, text, transaction, link, p1, p2, cachedelta=cachedelta,
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -30,7 +30,7 @@
   > lfs=
   > [lfs]
   > url=http://foo:bar@$LFS_HOST/
-  > threshold=1
+  > track=all()
   > EOF
 
   $ hg init repo1
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -4,6 +4,7 @@
   > [extensions]
   > lfs=
   > [lfs]
+  > # Test deprecated config
   > threshold=1000B
   > EOF
 
@@ -140,7 +141,7 @@
   $ cd repo3
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > EOF
 
   $ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
@@ -203,7 +204,7 @@
   $ cd repo6
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=30B
+  > track=size(">30B")
   > EOF
 
   $ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
@@ -239,7 +240,7 @@
   $ cd repo8
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > EOF
 
   $ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
@@ -320,7 +321,7 @@
   $ cd repo9
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=size(">10B")
   > [diff]
   > git=1
   > EOF
@@ -454,7 +455,7 @@
   > [extensions]
   > lfs=
   > [lfs]
-  > threshold=1
+  > track=all()
   > EOF
   $ $PYTHON <<'EOF'
   > def write(path, content):
@@ -542,6 +543,47 @@
 
   $ cd ..
 
+# Test filter
+
+  $ hg init repo11
+  $ cd repo11
+  $ cat >> .hg/hgrc << EOF
+  > [lfs]
+  > track=(**.a & size(">5B")) | (**.b & !size(">5B"))
+  >      | (**.c & "path:d" & !"path:d/c.c") | size(">10B")
+  > EOF
+
+  $ mkdir a
+  $ echo aaaaaa > a/1.a
+  $ echo a > a/2.a
+  $ echo aaaaaa > 1.b
+  $ echo a > 2.b
+  $ echo a > 1.c
+  $ mkdir d
+  $ echo a > d/c.c
+  $ echo a > d/d.c
+  $ echo aaaaaaaaaaaa > x
+  $ hg add . -q
+  $ hg commit -m files
+
+  $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+  >   if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+  >     echo "${p}: is lfs"
+  >   else
+  >     echo "${p}: not lfs"
+  >   fi
+  > done
+  a/1.a: is lfs
+  a/2.a: not lfs
+  1.b: not lfs
+  2.b: is lfs
+  1.c: not lfs
+  d/c.c: not lfs
+  d/d.c: is lfs
+  x: is lfs
+
+  $ cd ..
+
 # Verify the repos
 
   $ cat > $TESTTMP/dumpflog.py << EOF


More information about the Mercurial-devel mailing list