[PATCH 2 of 3 RFC] lfs: convert threshold testing to use a matcher

Jun Wu quark at fb.com
Sat Dec 30 17:51:47 EST 2017


Excerpts from Jun Wu's message of 2017-12-30 14:36:01 -0800:
> FWIW, I had an internal patch to make the filter more flexible while still
> maintaining good performance. It was accepted but didn't land because the
> feature was not urgently needed and there was concern about conflicting with
> the fileset syntax.
> 
> # HG changeset patch
> # User Jun Wu <quark at fb.com>
> # Date 1493767928 25200
> #      Tue May 02 16:32:08 2017 -0700
> # Node ID bc949f9b315e3c3967853f209b7d8f8a5e81a6ce
> # Parent  c95721badfb48141e376d428d0621784563688b4

The follow-up to that patch is:

# HG changeset patch
# User Jun Wu <quark at fb.com>
# Date 1493996741 25200
#      Fri May 05 08:05:41 2017 -0700
# Node ID 7bf8ccfed3499da4bc1394c2c5f367e14015667c
# Parent  bc949f9b315e3c3967853f209b7d8f8a5e81a6ce
lfs: migrate file filtering from threshold to custom filter

Migrate `lfs.threshold` to the more powerful `lfs.filter` added by D4990618 so
people can specify which files are stored in LFS with more flexibility.

diff --git a/hgext3rd/lfs/__init__.py b/hgext3rd/lfs/__init__.py
--- a/hgext3rd/lfs/__init__.py
+++ b/hgext3rd/lfs/__init__.py
@@ -14,6 +14,15 @@ Configs::
     url = https://example.com/lfs
 
-    # size of a file to make it use LFS
-    threshold = 10M
+    # Which files to track in LFS. It could be a combination of ".extname",
+    # ">size", "/under/some/directory" with logic operations "|" (or), "&"
+    # (and), "!" (not) and parentheses. Some examples:
+    # - always                # everything
+    # - >20MB                 # larger than 20MB
+    # - !.txt                 # except for .txt files
+    # - .zip | .tar.gz | .7z  # some types of compressed files
+    # -  /bin                 # files under "bin" in the project root
+    # - (.php & >2MB) | (.js & >5MB) | .tar.gz | (/bin & !/bin/README) | >1GB
+    # (default: !always (never))
+    track = >10M
 """
 
@@ -35,4 +44,5 @@ from mercurial.i18n import _
 from . import (
     blobstore,
+    filterlang,
     wrapper,
 )
@@ -46,7 +56,12 @@ def reposetup(ui, repo):
         return
 
-    threshold = repo.ui.configbytes('lfs', 'threshold', None)
+    trackspec = repo.ui.config('lfs', 'track', '!always')
 
-    repo.svfs.options['lfsthreshold'] = threshold
+    # deprecated config: lfs.threshold
+    threshold = repo.ui.configbytes('lfs', 'threshold', None)
+    if threshold:
+        trackspec = "(%s) | >%s" % (trackspec, threshold)
+
+    repo.svfs.options['lfstrack'] = filterlang.compile(trackspec)
     repo.svfs.lfslocalblobstore = blobstore.local(repo)
     repo.svfs.lfsremoteblobstore = blobstore.remote(repo)
@@ -58,4 +73,5 @@ def wrapfilelog(filelog):
     wrapfunction = extensions.wrapfunction
 
+    wrapfunction(filelog, '__init__', wrapper.fileloginit)
     wrapfunction(filelog, 'addrevision', wrapper.filelogaddrevision)
     wrapfunction(filelog, 'renamed', wrapper.filelogrenamed)
diff --git a/hgext3rd/lfs/wrapper.py b/hgext3rd/lfs/wrapper.py
--- a/hgext3rd/lfs/wrapper.py
+++ b/hgext3rd/lfs/wrapper.py
@@ -103,8 +103,12 @@ def _islfs(rlog, node=None, rev=None):
     return bool(flags & revlog.REVIDX_EXTSTORED)
 
+def fileloginit(orig, self, opener, path, *args, **kwargs):
+    # record filename so it can be tested in addrevision
+    self.filename = path
+    orig(self, opener, path, *args, **kwargs)
+
 def filelogaddrevision(orig, self, text, transaction, link, p1, p2,
                        cachedelta=None, node=None,
                        flags=revlog.REVIDX_DEFAULT_FLAGS, **kwds):
-    threshold = self.opener.options['lfsthreshold']
     textlen = len(text)
     # exclude hg rename meta from file size
@@ -113,5 +117,6 @@ def filelogaddrevision(orig, self, text,
         textlen -= offset
 
-    if threshold and textlen > threshold:
+    lfstrack = self.opener.options['lfstrack']
+    if lfstrack(self.filename, textlen):
         flags |= revlog.REVIDX_EXTSTORED
 
diff --git a/tests/test-lfs-test-server.t b/tests/test-lfs-test-server.t
--- a/tests/test-lfs-test-server.t
+++ b/tests/test-lfs-test-server.t
@@ -15,5 +15,5 @@ Require lfs-test-server (https://github.
   > [lfs]
   > url=http://foo:bar@$LFS_HOST/
-  > threshold=1
+  > track=always
   > EOF
 
diff --git a/tests/test-lfs.t b/tests/test-lfs.t
--- a/tests/test-lfs.t
+++ b/tests/test-lfs.t
@@ -120,5 +120,5 @@
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > EOF
 
@@ -173,5 +173,5 @@
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=30B
+  > track=>30B
   > EOF
 
@@ -202,5 +202,5 @@
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > EOF
 
@@ -282,5 +282,5 @@
   $ cat >> .hg/hgrc << EOF
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > [diff]
   > git=1
@@ -405,8 +405,6 @@
   $ cd repo10
   $ cat >> .hg/hgrc << EOF
-  > [extensions]
-  > lfs=$TESTDIR/../hgext3rd/lfs/
   > [lfs]
-  > threshold=1
+  > track=always
   > EOF
   $ $PYTHON <<'EOF'
@@ -434,4 +432,44 @@
   $ cd ..
 
+# Test filter
+
+  $ hg init repo11
+  $ cd repo11
+  $ cat >> .hg/hgrc << EOF
+  > [lfs]
+  > track=(.a & >5B) | (.b & !>5B) | (.c & /d & !/d/c.c) | >10B
+  > EOF
+
+  $ mkdir a
+  $ echo aaaaaa > a/1.a
+  $ echo a > a/2.a
+  $ echo aaaaaa > 1.b
+  $ echo a > 2.b
+  $ echo a > 1.c
+  $ mkdir d
+  $ echo a > d/c.c
+  $ echo a > d/d.c
+  $ echo aaaaaaaaaaaa > x
+  $ hg add . -q
+  $ hg commit -m files
+
+  $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
+  >   if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
+  >     echo "${p}: is lfs"
+  >   else
+  >     echo "${p}: not lfs"
+  >   fi
+  > done
+  a/1.a: is lfs
+  a/2.a: not lfs
+  1.b: not lfs
+  2.b: is lfs
+  1.c: not lfs
+  d/c.c: not lfs
+  d/d.c: is lfs
+  x: is lfs
+
+  $ cd ..
+
 # Verify the repos
 
diff --git a/tests/test-p4fastimport-import-lfs.t b/tests/test-p4fastimport-import-lfs.t
--- a/tests/test-p4fastimport-import-lfs.t
+++ b/tests/test-p4fastimport-import-lfs.t
@@ -11,6 +11,6 @@
   $ echo "lfsmetadata=lfs.sql" >> $HGRCPATH
   $ echo "[lfs]" >> $HGRCPATH
-  $ echo "threshold=10" >> $HGRCPATH
-  $ echo "remoteurl=https://dewey-lfs.vip.facebook.com/lfs" >> $HGRCPATH
+  $ echo "track=>10" >> $HGRCPATH
+  $ echo "url=https://dewey-lfs.vip.facebook.com/lfs" >> $HGRCPATH
 
 create p4 depot
diff --git a/tests/test-remotefilelog-lfs.t b/tests/test-remotefilelog-lfs.t
--- a/tests/test-remotefilelog-lfs.t
+++ b/tests/test-remotefilelog-lfs.t
@@ -8,5 +8,5 @@
   > lfs=$TESTDIR/../hgext3rd/lfs/
   > [lfs]
-  > threshold=10B
+  > track=>10B
   > url=file:$TESTTMP/dummy-remote/
   > [diff]


More information about the Mercurial-devel mailing list