[PATCH 3 of 3 RFC] localrepo: use ctx.size comparisons to speed up status

Sat Jul 24 21:12:55 CDT 2010

# HG changeset patch
# User Nicolas Dumazet <nicdumz.commits at gmail.com>
# Date 1278816052 -32400
# Node ID 42b4ba8013abce794478c689201399ecf8294540
# Parent  dca39a137eaa3f107c6b6419540a0afca702d3eb
localrepo: use ctx.size comparisons to speed up status

Comparing sizes is cheaper than comparing file contents, as it does not
involve reading the file on disk or from the filelog.

It is however not always possible: some extensions, and encode filters,
transform data between the repository and the working directory, so the
two sizes can differ even when a file has not been modified.
_cancomparesize is meant to detect the cases where such comparisons are
not possible. A _cancomparesize() call is cheap, as _loadfilter caches
its results in filterpats.

Unwrapping the complex inlined boolean comparisons produces longer code,
but the boolean logic is unchanged, except for the size check added
before the ctx.cmp calls.

diff --git a/hgext/keyword.py b/hgext/keyword.py
--- a/hgext/keyword.py
+++ b/hgext/keyword.py
@@ -502,6 +502,11 @@
                               False, True)
             return n
 
+        def _cancomparesize(self):
+            # keywords affect data size, comparing wdir and filelog size does
+            # not make sense
+            return False
+
     # monkeypatches
     def kwpatchfile_init(orig, self, ui, fname, opener,
                          missing=False, eolmode=None):
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -1009,6 +1009,15 @@
         '''
         return self[node].walk(match)
 
+    def _cancomparesize(self):
+        """
+        Does it make sense to compare context sizes in this repo?
+        """
+        # Encode filters change data size, we can't rely on
+        # size comparisons if they are used
+        self._loadfilter("encode")
+        return not self.filterpats["encode"]
+
     def status(self, node1='.', node2=None, match=None,
                ignored=False, clean=False, unknown=False):
         """return status of files between two nodes or node and working directory
@@ -1036,6 +1045,8 @@
         comparewithparent = ctx1 == self['.']
         comparewithwdir = ctx2.rev() is None
 
+        checksize = self._cancomparesize()
+
         match = match or matchmod.always(self.root, self.getcwd())
         listignored, listclean, listunknown = ignored, clean, unknown
 
@@ -1062,11 +1073,14 @@
                 fixup = []
                 # do a full compare of any files that might have changed
                 for f in sorted(cmp):
-                    if (f not in ctx1 or ctx2.flags(f) != ctx1.flags(f)
-                        or ctx1[f].cmp(ctx2[f].data())):
-                        modified.append(f)
-                    else:
-                        fixup.append(f)
+                    if f in ctx1 and ctx2.flags(f) == ctx1.flags(f):
+                        f1 = ctx1[f]
+                        f2 = ctx2[f]
+                        sizematch = not checksize or f1.size() == f2.size()
+                        if sizematch and not f1.cmp(f2.data()):
+                            fixup.append(f)
+                            continue
+                    modified.append(f)
 
                 # update dirstate for files that are actually clean
                 if fixup:
@@ -1103,14 +1117,26 @@
                 mf2 = mfmatches(ctx2)
 
             modified, added, clean = [], [], []
+
+            if listclean:
+                appendclean = clean.append
+            else:
+                def appendclean(fn): pass
+            appendmodified = modified.append
+
             for fn in mf2:
                 if fn in mf1:
-                    if (mf1.flags(fn) != mf2.flags(fn) or
-                        (mf1[fn] != mf2[fn] and
-                         (mf2[fn] or ctx1[fn].cmp(ctx2[fn].data())))):
-                        modified.append(fn)
-                    elif listclean:
-                        clean.append(fn)
+                    action = appendmodified
+                    if mf1.flags(fn) == mf2.flags(fn):
+                        if mf1[fn] == mf2[fn]:
+                            action = appendclean
+                        elif not mf2[fn]:
+                            f1 = ctx1[fn]
+                            f2 = ctx2[fn]
+                            sizematch = not checksize or f1.size() == f2.size()
+                            if sizematch and not f1.cmp(f2.data()):
+                                action = appendclean
+                    action(fn)
                     del mf1[fn]
                 else:
                     added.append(fn)