[patch 01/10] dirstate walking optimizations

Chris Mason mason at suse.com
Tue Aug 9 12:42:54 CDT 2005


# HG changeset patch
# User mason at suse.com

dirstate walking optimizations

The repo walking code introduces a number of calls to dirstate.map.copy(),
significantly slowing down the walk on large trees.  When a list of
files is passed to the walking code, we should only look at map entries
relevant to the file list passed in.

dirstate.filterfiles() is added to return a subset of the dirstate map.
The subset includes the files passed in, and if one of the files requested
is actually a directory, it includes any files inside that directory tree.

This brings the time for hg diff Makefile down from 1.7s to 0.3s on
a Linux kernel repo.

Also, the diff command was unconditionally calling makewalk, leading
to an extra pass through repo.changes.  This patch avoids the call
to makewalk when commands.diff isn't given a list of patterns, cutting
the time for hg diff (with no args) in half.

Index: mine/mercurial/hg.py
===================================================================
--- mine.orig/mercurial/hg.py	2005-08-09 12:40:14.000000000 -0400
+++ mine/mercurial/hg.py	2005-08-09 13:01:13.000000000 -0400
@@ -435,11 +435,38 @@ class dirstate:
             st.write(e + f)
         self.dirty = 0
 
+    def filterfiles(self, files):
+        ret = {}
+        b = self.map.keys()
+        b.sort()
+        blen = len(b)
+
+        for x in files:
+            if x is '.':
+                return self.map.copy()
+            bs = bisect.bisect(b, x)
+            if bs != 0 and  b[bs-1] == x: 
+                ret[x] = self.map[x]
+                continue
+            while bs < blen:
+                s = b[bs]
+                if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
+                    ret[s] = self.map[s]
+                else:
+                    break
+                bs += 1
+        return ret
+
     def walk(self, files = None, match = util.always):
         self.read()
-        dc = self.map.copy()
+
         # walk all files by default
-        if not files: files = [self.root]
+        if not files:
+            files = [self.root]
+            dc = self.map.copy()
+        else:
+            dc = self.filterfiles(files)
+                    
         known = {'.hg': 1}
         def seen(fn):
             if fn in known: return True
@@ -486,7 +513,10 @@ class dirstate:
 
     def changes(self, files = None, match = util.always):
         self.read()
-        dc = self.map.copy()
+        if not files:
+            dc = self.map.copy()
+        else:
+            dc = self.filterfiles(files)
         lookup, changed, added, unknown = [], [], [], []
 
         for src, fn in self.walk(files, match):
Index: mine/mercurial/commands.py
===================================================================
--- mine.orig/mercurial/commands.py	2005-08-09 12:40:14.000000000 -0400
+++ mine/mercurial/commands.py	2005-08-09 13:00:30.000000000 -0400
@@ -634,9 +634,11 @@ def diff(ui, repo, *pats, **opts):
         raise Abort("too many revisions to diff")
 
     files = []
-    roots, match, results = makewalk(repo, pats, opts)
-    for src, abs, rel in results:
-        files.append(abs)
+    match = util.always
+    if pats:
+        roots, match, results = makewalk(repo, pats, opts)
+        for src, abs, rel in results:
+            files.append(abs)
     dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match})
 
 def doexport(ui, repo, changeset, seqno, total, revwidth, opts):

--


More information about the Mercurial mailing list