[patch 1/4] The repo walking code introduces a number of calls to dirstate.map.copy(),

Chris Mason mason at suse.com
Fri Aug 12 09:43:42 CDT 2005


# HG changeset patch
# User mason at suse.com
dirstate walking optimizations

The repo walking code introduces a number of calls to dirstate.map.copy(),
significantly slowing down the walk on large trees.  When a list of
files is passed to the walking code, we should only look at map entries
relevant to the file list passed in.

dirstate.filterfiles() is added to return a subset of the dirstate map.
The subset includes the files passed in and, if one of the requested files
is actually a directory, any files inside that directory tree.

This brings the time for hg diff Makefile down from 1.7s to 0.3s on
a Linux kernel repo.

Also, the diff command was unconditionally calling makewalk, leading
to an extra pass through repo.changes.  This patch avoids the call
to makewalk when commands.diff isn't given a list of patterns, cutting
the time for hg diff (with no args) in half.

Index: mine/mercurial/hg.py
===================================================================
--- mine.orig/mercurial/hg.py	2005-08-10 08:22:06.000000000 -0400
+++ mine/mercurial/hg.py	2005-08-10 08:22:12.000000000 -0400
@@ -435,11 +435,50 @@ class dirstate:
             st.write(e + f)
         self.dirty = 0
 
-    def walk(self, files = None, match = util.always):
+    def filterfiles(self, files):
+        ret = {}
+        unknown = []
+
+        for x in files:
+            if x is '.':
+                return self.map.copy()
+            if x not in self.map:
+                unknown.append(x)
+            else:
+                ret[x] = self.map[x]
+                
+        if not unknown:
+            return ret
+
+        b = self.map.keys()
+        b.sort()
+        blen = len(b)
+
+        for x in unknown:
+            bs = bisect.bisect(b, x)
+            if bs != 0 and  b[bs-1] == x: 
+                ret[x] = self.map[x]
+                continue
+            while bs < blen:
+                s = b[bs]
+                if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
+                    ret[s] = self.map[s]
+                else:
+                    break
+                bs += 1
+        return ret
+
+    def walk(self, files = None, match = util.always, dc=None):
         self.read()
-        dc = self.map.copy()
+
         # walk all files by default
-        if not files: files = [self.root]
+        if not files:
+            files = [self.root]
+            if not dc:
+                dc = self.map.copy()
+        elif not dc:
+            dc = self.filterfiles(files)
+                    
         known = {'.hg': 1}
         def seen(fn):
             if fn in known: return True
@@ -477,19 +516,20 @@ class dirstate:
         for src, fn in util.unique(traverse()):
             fn = os.path.normpath(fn)
             if seen(fn): continue
-            if fn in dc:
-                del dc[fn]
-            elif self.ignore(fn):
+            if fn not in dc and self.ignore(fn):
                 continue
             if match(fn):
                 yield src, fn
 
     def changes(self, files = None, match = util.always):
         self.read()
-        dc = self.map.copy()
+        if not files:
+            dc = self.map.copy()
+        else:
+            dc = self.filterfiles(files)
         lookup, changed, added, unknown = [], [], [], []
 
-        for src, fn in self.walk(files, match):
+        for src, fn in self.walk(files, match, dc=dc):
             try: s = os.stat(os.path.join(self.root, fn))
             except: continue
 
Index: mine/mercurial/commands.py
===================================================================
--- mine.orig/mercurial/commands.py	2005-08-10 08:22:06.000000000 -0400
+++ mine/mercurial/commands.py	2005-08-10 08:22:08.000000000 -0400
@@ -634,9 +634,11 @@ def diff(ui, repo, *pats, **opts):
         raise Abort("too many revisions to diff")
 
     files = []
-    roots, match, results = makewalk(repo, pats, opts)
-    for src, abs, rel in results:
-        files.append(abs)
+    match = util.always
+    if pats:
+        roots, match, results = makewalk(repo, pats, opts)
+        for src, abs, rel in results:
+            files.append(abs)
     dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match})
 
 def doexport(ui, repo, changeset, seqno, total, revwidth, opts):

--


More information about the Mercurial mailing list