[patch 01/10] dirstate walking optimizations
Chris Mason
mason at suse.com
Tue Aug 9 12:42:54 CDT 2005
# HG changeset patch
# User mason at suse.com
dirstate walking optimizations
The repo walking code introduces a number of calls to dirstate.map.copy(),
significantly slowing down the walk on large trees. When a list of
files is passed to the walking code, we should only look at map entries
relevant to the file list passed in.
dirstate.filterfiles() is added to return a subset of the dirstate map.
The subset includes the files passed in, and if one of the files requested
is actually a directory, it includes any files inside that directory tree.
This brings the time for hg diff Makefile down from 1.7s to 0.3s on
a Linux kernel repo.
Also, the diff command was unconditionally calling makewalk, leading
to an extra pass through repo.changes. This patch avoids the call
to makewalk when commands.diff isn't given a list of patterns, cutting
the time for hg diff (with no args) in half.
Index: mine/mercurial/hg.py
===================================================================
--- mine.orig/mercurial/hg.py 2005-08-09 12:40:14.000000000 -0400
+++ mine/mercurial/hg.py 2005-08-09 13:01:13.000000000 -0400
@@ -435,11 +435,38 @@ class dirstate:
st.write(e + f)
self.dirty = 0
+ def filterfiles(self, files):
+ ret = {}
+ b = self.map.keys()
+ b.sort()
+ blen = len(b)
+
+ for x in files:
+ if x is '.':
+ return self.map.copy()
+ bs = bisect.bisect(b, x)
+ if bs != 0 and b[bs-1] == x:
+ ret[x] = self.map[x]
+ continue
+ while bs < blen:
+ s = b[bs]
+ if len(s) > len(x) and s.startswith(x) and s[len(x)] == '/':
+ ret[s] = self.map[s]
+ else:
+ break
+ bs += 1
+ return ret
+
def walk(self, files = None, match = util.always):
self.read()
- dc = self.map.copy()
+
# walk all files by default
- if not files: files = [self.root]
+ if not files:
+ files = [self.root]
+ dc = self.map.copy()
+ else:
+ dc = self.filterfiles(files)
+
known = {'.hg': 1}
def seen(fn):
if fn in known: return True
@@ -486,7 +513,10 @@ class dirstate:
def changes(self, files = None, match = util.always):
self.read()
- dc = self.map.copy()
+ if not files:
+ dc = self.map.copy()
+ else:
+ dc = self.filterfiles(files)
lookup, changed, added, unknown = [], [], [], []
for src, fn in self.walk(files, match):
Index: mine/mercurial/commands.py
===================================================================
--- mine.orig/mercurial/commands.py 2005-08-09 12:40:14.000000000 -0400
+++ mine/mercurial/commands.py 2005-08-09 13:00:30.000000000 -0400
@@ -634,9 +634,11 @@ def diff(ui, repo, *pats, **opts):
raise Abort("too many revisions to diff")
files = []
- roots, match, results = makewalk(repo, pats, opts)
- for src, abs, rel in results:
- files.append(abs)
+ match = util.always
+ if pats:
+ roots, match, results = makewalk(repo, pats, opts)
+ for src, abs, rel in results:
+ files.append(abs)
dodiff(sys.stdout, ui, repo, files, *revs, **{'match': match})
def doexport(ui, repo, changeset, seqno, total, revwidth, opts):
--
More information about the Mercurial
mailing list