[PATCH 2 of 2] cleanup: use the deque type where appropriate

Bryan O'Sullivan bos at serpentine.com
Tue May 15 12:46:53 CDT 2012


# HG changeset patch
# User Bryan O'Sullivan <bryano at fb.com>
# Date 1337103983 25200
# Node ID bbd6f7e937abe78790e26c28163f2ec05535c7a9
# Parent  4c5bca2a37682d39974eb38b5b313597a981b50a
cleanup: use the deque type where appropriate

There have been quite a few places where we pop elements off the
front of a list.  This can turn O(n) algorithms into something more
like O(n**2).  Python has provided a deque type that can do this
efficiently since at least 2.4.

As an example of the difference a deque can make, it improves
perfancestors performance on a Linux repo from 0.50 seconds to 0.36.

diff --git a/mercurial/hbisect.py b/mercurial/hbisect.py
--- a/mercurial/hbisect.py
+++ b/mercurial/hbisect.py
@@ -11,7 +11,7 @@
 import os, error
 from i18n import _
 from node import short, hex
-import util
+import collections, util
 
 def bisect(changelog, state):
     """find the next node (if any) for testing during a bisect search.
@@ -69,10 +69,10 @@
 
     # build children dict
     children = {}
-    visit = [badrev]
+    visit = collections.deque([badrev])
     candidates = []
     while visit:
-        rev = visit.pop(0)
+        rev = visit.popleft()
         if ancestors[rev] == []:
             candidates.append(rev)
             for prev in clparents(rev):
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -12,7 +12,7 @@
 from i18n import _
 from node import hex, nullid, short
 import base85, mdiff, scmutil, util, diffhelpers, copies, encoding, error
-import context
+import collections, context
 
 gitre = re.compile('diff --git a/(.*) b/(.*)')
 
@@ -1587,12 +1587,12 @@
 
     def lrugetfilectx():
         cache = {}
-        order = []
+        order = collections.deque()
         def getfilectx(f, ctx):
             fctx = ctx.filectx(f, filelog=cache.get(f))
             if f not in cache:
                 if len(cache) > 20:
-                    del cache[order.pop(0)]
+                    del cache[order.popleft()]
                 cache[f] = fctx.filelog()
             else:
                 order.remove(f)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -15,7 +15,7 @@
 from node import bin, hex, nullid, nullrev
 from i18n import _
 import ancestor, mdiff, parsers, error, util, dagutil
-import struct, zlib, errno
+import struct, zlib, errno, collections
 
 _pack = struct.pack
 _unpack = struct.unpack
@@ -362,13 +362,13 @@
         """return the set of all nodes ancestral to a given node, including
          the node itself, stopping when stop is matched"""
         reachable = set((node,))
-        visit = [node]
+        visit = collections.deque([node])
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
-            n = visit.pop(0)
+            n = visit.popleft()
             if n == stop:
                 continue
             if n == nullid:
@@ -389,10 +389,10 @@
         an ancestor of itself.  Results are in breadth-first order:
         parents of each rev in revs, then parents of those, etc.  Result
         does not include the null revision."""
-        visit = list(revs)
+        visit = collections.deque(revs)
         seen = set([nullrev])
         while visit:
-            for parent in self.parentrevs(visit.pop(0)):
+            for parent in self.parentrevs(visit.popleft()):
                 if parent not in seen:
                     visit.append(parent)
                     seen.add(parent)
@@ -447,9 +447,9 @@
 
         # take all ancestors from heads that aren't in has
         missing = set()
-        visit = [r for r in heads if r not in has]
+        visit = collections.deque(r for r in heads if r not in has)
         while visit:
-            r = visit.pop(0)
+            r = visit.popleft()
             if r in missing:
                 continue
             else:
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import re
+import re, collections
 import parser, util, error, discovery, hbisect, phases
 import node
 import bookmarks as bookmarksmod
@@ -17,10 +17,10 @@
     """Like revlog.ancestors(), but supports followfirst."""
     cut = followfirst and 1 or None
     cl = repo.changelog
-    visit = list(revs)
+    visit = collections.deque(revs)
     seen = set([node.nullrev])
     while visit:
-        for parent in cl.parentrevs(visit.pop(0))[:cut]:
+        for parent in cl.parentrevs(visit.popleft())[:cut]:
             if parent not in seen:
                 visit.append(parent)
                 seen.add(parent)
diff --git a/mercurial/treediscovery.py b/mercurial/treediscovery.py
--- a/mercurial/treediscovery.py
+++ b/mercurial/treediscovery.py
@@ -7,7 +7,7 @@
 
 from node import nullid, short
 from i18n import _
-import util, error
+import util, error, collections
 
 def findcommonincoming(repo, remote, heads=None, force=False):
     """Return a tuple (common, fetch, heads) used to identify the common
@@ -56,11 +56,11 @@
     # a 'branch' here is a linear segment of history, with four parts:
     # head, root, first parent, second parent
     # (a branch always has two parents (or none) by definition)
-    unknown = remote.branches(unknown)
+    unknown = collections.deque(remote.branches(unknown))
     while unknown:
         r = []
         while unknown:
-            n = unknown.pop(0)
+            n = unknown.popleft()
             if n[0] in seen:
                 continue
 
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -14,7 +14,7 @@
 """
 
 from i18n import _
-import error, osutil, encoding
+import error, osutil, encoding, collections
 import errno, re, shutil, sys, tempfile, traceback
 import os, time, datetime, calendar, textwrap, signal
 import imp, socket, urllib
@@ -205,12 +205,12 @@
 def lrucachefunc(func):
     '''cache most recent results of function calls'''
     cache = {}
-    order = []
+    order = collections.deque()
     if func.func_code.co_argcount == 1:
         def f(arg):
             if arg not in cache:
                 if len(cache) > 20:
-                    del cache[order.pop(0)]
+                    del cache[order.popleft()]
                 cache[arg] = func(arg)
             else:
                 order.remove(arg)
@@ -220,7 +220,7 @@
         def f(*args):
             if args not in cache:
                 if len(cache) > 20:
-                    del cache[order.pop(0)]
+                    del cache[order.popleft()]
                 cache[args] = func(*args)
             else:
                 order.remove(args)
@@ -865,7 +865,7 @@
         Returns less than L bytes if the iterator runs dry."""
         left = l
         buf = ''
-        queue = self._queue
+        queue = collections.deque(self._queue)
         while left > 0:
             # refill the queue
             if not queue:
@@ -878,13 +878,14 @@
                 if not queue:
                     break
 
-            chunk = queue.pop(0)
+            chunk = queue.popleft()
             left -= len(chunk)
             if left < 0:
-                queue.insert(0, chunk[left:])
+                queue.appendleft(chunk[left:])
                 buf += chunk[:left]
             else:
                 buf += chunk
+        self._queue = list(queue)
 
         return buf
 


More information about the Mercurial-devel mailing list