[PATCH] convert: add commandline.xargs(), use it in svn_sink class

Maxim Dounin mdounin at mdounin.ru
Thu Jan 10 21:18:08 CST 2008


# HG changeset patch
# User Maxim Dounin <mdounin at mdounin.ru>
# Date 1200020863 -10800
# Node ID 722df76e00e3f63c1163a6da15cb594ecf4b20a7
# Parent  c32d41affb68e3c1c782c1ba7df93f150301323d
convert: add commandline.xargs(), use it in svn_sink class

Introduce commandline.xargs() to split an argument list across several
command invocations so that each command line stays within ARG_MAX bytes.
If no ARG_MAX information is available, use the POSIX-required minimum
of 4096 bytes.

Under Windows, while the actual argument list length is limited to 32k,
shells impose their own limits on command line length, down to 2047 bytes
for cmd.exe under Windows NT/2k and about 2500 bytes for older 4nt.exe.
See http://support.microsoft.com/kb/830473 for details about cmd.exe
limitations.

Since ARG_MAX is the limit for the argument list and the environment
combined, we reserve half of it plus one byte for environment variables.
This way, with the default ARG_MAX (4096 bytes), we get a limit of
2047 bytes, which is OK for Windows too.

diff -r c32d41affb68 -r 722df76e00e3 hgext/convert/common.py
--- a/hgext/convert/common.py	Thu Jan 10 12:07:18 2008 +0300
+++ b/hgext/convert/common.py	Fri Jan 11 06:07:43 2008 +0300
@@ -1,5 +1,6 @@
  # common code for the convert extension
  import base64, errno
+import os
  import cPickle as pickle
  from mercurial import util
  from mercurial.i18n import _
@@ -212,7 +213,7 @@ class commandline(object):
      def postrun(self):
          pass
  
-    def _run(self, cmd, *args, **kwargs):
+    def _cmdline(self, cmd, *args, **kwargs):
          cmdline = [self.command, cmd] + list(args)
          for k, v in kwargs.iteritems():
              if len(k) == 1:
@@ -230,7 +231,10 @@ class commandline(object):
          cmdline += ['<', util.nulldev]
          cmdline = ' '.join(cmdline)
          self.ui.debug(cmdline, '\n')
+        return cmdline
  
+    def _run(self, cmd, *args, **kwargs):
+        cmdline = self._cmdline(cmd, *args, **kwargs)
          self.prerun()
          try:
              return util.popen(cmdline)
@@ -256,6 +260,47 @@ class commandline(object):
          self.checkexit(status, output)
          return output
  
+    def getargmax(self):
+        if '_argmax' in self.__dict__:
+            return self._argmax
+
+        # POSIX requires at least 4096 bytes for ARG_MAX
+        self._argmax = 4096
+        try:
+            self._argmax = os.sysconf("SC_ARG_MAX")
+        except:
+            pass
+
+        # Windows shells impose their own limits on command line length,
+        # down to 2047 bytes for cmd.exe under Windows NT/2k and 2500 bytes
+        # for older 4nt.exe. See http://support.microsoft.com/kb/830473 for
+        # details about cmd.exe limitations.
+
+        # Since ARG_MAX is for command line _and_ environment, lower our limit
+        # (and make happy Windows shells while doing this).
+
+        self._argmax = self._argmax/2 - 1
+        return self._argmax
+
+    def limit_arglist(self, arglist, cmd, *args, **kwargs):
+        limit = self.getargmax() - len(self._cmdline(cmd, *args, **kwargs))
+        bytes = 0
+        fl = []
+        for fn in arglist:
+            b = len(fn) + 3
+            if bytes + b < limit or len(fl) == 0:
+                fl.append(fn)
+                bytes += b
+            else:
+                yield fl
+                fl = [fn]
+                bytes = b
+        if fl:
+            yield fl
+
+    def xargs(self, arglist, cmd, *args, **kwargs):
+        for l in self.limit_arglist(arglist, cmd, *args, **kwargs):
+            self.run0(cmd, *(list(args) + l), **kwargs)
  
  class mapfile(dict):
      def __init__(self, ui, path):
diff -r c32d41affb68 -r 722df76e00e3 hgext/convert/subversion.py
--- a/hgext/convert/subversion.py	Thu Jan 10 12:07:18 2008 +0300
+++ b/hgext/convert/subversion.py	Fri Jan 11 06:07:43 2008 +0300
@@ -707,27 +707,6 @@ class svn_sink(converter_sink, commandli
  class svn_sink(converter_sink, commandline):
      commit_re = re.compile(r'Committed revision (\d+).', re.M)
  
-    # iterates sublist of given list for concatenated length is within limit
-    def limit_arglist(self, files):
-        if os.name != 'nt':
-            yield files
-            return
-        # When I tested on WinXP, limit = 2500 is NG, 2400 is OK
-        limit = 2000
-        bytes = 0
-        fl = []
-        for fn in files:
-            b = len(fn) + 1
-            if bytes + b < limit:
-                fl.append(fn)
-                bytes += b
-            else:
-                yield fl
-                fl = [fn]
-                bytes = b
-        if fl:
-            yield fl
-
      def prerun(self):
          if self.wc:
              os.chdir(self.wc)
@@ -866,14 +845,12 @@ class svn_sink(converter_sink, commandli
                      if not os.path.exists(self.wjoin(d, '.svn', 'entries'))]
          if add_dirs:
              add_dirs.sort()
-            for fl in self.limit_arglist(add_dirs):
-                self.run('add', non_recursive=True, quiet=True, *fl)
+            self.xargs(add_dirs, 'add', non_recursive=True, quiet=True)
          return add_dirs
  
      def add_files(self, files):
          if files:
-            for fl in self.limit_arglist(files):
-                self.run('add', quiet=True, *fl)
+            self.xargs(files, 'add', quiet=True)
          return files
  
      def tidy_dirs(self, names):
@@ -907,18 +884,15 @@ class svn_sink(converter_sink, commandli
                  self._copyfile(s, d)
              self.copies = []
          if self.delete:
-            for fl in self.limit_arglist(self.delete):
-                self.run0('delete', *fl)
+            self.xargs(self.delete, 'delete')
              self.delete = []
          entries.update(self.add_files(files.difference(entries)))
          entries.update(self.tidy_dirs(entries))
          if self.delexec:
-            for fl in self.limit_arglist(self.delexec):
-                self.run0('propdel', 'svn:executable', *fl)
+            self.xargs(self.delexec, 'propdel', 'svn:executable')
              self.delexec = []
          if self.setexec:
-            for fl in self.limit_arglist(self.setexec):
-                self.run0('propset', 'svn:executable', '*', *fl)
+            self.xargs(self.setexec, 'propset', 'svn:executable', '*')
              self.setexec = []
  
          fd, messagefile = tempfile.mkstemp(prefix='hg-convert-')


More information about the Mercurial-devel mailing list