[PATCH] extdiff: proof of concept of refactoring to use cmdlines & avoid shlex.split

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Mon Dec 8 09:29:19 CST 2014


At Sun, 07 Dec 2014 16:25:01 +0100,
Mads Kiilerich wrote:
> 
> # HG changeset patch
> # User Mads Kiilerich <madski at unity3d.com>
> # Date 1417965870 -3600
> #      Sun Dec 07 16:24:30 2014 +0100
> # Node ID 7017dac8c3cd3abf1b48f290a61e434caae754c3
> # Parent  c237499a7fba65c88a2da721a22b66df4f39cf4e
> extdiff: proof of concept of refactoring to use cmdlines & avoid shlex.split

This looks very good to me !

I added some comments below.

> diff --git a/hgext/extdiff.py b/hgext/extdiff.py
> --- a/hgext/extdiff.py
> +++ b/hgext/extdiff.py
> @@ -109,7 +109,7 @@ def snapshot(ui, repo, files, node, tmpr
>                                    os.lstat(dest).st_mtime))
>      return dirname, fns_and_mtime
>  
> -def dodiff(ui, repo, diffcmd, diffopts, pats, opts):
> +def dodiff(ui, repo, cmdline, pats, opts):
>      '''Do the actual diff:
>  
>      - copy to a temp structure if diffing 2 internal revisions
> @@ -120,8 +120,7 @@ def dodiff(ui, repo, diffcmd, diffopts, 
>  
>      revs = opts.get('rev')
>      change = opts.get('change')
> -    args = ' '.join(map(util.shellquote, diffopts))
> -    do3way = '$parent2' in args
> +    do3way = '$parent2' in cmdline
>  
>      if revs and change:
>          msg = _('cannot specify --rev and --change at the same time')
> @@ -205,25 +204,24 @@ def dodiff(ui, repo, diffcmd, diffopts, 
>              dir2 = os.path.join(dir2root, dir2, common_file)
>              label2 = common_file + rev2
>  
> -        # Function to quote file/dir names in the argument string.
> +        # Function to insert quoted file/dir names in the argument string.
>          # When not operating in 3-way mode, an empty string is
>          # returned for parent2
>          replace = {'parent': dir1a, 'parent1': dir1a, 'parent2': dir1b,
>                     'plabel1': label1a, 'plabel2': label1b,
>                     'clabel': label2, 'child': dir2,
>                     'root': repo.root}
> -        def quote(match):
> +        def replacer(match):
>              key = match.group()[1:]
>              if not do3way and key == 'parent2':
>                  return ''
>              return util.shellquote(replace[key])
>  
> -        # Match parent2 first, so 'parent1?' will match both parent1 and parent
> -        regex = '\$(parent2|parent1?|child|plabel1|plabel2|clabel|root)'
> -        if not do3way and not re.search(regex, args):
> -            args += ' $parent1 $child'
> -        args = re.sub(regex, quote, args)
> -        cmdline = util.shellquote(diffcmd) + ' ' + args
> +        # Regexps are greedy so 'parent1?' will not match 'parent2'
> +        regex = '\$(parent1?|parent2|child|plabel1|plabel2|clabel|root)'
> +        if not do3way and not re.search(regex, cmdline):
> +            cmdline += ' $parent1 $child'
> +        cmdline = re.sub(regex, replacer, cmdline)
>  
>          ui.debug('running %r in %s\n' % (cmdline, tmproot))
>          ui.system(cmdline, cwd=tmproot)

These changes seem to become more readable by dividing them into
patches (1) just integrating "diffcmd" and "diffopts" into "args", (2)
renaming from "args" to "cmdline", and (3) refactoring comments,
regular expression, function name and so on (please ignore this
comment, if these are mixed just for readability of entire PoC).


> @@ -267,43 +265,57 @@ def extdiff(ui, repo, *pats, **opts):
>      revisions are specified, the working directory files are compared
>      to its parent.'''
>      program = opts.get('program')
> -    option = opts.get('option')
> +    options = opts.get('option')
>      if not program:
>          program = 'diff'
> -        option = option or ['-Npru']
> -    return dodiff(ui, repo, program, option, pats, opts)
> +        if not options:
> +            options = ['-Npru']
> +    cmdline = ' '.join(map(util.shellquote, [program] + options))
> +    return dodiff(ui, repo, cmdline, pats, opts)
>  
>  def uisetup(ui):
>      for cmd, path in ui.configitems('extdiff'):
>          if cmd.startswith('cmd.'):
>              cmd = cmd[4:]
> -            if not path:
> -                path = util.findexe(cmd)
> -                if path is None:
> -                    path = filemerge.findexternaltool(ui, cmd) or cmd
> -            diffopts = shlex.split(ui.config('extdiff', 'opts.' + cmd, ''))
> +            exe = path
> +            if not exe:
> +                exe = util.findexe(cmd)
> +                if exe is None:
> +                    exe = filemerge.findexternaltool(ui, cmd)
> +                if exe is None:
> +                    exe = cmd
> +            cmdline = util.shellquote(exe)
> +            diffopts = ui.config('extdiff', 'opts.' + cmd, '')
> +            if diffopts:
> +                cmdline += ' ' + diffopts
>          elif cmd.startswith('opts.'):
>              continue
>          else:
> -            # command = path opts
> +            # case "cmd = exename diffoptions"
>              if path:
> -                diffopts = shlex.split(path)
> -                path = diffopts.pop(0)
> +                cmdline = path
> +                diffopts = len(shlex.split(cmdline)) > 1
>              else:
> -                path, diffopts = util.findexe(cmd), []
> -                if path is None:
> -                    path = filemerge.findexternaltool(ui, cmd) or cmd
> +                exe = util.findexe(cmd)
> +                if exe is None:
> +                    exe = filemerge.findexternaltool(ui, cmd)
> +                if exe is None:
> +                    exe = cmd
> +                cmdline = util.shellquote(exe)
> +                diffopts = False
>          # look for diff arguments in [diff-tools] then [merge-tools]
> -        if diffopts == []:
> -            args = ui.config('diff-tools', cmd+'.diffargs') or \
> -                   ui.config('merge-tools', cmd+'.diffargs')
> +        if not diffopts:
> +            args = ui.config('diff-tools', cmd + '.diffargs') or \
> +                   ui.config('merge-tools', cmd + '.diffargs')
>              if args:
> -                diffopts = shlex.split(args)
> -        def save(cmd, path, diffopts):
> +                cmdline += ' ' + args
> +        def save(cmdline):
>              '''use closure to save diff command to use'''
>              def mydiff(ui, repo, *pats, **opts):
> -                return dodiff(ui, repo, path, diffopts + opts['option'],
> -                              pats, opts)
> +                options = ' '.join(map(util.shellquote, opts['option']))
> +                if options:
> +                    options = ' ' + options
> +                return dodiff(ui, repo, cmdline + options, pats, opts)
>              doc = _('''\
>  use %(path)s to diff repository (or selected files)
>  
> @@ -325,6 +337,6 @@ use %(path)s to diff repository (or sele
>              # right encoding) prevents that.
>              mydiff.__doc__ = doc.decode(encoding.encoding)
>              return mydiff
> -        cmdtable[cmd] = (save(cmd, path, diffopts),
> +        cmdtable[cmd] = (save(cmdline),
>                           cmdtable['extdiff'][1][1:],
>                           _('hg %s [OPTION]... [FILE]...') % cmd)
> diff --git a/mercurial/posix.py b/mercurial/posix.py
> --- a/mercurial/posix.py
> +++ b/mercurial/posix.py
> @@ -8,7 +8,7 @@
>  from i18n import _
>  import encoding
>  import os, sys, errno, stat, getpass, pwd, grp, socket, tempfile, unicodedata
> -import fcntl
> +import fcntl, re
>  
>  posixfile = open
>  normpath = os.path.normpath
> @@ -312,11 +312,17 @@ if sys.platform == 'cygwin':
>      def checklink(path):
>          return False
>  
> +_needsshellquote = None
>  def shellquote(s):
> +    global _needsshellquote
> +    if _needsshellquote is None:
> +        _needsshellquote = re.compile(r'[^a-zA-Z0-9._/-]').search
>      if os.sys.platform == 'OpenVMS':
>          return '"%s"' % s
> +    elif _needsshellquote(s):
> +        return "'%s'" % s.replace("'", "'\\''")
>      else:
> -        return "'%s'" % s.replace("'", "'\\''")
> +        return s
>  
>  def quotecommand(cmd):
>      return cmd

This kind of changes should be applied also on "windows.shellquote",
shouldn't it ?


BTW, in fact, I thought that "posix.shellquote" doesn't have to check
needs of quoting, because quoting always works correctly on POSIX, at
first.

But for portability of test-extdiff.t, "posix.shellquote" seems to
have to do so, too: checking should be much cheaper than fork/exec-ing
other process.


> diff --git a/tests/test-extdiff.t b/tests/test-extdiff.t
> --- a/tests/test-extdiff.t
> +++ b/tests/test-extdiff.t
> @@ -207,7 +207,7 @@ Fallback to merge-tools.tool.executable|
>    making snapshot of 2 files from working directory
>      a
>      b
> -  running "'$TESTTMP/a/dir/tool.sh'  'a.*' 'a'" in */extdiff.* (glob)
> +  running '$TESTTMP/a/dir/tool.sh a.* a' in */extdiff.* (glob)
>    ** custom diff **
>    cleaning up temp directory
>    [1]
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

----------------------------------------------------------------------
[FUJIWARA Katsunori]                             foozy at lares.dti.ne.jp


More information about the Mercurial-devel mailing list