[PATCH V2] revset: introduce new operator "##" to concatenate strings/symbols at runtime

FUJIWARA Katsunori foozy at lares.dti.ne.jp
Tue Jan 6 15:06:53 UTC 2015


# HG changeset patch
# User FUJIWARA Katsunori <foozy at lares.dti.ne.jp>
# Date 1420555578 -32400
#      Tue Jan 06 23:46:18 2015 +0900
# Node ID c5247f7811031426c8b5469284e6e15d8ca415d3
# Parent  d944492445fa0d0b9c164336afab68127080a1f3
revset: introduce new operator "##" to concatenate strings/symbols at runtime

Before this patch, there is no way to concatenate strings at runtime.

For example, to search for the issue ID "1234" in descriptions against
all of "issue 1234", "issue:1234", "issue1234" and "bug(1234)"
patterns, the revset below has to be written fully from scratch for
each issue ID.

    grep(r"\bissue[ :]?1234\b|\bbug\(1234\)")

This patch introduces a new infix operator "##" to concatenate
strings/symbols at runtime. The operator symbol "##" is borrowed from
the token-pasting operator of the C pre-processor. This concatenation
allows parametrizing a part of strings in revset queries.

In the case of the example above, the revset alias defined below with
operator "##" can search for issue ID "1234" in these complicated
patterns simply by "issue(1234)":

    issue($1) = grep(r"\bissue[ :]?" ## $1 ## r"\b|\bbug\(" ## $1 ## r"\)")

"##" operator does:

  - concatenate not only strings but also symbols into the string

    Strictly distinguishing between strings and symbols seems not to
    be convenient, because it is tiresome for users (and
    "revset.getstring" treats both similarly).

    For example, with the revset alias "issue()", "issue(1234)" is
    easier to write than "issue('1234')".

  - have higher priority than any other prefix, infix and postfix
    operators (like "##" of the C pre-processor)

    This patch (re-)assigns the priority 20 to "##", and 21 to "(",
    because priority 19 is already assigned to "-" as prefix "negate".

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -2840,6 +2840,10 @@
         newtree = revset.findaliases(ui, tree)
         if newtree != tree:
             ui.note(revset.prettyformat(newtree), "\n")
+        tree = newtree
+        newtree = revset.foldconcat(tree)
+        if newtree != tree:
+            ui.note(revset.prettyformat(newtree), "\n")
         if opts["optimize"]:
             weight, optimizedtree = revset.optimize(newtree, True)
             ui.note("* optimized:\n", revset.prettyformat(optimizedtree), "\n")
diff --git a/mercurial/help/revsets.txt b/mercurial/help/revsets.txt
--- a/mercurial/help/revsets.txt
+++ b/mercurial/help/revsets.txt
@@ -81,6 +81,19 @@
 defines three aliases, ``h``, ``d``, and ``rs``. ``rs(0:tip, author)`` is
 exactly equivalent to ``reverse(sort(0:tip, author))``.
 
+An infix operator ``##`` can concatenate strings and identifiers into
+one string. For example::
+
+  [revsetalias]
+  issue($1) = grep(r'\bissue[ :]?' ## $1 ## r'\b|\bbug\(' ## $1 ## r'\)')
+
+``issue(1234)`` is equivalent to ``grep(r'\bissue[ :]?1234\b|\bbug\(1234\)')``
+in this case. This matches against all of "issue 1234", "issue:1234",
+"issue1234" and "bug(1234)".
+
+All other prefix, infix and postfix operators have lower priority than
+``##``. For example, ``$1 ## $2~2`` is equivalent to ``($1 ## $2)~2``.
+
 Command line equivalents for :hg:`log`::
 
   -f    ->  ::.
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -102,7 +102,8 @@
     return baseset(sorted(reachable))
 
 elements = {
-    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
+    "(": (21, ("group", 1, ")"), ("func", 1, ")")),
+    "##": (20, None, ("_concat", 20)),
     "~": (18, None, ("ancestor", 18)),
     "^": (18, None, ("parent", 18), ("parentpost", 18)),
     "-": (5, ("negate", 19), ("minus", 5)),
@@ -148,6 +149,9 @@
         elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
             yield ('..', None, pos)
             pos += 1 # skip ahead
+        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
+            yield ('##', None, pos)
+            pos += 1 # skip ahead
         elif c in "():,-|&+!~^": # handle simple operators
             yield (c, None, pos)
         elif (c in '"\'' or c == 'r' and
@@ -2155,6 +2159,27 @@
                 alias.warned = True
     return tree
 
+def foldconcat(tree):
+    """Fold elements to be concatenated by `##`
+    """
+    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
+        return tree
+    if tree[0] == '_concat':
+        pending = [tree]
+        l = []
+        while pending:
+            e = pending.pop()
+            if e[0] == '_concat':
+                pending.extend(reversed(e[1:]))
+            elif e[0] in ('string', 'symbol'):
+                l.append(e[1])
+            else:
+                msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
+                raise error.ParseError(msg)
+        return ('string', ''.join(l))
+    else:
+        return tuple(foldconcat(t) for t in tree)
+
 def parse(spec, lookup=None):
     p = parser.parser(tokenize, elements)
     return p.parse(spec, lookup=lookup)
@@ -2170,6 +2195,7 @@
         raise error.ParseError(_("invalid token"), pos)
     if ui:
         tree = findaliases(ui, tree, showwarning=ui.warn)
+    tree = foldconcat(tree)
     weight, tree = optimize(tree, True)
     def mfunc(repo, subset):
         if util.safehasattr(subset, 'isascending'):
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -1123,6 +1123,54 @@
   $ cd ../repo
   $ log 'remote(".a.b.c.", "../remote3")'
 
+tests for concatenation of strings/symbols by "##"
+
+  $ try "278 ## '5f5' ## 1ee ## 'ce5'"
+  (_concat
+    (_concat
+      (_concat
+        ('symbol', '278')
+        ('string', '5f5'))
+      ('symbol', '1ee'))
+    ('string', 'ce5'))
+  ('string', '2785f51eece5')
+  0
+
+  $ echo 'cat4($1, $2, $3, $4) = $1 ## $2 ## $3 ## $4' >> .hg/hgrc
+  $ try "cat4(278, '5f5', 1ee, 'ce5')"
+  (func
+    ('symbol', 'cat4')
+    (list
+      (list
+        (list
+          ('symbol', '278')
+          ('string', '5f5'))
+        ('symbol', '1ee'))
+      ('string', 'ce5')))
+  (_concat
+    (_concat
+      (_concat
+        ('symbol', '278')
+        ('string', '5f5'))
+      ('symbol', '1ee'))
+    ('string', 'ce5'))
+  ('string', '2785f51eece5')
+  0
+
+(check concatenation in alias nesting)
+
+  $ echo 'cat2($1, $2) = $1 ## $2' >> .hg/hgrc
+  $ echo 'cat2x2($1, $2, $3, $4) = cat2($1 ## $2, $3 ## $4)' >> .hg/hgrc
+  $ log "cat2x2(278, '5f5', 1ee, 'ce5')"
+  0
+
+(check operator priority)
+
+  $ echo 'cat2n2($1, $2, $3, $4) = $1 ## $2 or $3 ## $4~2' >> .hg/hgrc
+  $ log "cat2n2(2785f5, 1eece5, 24286f, 4ae135)"
+  0
+  4
+
   $ cd ..
 
 test author/desc/keyword in problematic encoding


More information about the Mercurial-devel mailing list