[PATCH 1 of 2] parser: accept iterator of tokens instead of tokenizer function and program

Yuya Nishihara yuya at tcha.org
Sun Jun 21 08:28:41 UTC 2015


# HG changeset patch
# User Yuya Nishihara <yuya at tcha.org>
# Date 1434815366 -32400
#      Sun Jun 21 00:49:26 2015 +0900
# Node ID f872d2a17b67f9c34140cb9a94279ce910985cc4
# Parent  7fdd1782fc4ee9da87d8af13e806dc9055db2c38
parser: accept iterator of tokens instead of tokenizer function and program

This simplifies the interface of the parse() function. Our tokenizers tend to
take optional arguments in addition to the message to be parsed.

Before this patch, the "lookup" argument existed only for the sake of revset,
and the templater had to pack [program, start, end] into a single list in order
to pass them to its tokenizer.

diff --git a/mercurial/fileset.py b/mercurial/fileset.py
--- a/mercurial/fileset.py
+++ b/mercurial/fileset.py
@@ -80,8 +80,8 @@ def tokenize(program):
     yield ('end', None, pos)
 
 def parse(expr):
-    p = parser.parser(tokenize, elements)
-    tree, pos = p.parse(expr)
+    p = parser.parser(elements)
+    tree, pos = p.parse(tokenize(expr))
     if pos != len(expr):
         raise error.ParseError(_("invalid token"), pos)
     return tree
diff --git a/mercurial/parser.py b/mercurial/parser.py
--- a/mercurial/parser.py
+++ b/mercurial/parser.py
@@ -19,8 +19,7 @@ import error
 from i18n import _
 
 class parser(object):
-    def __init__(self, tokenizer, elements, methods=None):
-        self._tokenizer = tokenizer
+    def __init__(self, elements, methods=None):
         self._elements = elements
         self._methods = methods
         self.current = None
@@ -72,12 +71,9 @@ class parser(object):
                     if len(infix) == 3:
                         self._match(infix[2], pos)
         return expr
-    def parse(self, message, lookup=None):
-        'generate a parse tree from a message'
-        if lookup:
-            self._iter = self._tokenizer(message, lookup)
-        else:
-            self._iter = self._tokenizer(message)
+    def parse(self, tokeniter):
+        'generate a parse tree from tokens'
+        self._iter = tokeniter
         self._advance()
         res = self._parse()
         token, value, pos = self.current
@@ -87,9 +83,9 @@ class parser(object):
         if not isinstance(tree, tuple):
             return tree
         return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
-    def __call__(self, message):
-        'parse a message into a parse tree and evaluate if methods given'
-        t = self.parse(message)
+    def __call__(self, tokeniter):
+        'parse tokens into a parse tree and evaluate if methods given'
+        t = self.parse(tokeniter)
         if self._methods:
             return self.eval(t)
         return t
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2383,9 +2383,9 @@ def _parsealiasdecl(decl):
     >>> _parsealiasdecl('foo($1, $2, $1)')
     ('foo', None, None, 'argument names collide with each other')
     """
-    p = parser.parser(_tokenizealias, elements)
+    p = parser.parser(elements)
     try:
-        tree, pos = p.parse(decl)
+        tree, pos = p.parse(_tokenizealias(decl))
         if (pos != len(decl)):
             raise error.ParseError(_('invalid token'), pos)
 
@@ -2474,8 +2474,8 @@ def _parsealiasdefn(defn, args):
                                            pos)
             yield (t, value, pos)
 
-    p = parser.parser(tokenizedefn, elements)
-    tree, pos = p.parse(defn)
+    p = parser.parser(elements)
+    tree, pos = p.parse(tokenizedefn(defn))
     if pos != len(defn):
         raise error.ParseError(_('invalid token'), pos)
     return parser.simplifyinfixops(tree, ('or',))
@@ -2605,8 +2605,8 @@ def foldconcat(tree):
         return tuple(foldconcat(t) for t in tree)
 
 def parse(spec, lookup=None):
-    p = parser.parser(tokenize, elements)
-    tree, pos = p.parse(spec, lookup=lookup)
+    p = parser.parser(elements)
+    tree, pos = p.parse(tokenize(spec, lookup=lookup))
     if pos != len(spec):
         raise error.ParseError(_("invalid token"), pos)
     return parser.simplifyinfixops(tree, ('or',))
diff --git a/mercurial/templater.py b/mercurial/templater.py
--- a/mercurial/templater.py
+++ b/mercurial/templater.py
@@ -27,8 +27,7 @@ elements = {
     "end": (0, None, None),
 }
 
-def tokenizer(data):
-    program, start, end = data
+def tokenize(program, start, end):
     pos = start
     while pos < end:
         c = program[pos]
@@ -96,7 +95,7 @@ def tokenizer(data):
 def compiletemplate(tmpl, context):
     parsed = []
     pos, stop = 0, len(tmpl)
-    p = parser.parser(tokenizer, elements)
+    p = parser.parser(elements)
     while pos < stop:
         n = tmpl.find('{', pos)
         if n < 0:
@@ -111,8 +110,7 @@ def compiletemplate(tmpl, context):
         if n > pos:
             parsed.append(('string', tmpl[pos:n]))
 
-        pd = [tmpl, n + 1, stop]
-        parseres, pos = p.parse(pd)
+        parseres, pos = p.parse(tokenize(tmpl, n + 1, stop))
         parsed.append(parseres)
 
     return [compileexp(e, context, methods) for e in parsed]


More information about the Mercurial-devel mailing list