grawkit/play/vendor/github.com/benhoyt/goawk/parser/parser.go

1049 lines
26 KiB
Go

// Package parser is an AWK parser and abstract syntax tree.
//
// Use the ParseProgram function to parse an AWK program, and then give the
// result to interp.Exec, interp.ExecProgram, or interp.New to execute it.
package parser
import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
. "github.com/benhoyt/goawk/lexer"
)
// ParseError (actually *ParseError) is the type of error returned by
// ParseProgram. The parser also uses a *ParseError as the panic value
// that signals a parse failure internally; ParseProgram recovers it and
// hands it back to the caller as an ordinary error.
type ParseError struct {
// Source line/column position where the error occurred.
Position Position
// Error message (without the "parse error at line:column" prefix,
// which Error adds).
Message string
}
// Error implements the error interface. The result has the form
// "parse error at LINE:COLUMN: MESSAGE".
func (e *ParseError) Error() string {
	pos := e.Position
	return fmt.Sprintf("parse error at %d:%d: %s", pos.Line, pos.Column, e.Message)
}
// ParserConfig lets you specify configuration for the parsing
// process (for example printing type information for debugging).
// ParseProgram accepts a nil *ParserConfig, in which case none of
// these settings are applied.
type ParserConfig struct {
// Enable printing of type information
DebugTypes bool
// io.Writer to print type information on (for example, os.Stderr)
DebugWriter io.Writer
// Map of named Go functions to allow calling from AWK. See docs
// on interp.Config.Funcs for details.
Funcs map[string]interface{}
}
// ParseProgram parses an entire AWK program, returning the *Program
// abstract syntax tree or a *ParseError on error. "config" describes
// the parser configuration (and is allowed to be nil).
//
// After a successful parse the program is also compiled to virtual
// machine code; an error from compiler.Compile is returned as err
// together with the parsed program.
//
// Panics that are not a *ParseError are not parse errors; they are
// re-raised with their original value so the real cause stays visible.
func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) {
	defer func() {
		// The parser uses panic with a *ParseError to signal parsing
		// errors internally, and they're caught here. This
		// significantly simplifies the recursive descent calls as
		// we don't have to check errors everywhere.
		r := recover()
		if r == nil {
			return
		}
		parseErr, ok := r.(*ParseError)
		if !ok {
			// Not ours: propagate the original panic value instead of
			// masking it with an "interface conversion" runtime panic.
			panic(r)
		}
		err = parseErr
	}()

	lexer := NewLexer(src)
	p := parser{lexer: lexer}
	if config != nil {
		p.debugTypes = config.DebugTypes
		p.debugWriter = config.DebugWriter
		p.nativeFuncs = config.Funcs
	}
	p.initResolve()
	p.next() // initialize p.tok

	// Parse into abstract syntax tree
	prog = p.program()

	// Compile to virtual machine code
	prog.Compiled, err = compiler.Compile(prog.toAST())
	return prog, err
}
// Program is the parsed and compiled representation of an entire AWK program.
type Program struct {
// These fields aren't intended to be used or modified directly,
// but are exported for the interpreter (Program itself needs to
// be exported in package "parser", otherwise these could live in
// "internal/ast".)
//
// Begin, Actions, End and Functions are appended to in source order
// as the parser meets BEGIN blocks, pattern-action rules, END blocks
// and function definitions respectively.
Begin []ast.Stmts
Actions []ast.Action
End []ast.Stmts
Functions []ast.Function
// NOTE(review): Scalars and Arrays presumably map variable names to
// their resolved indexes; they are filled in outside this section --
// confirm against resolveVars.
Scalars map[string]int
Arrays map[string]int
// Compiled is set by ParseProgram from the result of compiler.Compile.
Compiled *compiler.Program
}
// String converts the program to its *ast.Program form and returns that
// tree's String output (an indented, pretty-printed rendering of the
// parsed program).
func (p *Program) String() string {
	tree := p.toAST()
	return tree.String()
}
// Disassemble writes a human-readable listing of the compiled program's
// virtual machine instructions to writer, returning whatever error the
// compiled program's Disassemble method reports.
func (p *Program) Disassemble(writer io.Writer) error {
	compiled := p.Compiled
	return compiled.Disassemble(writer)
}
// toAST builds a new *ast.Program that shares this Program's Begin,
// Actions, End, Functions, Scalars and Arrays (the compiled code is not
// carried over).
func (p *Program) toAST() *ast.Program {
	tree := new(ast.Program)
	tree.Begin = p.Begin
	tree.Actions = p.Actions
	tree.End = p.End
	tree.Functions = p.Functions
	tree.Scalars = p.Scalars
	tree.Arrays = p.Arrays
	return tree
}
// Parser state. One parser value is created per ParseProgram call and
// threaded through all of the recursive-descent methods.
type parser struct {
// Lexer instance and current token values
lexer *Lexer
pos Position // position of last token (tok)
tok Token // last lexed token
prevTok Token // previously lexed token (saved by next before scanning)
val string // string value of last token (or "")
// Parsing state
inAction bool // true if parsing an action (false in BEGIN or END)
funcName string // function name if parsing a func, else ""
loopDepth int // current loop depth (0 if not in any loops)
// Variable tracking and resolving
locals map[string]bool // current function's locals (for determining scope)
varTypes map[string]map[string]typeInfo // map of func name to var name to type
varRefs []varRef // all variable references (usually scalars)
arrayRefs []arrayRef // all array references
multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions
// Function tracking
functions map[string]int // map of function name to index
userCalls []userCall // record calls so we can resolve them later
nativeFuncs map[string]interface{} // Go functions callable from AWK (from ParserConfig.Funcs)
// Configuration and debugging
debugTypes bool // show variable types for debugging
debugWriter io.Writer // where the debug output goes
}
// program parses a whole AWK program: any mix of BEGIN blocks, END
// blocks, function definitions and pattern-action rules, with optional
// newlines between them. When the input is exhausted it resolves user
// calls and variables and runs the multi-expression check.
func (p *parser) program() *Program {
	prog := new(Program)
	for p.optionalNewlines(); p.tok != EOF; p.optionalNewlines() {
		switch p.tok {
		case BEGIN:
			p.next()
			prog.Begin = append(prog.Begin, p.stmtsBrace())
		case END:
			p.next()
			prog.End = append(prog.End, p.stmtsBrace())
		case FUNCTION:
			fn := p.function()
			p.addFunction(fn.Name, len(prog.Functions))
			prog.Functions = append(prog.Functions, fn)
		default:
			p.inAction = true

			// A rule carries zero patterns (bare action), one pattern,
			// or two comma-separated patterns (a range).
			patterns := make([]ast.Expr, 0)
			if !p.matches(LBRACE, EOF) {
				patterns = append(patterns, p.expr())
			}
			if !p.matches(LBRACE, EOF, NEWLINE) {
				p.commaNewlines()
				patterns = append(patterns, p.expr())
			}

			// The action body is optional; it stays nil when omitted
			// (equivalent to { print $0 }).
			var body ast.Stmts
			if p.tok == LBRACE {
				body = p.stmtsBrace()
			}
			prog.Actions = append(prog.Actions, ast.Action{patterns, body})
			p.inAction = false
		}
	}

	p.resolveUserCalls(prog)
	p.resolveVars(prog)
	p.checkMultiExprs()
	return prog
}
// stmts parses the body of a control structure: a lone semicolon (empty
// body, returned as nil), a braced statement list, or one statement.
func (p *parser) stmts() ast.Stmts {
	if p.tok == SEMICOLON {
		// An empty body, so that input like this parses correctly:
		// BEGIN { for (i=0; i<10; i++); print "x" }
		p.next()
		return nil
	}
	if p.tok == LBRACE {
		return p.stmtsBrace()
	}
	return []ast.Stmt{p.stmt()}
}
// stmtsBrace parses a {...} block and returns the statements inside it
// (a non-nil, possibly empty list). A semicolon directly after the
// closing brace is consumed as well.
func (p *parser) stmtsBrace() ast.Stmts {
	p.expect(LBRACE)
	p.optionalNewlines()

	body := make([]ast.Stmt, 0)
	for !p.matches(RBRACE, EOF) {
		body = append(body, p.stmt())
	}

	p.expect(RBRACE)
	if p.matches(SEMICOLON) {
		p.next()
	}
	return body
}
// simpleStmt parses a "simple" statement, the kind that is also allowed
// in a for loop's init and post clauses: print/printf (with optional
// redirect), delete, or a bare expression. Control-flow keywords are
// rejected with a parse error.
func (p *parser) simpleStmt() ast.Stmt {
	switch p.tok {
	case PRINT, PRINTF:
		keyword := p.tok
		p.next()
		args := p.exprList(p.printExpr)
		if len(args) == 1 {
			// A single parenthesized list, print (a, b), is unwrapped
			// so the parens may surround all of the print args.
			if multi, ok := args[0].(*ast.MultiExpr); ok {
				args = multi.Exprs
				p.useMultiExpr(multi)
			}
		}

		// ILLEGAL marks "no redirect".
		var (
			redirect = ILLEGAL
			dest     ast.Expr
		)
		switch p.tok {
		case GREATER, APPEND, PIPE:
			redirect = p.tok
			p.next()
			dest = p.expr()
		}

		if keyword == PRINTF {
			if len(args) == 0 {
				panic(p.errorf("expected printf args, got none"))
			}
			return &ast.PrintfStmt{args, redirect, dest}
		}
		return &ast.PrintStmt{args, redirect, dest}

	case DELETE:
		p.next()
		// The reference is recorded from the current token before
		// expect verifies that it really is a NAME.
		ref := p.arrayRef(p.val, p.pos)
		p.expect(NAME)

		var index []ast.Expr
		if p.tok == LBRACKET {
			p.next()
			index = p.exprList(p.expr)
			if len(index) == 0 {
				panic(p.errorf("expected expression instead of ]"))
			}
			p.expect(RBRACKET)
		}
		return &ast.DeleteStmt{ref, index}

	case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN:
		panic(p.errorf("expected print/printf, delete, or expression"))
	}

	return &ast.ExprStmt{p.expr()}
}
// Parse any top-level statement.
//
// Leading semicolons and newlines are skipped, then the statement is
// chosen by its first token: if, for (both forms), while, do-while,
// break, continue, next, exit, return, a {...} block, or otherwise a
// simple statement. After the statement, a separator is required (see
// the check near the end) and any trailing separators are consumed.
func (p *parser) stmt() ast.Stmt {
// Skip empty statements and blank lines before the statement proper.
for p.matches(SEMICOLON, NEWLINE) {
p.next()
}
var s ast.Stmt
switch p.tok {
case IF:
p.next()
p.expect(LPAREN)
cond := p.expr()
p.expect(RPAREN)
p.optionalNewlines()
body := p.stmts()
p.optionalNewlines()
// elseBody stays nil when there is no else clause.
var elseBody ast.Stmts
if p.tok == ELSE {
p.next()
p.optionalNewlines()
elseBody = p.stmts()
}
s = &ast.IfStmt{cond, body, elseBody}
case FOR:
// Parse for statement, either "for in" or C-like for loop.
//
// FOR LPAREN NAME IN NAME RPAREN NEWLINE* stmts |
// FOR LPAREN [simpleStmt] SEMICOLON NEWLINE*
// [expr] SEMICOLON NEWLINE*
// [simpleStmt] RPAREN NEWLINE* stmts
//
p.next()
p.expect(LPAREN)
var pre ast.Stmt
if p.tok != SEMICOLON {
pre = p.simpleStmt()
}
// The two forms are told apart after the first clause: a ")" right
// after it means "for in", anything else must be the C-like form.
if pre != nil && p.tok == RPAREN {
// Match: for (var in array) body
p.next()
p.optionalNewlines()
// The clause was parsed as a generic simple statement, so verify
// it is exactly "NAME in ARRAY": an expression statement holding
// an InExpr with a single index that is a plain variable.
exprStmt, ok := pre.(*ast.ExprStmt)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
inExpr, ok := exprStmt.Expr.(*ast.InExpr)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
if len(inExpr.Index) != 1 {
panic(p.errorf("expected 'for (var in array) ...'"))
}
varExpr, ok := inExpr.Index[0].(*ast.VarExpr)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
body := p.loopStmts()
s = &ast.ForInStmt{varExpr, inExpr.Array, body}
} else {
// Match: for ([pre]; [cond]; [post]) body
p.expect(SEMICOLON)
p.optionalNewlines()
var cond ast.Expr
if p.tok != SEMICOLON {
cond = p.expr()
}
p.expect(SEMICOLON)
p.optionalNewlines()
var post ast.Stmt
if p.tok != RPAREN {
post = p.simpleStmt()
}
p.expect(RPAREN)
p.optionalNewlines()
body := p.loopStmts()
s = &ast.ForStmt{pre, cond, post, body}
}
case WHILE:
p.next()
p.expect(LPAREN)
cond := p.expr()
p.expect(RPAREN)
p.optionalNewlines()
body := p.loopStmts()
s = &ast.WhileStmt{cond, body}
case DO:
p.next()
p.optionalNewlines()
body := p.loopStmts()
p.expect(WHILE)
p.expect(LPAREN)
cond := p.expr()
p.expect(RPAREN)
s = &ast.DoWhileStmt{body, cond}
case BREAK:
// loopDepth is maintained by loopStmts.
if p.loopDepth == 0 {
panic(p.errorf("break must be inside a loop body"))
}
p.next()
s = &ast.BreakStmt{}
case CONTINUE:
if p.loopDepth == 0 {
panic(p.errorf("continue must be inside a loop body"))
}
p.next()
s = &ast.ContinueStmt{}
case NEXT:
// next is accepted inside a pattern-action rule or a function body.
if !p.inAction && p.funcName == "" {
panic(p.errorf("next can't be inside BEGIN or END"))
}
p.next()
s = &ast.NextStmt{}
case EXIT:
p.next()
// The exit status expression is optional.
var status ast.Expr
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
status = p.expr()
}
s = &ast.ExitStmt{status}
case RETURN:
if p.funcName == "" {
panic(p.errorf("return must be inside a function"))
}
p.next()
// The return value expression is optional.
var value ast.Expr
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
value = p.expr()
}
s = &ast.ReturnStmt{value}
case LBRACE:
body := p.stmtsBrace()
s = &ast.BlockStmt{body}
default:
s = p.simpleStmt()
}
// Ensure statements are separated by ; or newline
// The check passes if the current token is a newline, semicolon or
// closing brace, or if the token consumed just before it (prevTok) was
// one of those, i.e. the statement's own parsing already ate a separator.
if !p.matches(NEWLINE, SEMICOLON, RBRACE) && p.prevTok != NEWLINE && p.prevTok != SEMICOLON && p.prevTok != RBRACE {
panic(p.errorf("expected ; or newline between statements"))
}
// Consume any trailing separators.
for p.matches(NEWLINE, SEMICOLON) {
p.next()
}
return s
}
// loopStmts parses a loop body exactly like stmts, raising loopDepth for
// the duration so that break and continue (which are only valid inside a
// loop) can be checked.
func (p *parser) loopStmts() ast.Stmts {
	p.loopDepth += 1
	body := p.stmts()
	p.loopDepth -= 1
	return body
}
// function parses one function definition: the name, the parenthesized
// parameter list, and the braced body. While the body is parsed,
// p.locals holds the parameter names; it is cleared again afterwards.
// Redefining a function, reusing the function's name as a parameter, or
// repeating a parameter name are all parse errors.
func (p *parser) function() ast.Function {
	if p.funcName != "" {
		// Should never actually get here (FUNCTION token is only
		// handled at the top level), but just in case.
		panic(p.errorf("can't nest functions"))
	}

	p.next()
	name := p.val
	if _, exists := p.functions[name]; exists {
		panic(p.errorf("function %q already defined", name))
	}
	p.expect(NAME)
	p.expect(LPAREN)

	// Pre-allocate a little room to reduce allocations.
	params := make([]string, 0, 7)
	p.locals = make(map[string]bool, 7)
	for p.tok != RPAREN {
		// Every parameter after the first is preceded by a comma.
		if len(params) > 0 {
			p.commaNewlines()
		}
		param := p.val
		switch {
		case param == name:
			panic(p.errorf("can't use function name as parameter name"))
		case p.locals[param]:
			panic(p.errorf("duplicate parameter name %q", param))
		}
		p.expect(NAME)
		params = append(params, param)
		p.locals[param] = true
	}
	p.expect(RPAREN)
	p.optionalNewlines()

	// Parse the body
	p.startFunction(name, params)
	body := p.stmtsBrace()
	p.stopFunction()
	p.locals = nil

	return ast.Function{name, params, nil, body}
}
// exprList parses a comma-separated list of expressions using the given
// parse function for each element: the args to print[f] or a
// multi-dimensional index, for example. The list ends (without
// consuming) at a newline, semicolon, closing brace/bracket/paren, or a
// redirect token, and may be empty; the result is never nil.
func (p *parser) exprList(parse func() ast.Expr) []ast.Expr {
	list := make([]ast.Expr, 0)
	for {
		if p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) {
			return list
		}
		// Every element after the first is preceded by a comma.
		if len(list) > 0 {
			p.commaNewlines()
		}
		list = append(list, parse())
	}
}
// There are two entry points into the expression grammar, because only
// certain expression types are allowed in print/printf statements --
// presumably so that `print a, b > "file"` is a file redirect rather
// than a greater-than comparison. expr parses any expression, while
// printExpr skips the PIPE GETLINE and GREATER forms.

// expr parses a single expression of any kind.
func (p *parser) expr() ast.Expr {
	return p.getLine()
}

// printExpr parses a single expression in print/printf argument context.
func (p *parser) printExpr() ast.Expr {
	return p._assign(p.printCond)
}
// getLine parses an assignment-level expression optionally followed by
// "| getline [lvalue]":
//
//	assign [PIPE GETLINE [lvalue]]
func (p *parser) getLine() ast.Expr {
	command := p._assign(p.cond)
	if p.tok != PIPE {
		return command
	}
	p.next()
	p.expect(GETLINE)
	target := p.optionalLValue()
	return &ast.GetlineExpr{command, target, nil}
}
// _assign parses an assignment expression, using higher for the operand
// level (so the same code serves both expr and printExpr):
//
//	lvalue [assign_op assign]
//
// An lvalue is a variable name, an array[expr] index expression, or an
// $expr field expression. Assignment is right-associative. Plain "="
// yields an AssignExpr; the augmented operators yield an AugAssignExpr
// carrying the corresponding binary operator. Assigning to an @ named
// field is rejected.
func (p *parser) _assign(higher func() ast.Expr) ast.Expr {
	left := higher()

	_, isNamedField := left.(*ast.NamedFieldExpr)
	if !isNamedField && !ast.IsLValue(left) {
		return left
	}
	if !p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN,
		MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) {
		return left
	}
	if isNamedField {
		panic(p.errorf("assigning @ expression not supported"))
	}

	assignOp := p.tok
	p.next()
	right := p._assign(higher)
	if assignOp == ASSIGN {
		return &ast.AssignExpr{left, right}
	}

	// Translate the augmented-assignment token to its binary operator.
	var binaryOp Token
	switch assignOp {
	case ADD_ASSIGN:
		binaryOp = ADD
	case DIV_ASSIGN:
		binaryOp = DIV
	case MOD_ASSIGN:
		binaryOp = MOD
	case MUL_ASSIGN:
		binaryOp = MUL
	case POW_ASSIGN:
		binaryOp = POW
	case SUB_ASSIGN:
		binaryOp = SUB
	}
	return &ast.AugAssignExpr{left, binaryOp, right}
}
// cond parses a ?: conditional expression:
//
//	or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
func (p *parser) cond() ast.Expr {
	return p._cond(p.or)
}

// printCond is the print/printf-context variant of cond.
func (p *parser) printCond() ast.Expr {
	return p._cond(p.printOr)
}

// _cond is the shared implementation: higher parses the condition, and
// both branches are parsed with expr.
func (p *parser) _cond(higher func() ast.Expr) ast.Expr {
	test := higher()
	if p.tok != QUESTION {
		return test
	}
	p.next()
	p.optionalNewlines()
	whenTrue := p.expr()
	p.expect(COLON)
	p.optionalNewlines()
	whenFalse := p.expr()
	return &ast.CondExpr{test, whenTrue, whenFalse}
}
// or parses a left-associative || expression, allowing newlines after
// each operator:
//
//	and [OR NEWLINE* and] [OR NEWLINE* and] ...
func (p *parser) or() ast.Expr {
	return p.binaryLeft(p.and, true, OR)
}

// printOr is the print/printf-context variant of or.
func (p *parser) printOr() ast.Expr {
	return p.binaryLeft(p.printAnd, true, OR)
}
// and parses a left-associative && expression, allowing newlines after
// each operator:
//
//	in [AND NEWLINE* in] [AND NEWLINE* in] ...
func (p *parser) and() ast.Expr {
	return p.binaryLeft(p.in, true, AND)
}

// printAnd is the print/printf-context variant of and.
func (p *parser) printAnd() ast.Expr {
	return p.binaryLeft(p.printIn, true, AND)
}
// in parses an "in" (array membership) expression:
//
//	match [IN NAME] [IN NAME] ...
func (p *parser) in() ast.Expr {
	return p._in(p.match)
}

// printIn is the print/printf-context variant of in.
func (p *parser) printIn() ast.Expr {
	return p._in(p.printMatch)
}

// _in is the shared implementation. Each "IN NAME" wraps the expression
// parsed so far as the single index of a new InExpr.
func (p *parser) _in(higher func() ast.Expr) ast.Expr {
	result := higher()
	for {
		if p.tok != IN {
			return result
		}
		p.next()
		array := p.arrayRef(p.val, p.pos)
		p.expect(NAME)
		result = &ast.InExpr{[]ast.Expr{result}, array}
	}
}
// match parses a ~ or !~ regex match expression:
//
//	compare [MATCH|NOT_MATCH compare]
func (p *parser) match() ast.Expr {
	return p._match(p.compare)
}

// printMatch is the print/printf-context variant of match.
func (p *parser) printMatch() ast.Expr {
	return p._match(p.printCompare)
}

// _match is the shared implementation. At most one match operator is
// accepted, since these operators aren't associative; the right-hand
// side may be a /regex/ literal.
func (p *parser) _match(higher func() ast.Expr) ast.Expr {
	subject := higher()
	if !p.matches(MATCH, NOT_MATCH) {
		return subject
	}
	op := p.tok
	p.next()
	pattern := p.regexStr(higher)
	return &ast.BinaryExpr{subject, op, pattern}
}
// compare parses a comparison expression:
//
//	concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat]
func (p *parser) compare() ast.Expr {
	return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER)
}

// printCompare is the print/printf-context variant of compare; it omits
// GREATER from the accepted operators.
func (p *parser) printCompare() ast.Expr {
	return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE)
}

// _compare is the shared implementation, accepting only the operators in
// ops. At most one comparison is parsed, since comparisons aren't
// associative.
func (p *parser) _compare(ops ...Token) ast.Expr {
	left := p.concat()
	if !p.matches(ops...) {
		return left
	}
	op := p.tok
	p.next()
	right := p.concat()
	return &ast.BinaryExpr{left, op, right}
}
// concat parses string concatenation, which has no operator token of its
// own: additive expressions that directly follow one another are joined
// left-associatively with CONCAT. Concatenation continues for as long as
// the current token is one of the operand-starting tokens listed below
// or lies in the builtin function token range.
//
//	add [add] [add] ...
func (p *parser) concat() ast.Expr {
	result := p.add()
	for {
		isBuiltinFunc := p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC
		if !isBuiltinFunc && !p.matches(DOLLAR, AT, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) {
			return result
		}
		operand := p.add()
		result = &ast.BinaryExpr{result, CONCAT, operand}
	}
}
// add parses left-associative + and - expressions (newlines are not
// allowed after the operator):
//
//	mul [ADD|SUB mul] [ADD|SUB mul] ...
func (p *parser) add() ast.Expr {
	sum := p.binaryLeft(p.mul, false, ADD, SUB)
	return sum
}
// mul parses left-associative *, / and % expressions (newlines are not
// allowed after the operator):
//
//	pow [MUL|DIV|MOD pow] [MUL|DIV|MOD pow] ...
func (p *parser) mul() ast.Expr {
	product := p.binaryLeft(p.pow, false, MUL, DIV, MOD)
	return product
}
// pow parses an exponentiation expression. Unlike the other binary
// operators, ^ is right-associative, so the right operand is parsed by
// recursing into pow itself:
//
//	preIncr [POW pow]
func (p *parser) pow() ast.Expr {
	base := p.preIncr()
	if p.tok != POW {
		return base
	}
	p.next()
	exponent := p.pow()
	return &ast.BinaryExpr{base, POW, exponent}
}
// preIncr parses a prefix ++ or -- expression, or falls through to
// postIncr when the current token is neither. The operand must be an
// lvalue; otherwise a parse error is raised at the operand's position.
func (p *parser) preIncr() ast.Expr {
	if p.tok != INCR && p.tok != DECR {
		return p.postIncr()
	}
	op := p.tok
	p.next()
	operandPos := p.pos
	operand := p.preIncr()
	if !ast.IsLValue(operand) {
		panic(p.posErrorf(operandPos, "expected lvalue after ++ or --"))
	}
	return &ast.IncrExpr{operand, op, true}
}
// postIncr parses a primary expression optionally followed by a postfix
// ++ or --. The operator is only consumed when the primary is an lvalue;
// otherwise the primary is returned and the token is left in place.
func (p *parser) postIncr() ast.Expr {
	operand := p.primary()
	if !p.matches(INCR, DECR) || !ast.IsLValue(operand) {
		return operand
	}
	op := p.tok
	p.next()
	return &ast.IncrExpr{operand, op, false}
}
// Parse a primary expression, chosen by the current token: a number or
// string literal, a /regex/ literal, a $ field or @ named field, a unary
// !, + or - expression, a name (variable, array index, or user function
// call), a parenthesized expression or expression list, a getline, or a
// call to one of the builtin functions. Any other token is a parse error.
func (p *parser) primary() ast.Expr {
switch p.tok {
case NUMBER:
// AWK allows forms like "1.5e", but ParseFloat doesn't
s := strings.TrimRight(p.val, "eE")
// The conversion error is discarded here.
// NOTE(review): presumably the lexer only emits NUMBER for text that
// ParseFloat accepts once the trailing e/E is trimmed -- confirm.
n, _ := strconv.ParseFloat(s, 64)
p.next()
return &ast.NumExpr{n}
case STRING:
s := p.val
p.next()
return &ast.StrExpr{s}
case DIV, DIV_ASSIGN:
// If we get to DIV or DIV_ASSIGN as a primary expression,
// it's actually a regex.
regex := p.nextRegex()
return &ast.RegExpr{regex}
case DOLLAR:
p.next()
return &ast.FieldExpr{p.primary()}
case AT:
p.next()
return &ast.NamedFieldExpr{p.primary()}
case NOT, ADD, SUB:
// Unary operator; the operand is parsed at pow level.
op := p.tok
p.next()
return &ast.UnaryExpr{op, p.pow()}
case NAME:
name := p.val
namePos := p.pos
p.next()
if p.tok == LBRACKET {
// a[x] or a[x, y] array index expression
p.next()
index := p.exprList(p.expr)
if len(index) == 0 {
panic(p.errorf("expected expression instead of ]"))
}
p.expect(RBRACKET)
return &ast.IndexExpr{p.arrayRef(name, namePos), index}
} else if p.tok == LPAREN && !p.lexer.HadSpace() {
if p.locals[name] {
panic(p.errorf("can't call local variable %q as function", name))
}
// Grammar requires no space between function name and
// left paren for user function calls, hence the funky
// lexer.HadSpace() method.
return p.userCall(name, namePos)
}
// Anything else is a plain variable reference.
return p.varRef(name, namePos)
case LPAREN:
parenPos := p.pos
p.next()
exprs := p.exprList(p.expr)
switch len(exprs) {
case 0:
panic(p.errorf("expected expression, not %s", p.tok))
case 1:
// Ordinary grouping: (expr)
p.expect(RPAREN)
return exprs[0]
default:
// Multi-dimensional array "in" requires parens around index
p.expect(RPAREN)
if p.tok == IN {
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
return &ast.InExpr{exprs, ref}
}
// MultiExpr is used as a pseudo-expression for print[f] parsing.
return p.multiExpr(exprs, parenPos)
}
case GETLINE:
// Plain getline, with optional target lvalue and optional "< file".
p.next()
target := p.optionalLValue()
var file ast.Expr
if p.tok == LESS {
p.next()
file = p.primary()
}
return &ast.GetlineExpr{nil, target, file}
// Below is the parsing of all the builtin function calls. We
// could unify these but several of them have special handling
// (array/lvalue/regex params, optional arguments, and so on).
// Doing it this way means we can check more at parse time.
case F_SUB, F_GSUB:
// sub/gsub(regex, repl [, target]); the target must be an lvalue.
op := p.tok
p.next()
p.expect(LPAREN)
regex := p.regexStr(p.expr)
p.commaNewlines()
repl := p.expr()
args := []ast.Expr{regex, repl}
if p.tok == COMMA {
p.commaNewlines()
inPos := p.pos
in := p.expr()
if !ast.IsLValue(in) {
panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue"))
}
args = append(args, in)
}
p.expect(RPAREN)
return &ast.CallExpr{op, args}
case F_SPLIT:
// split(str, array [, regex]); the second argument must be a NAME.
p.next()
p.expect(LPAREN)
str := p.expr()
p.commaNewlines()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
args := []ast.Expr{str, ref}
if p.tok == COMMA {
p.commaNewlines()
args = append(args, p.regexStr(p.expr))
}
p.expect(RPAREN)
return &ast.CallExpr{F_SPLIT, args}
case F_MATCH:
// match(str, regex)
p.next()
p.expect(LPAREN)
str := p.expr()
p.commaNewlines()
regex := p.regexStr(p.expr)
p.expect(RPAREN)
return &ast.CallExpr{F_MATCH, []ast.Expr{str, regex}}
case F_RAND:
// rand() takes no arguments.
p.next()
p.expect(LPAREN)
p.expect(RPAREN)
return &ast.CallExpr{F_RAND, nil}
case F_SRAND:
// srand([expr])
p.next()
p.expect(LPAREN)
var args []ast.Expr
if p.tok != RPAREN {
args = append(args, p.expr())
}
p.expect(RPAREN)
return &ast.CallExpr{F_SRAND, args}
case F_LENGTH:
p.next()
var args []ast.Expr
// AWK quirk: "length" is allowed to be called without parens
if p.tok == LPAREN {
p.next()
if p.tok != RPAREN {
args = append(args, p.expr())
}
p.expect(RPAREN)
}
return &ast.CallExpr{F_LENGTH, args}
case F_SUBSTR:
// substr(str, start [, length])
p.next()
p.expect(LPAREN)
str := p.expr()
p.commaNewlines()
start := p.expr()
args := []ast.Expr{str, start}
if p.tok == COMMA {
p.commaNewlines()
args = append(args, p.expr())
}
p.expect(RPAREN)
return &ast.CallExpr{F_SUBSTR, args}
case F_SPRINTF:
// sprintf(format [, arg ...]); at least one argument is required.
p.next()
p.expect(LPAREN)
args := []ast.Expr{p.expr()}
for p.tok == COMMA {
p.commaNewlines()
args = append(args, p.expr())
}
p.expect(RPAREN)
return &ast.CallExpr{F_SPRINTF, args}
case F_FFLUSH:
// fflush([expr])
p.next()
p.expect(LPAREN)
var args []ast.Expr
if p.tok != RPAREN {
args = append(args, p.expr())
}
p.expect(RPAREN)
return &ast.CallExpr{F_FFLUSH, args}
case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE:
// Simple 1-argument functions
op := p.tok
p.next()
p.expect(LPAREN)
arg := p.expr()
p.expect(RPAREN)
return &ast.CallExpr{op, []ast.Expr{arg}}
case F_ATAN2, F_INDEX:
// Simple 2-argument functions
op := p.tok
p.next()
p.expect(LPAREN)
arg1 := p.expr()
p.commaNewlines()
arg2 := p.expr()
p.expect(RPAREN)
return &ast.CallExpr{op, []ast.Expr{arg1, arg2}}
default:
panic(p.errorf("expected expression instead of %s", p.tok))
}
}
// optionalLValue parses an lvalue if one starts at the current token: a
// variable name, an array[index] expression, or a $field expression. It
// returns nil, consuming nothing, when the current token starts neither
// form or when a NAME is immediately followed by '(' (a user function
// call such as foo(), not an lvalue).
func (p *parser) optionalLValue() ast.Expr {
	if p.tok == DOLLAR {
		p.next()
		return &ast.FieldExpr{p.primary()}
	}
	if p.tok != NAME || p.lexer.PeekByte() == '(' {
		return nil
	}

	name, namePos := p.val, p.pos
	p.next()
	if p.tok != LBRACKET {
		return p.varRef(name, namePos)
	}

	// a[x] or a[x, y] array index expression
	p.next()
	index := p.exprList(p.expr)
	if len(index) == 0 {
		panic(p.errorf("expected expression instead of ]"))
	}
	p.expect(RBRACKET)
	return &ast.IndexExpr{p.arrayRef(name, namePos), index}
}
// regexStr parses either a /.../ regex literal, returned as a StrExpr
// holding the regex text, or, when the current token does not start a
// regex, whatever the given parse function produces:
//
//	REGEX | expr
func (p *parser) regexStr(parse func() ast.Expr) ast.Expr {
	if !p.matches(DIV, DIV_ASSIGN) {
		return parse()
	}
	pattern := p.nextRegex()
	return &ast.StrExpr{pattern}
}
// binaryLeft parses a chain of left-associative binary operators drawn
// from ops, with higher parsing each operand. When allowNewline is true,
// newlines directly after an operator are skipped.
//
//	parse [op parse] [op parse] ...
func (p *parser) binaryLeft(higher func() ast.Expr, allowNewline bool, ops ...Token) ast.Expr {
	left := higher()
	for {
		if !p.matches(ops...) {
			return left
		}
		op := p.tok
		p.next()
		if allowNewline {
			p.optionalNewlines()
		}
		right := higher()
		left = &ast.BinaryExpr{left, op, right}
	}
}
// commaNewlines requires a comma at the current token, then skips any
// newlines that follow it:
//
//	COMMA NEWLINE*
func (p *parser) commaNewlines() {
	p.expect(COMMA)
	for p.tok == NEWLINE {
		p.next()
	}
}
// optionalNewlines skips over any run of consecutive newline tokens:
//
//	[NEWLINE] [NEWLINE] ...
func (p *parser) optionalNewlines() {
	for {
		if p.tok != NEWLINE {
			return
		}
		p.next()
	}
}
// next advances to the next token: the current token is saved in
// p.prevTok and p.pos, p.tok and p.val are refreshed from the lexer. An
// ILLEGAL token becomes a parse error carrying the lexer's message.
func (p *parser) next() {
	p.prevTok = p.tok
	pos, tok, val := p.lexer.Scan()
	p.pos, p.tok, p.val = pos, tok, val
	if tok == ILLEGAL {
		panic(p.errorf("%s", val))
	}
}
// nextRegex scans a regex literal and returns its text; it must only be
// called when the current token is DIV or DIV_ASSIGN. The regex is
// compiled purely to validate it, so a malformed pattern is reported as
// a parse error. On return the parser has advanced past the regex.
func (p *parser) nextRegex() string {
	pos, tok, val := p.lexer.ScanRegex()
	p.pos, p.tok, p.val = pos, tok, val
	if tok == ILLEGAL {
		panic(p.errorf("%s", val))
	}
	if _, err := regexp.Compile(val); err != nil {
		panic(p.errorf("%v", err))
	}
	p.next()
	return val
}
// expect consumes the current token if it is tok, advancing to the next
// one; any other token is a parse error.
func (p *parser) expect(tok Token) {
	if p.tok == tok {
		p.next()
		return
	}
	panic(p.errorf("expected %s instead of %s", tok, p.tok))
}
// matches reports whether the current token is one of the given
// operators. It never advances the parser.
func (p *parser) matches(operators ...Token) bool {
	for i := 0; i < len(operators); i++ {
		if operators[i] == p.tok {
			return true
		}
	}
	return false
}
// errorf builds a *ParseError positioned at the current token, with a
// message formatted Sprintf-style from format and args.
func (p *parser) errorf(format string, args ...interface{}) error {
	return &ParseError{p.pos, fmt.Sprintf(format, args...)}
}
// posErrorf builds a *ParseError at the explicitly given position, with
// a message formatted Sprintf-style from format and args.
func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error {
	return &ParseError{
		Position: pos,
		Message:  fmt.Sprintf(format, args...),
	}
}
// userCall parses the argument list of a call to the user-defined
// function name; the current token must be the opening paren. Each
// argument is passed to processUserCallArg along with its index. The
// returned call has index -1, and the call site is recorded so that it
// can be resolved later.
func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr {
	p.expect(LPAREN)

	args := make([]ast.Expr, 0)
	for argIndex := 0; !p.matches(NEWLINE, RPAREN); argIndex++ {
		// Every argument after the first is preceded by a comma.
		if argIndex > 0 {
			p.commaNewlines()
		}
		arg := p.expr()
		p.processUserCallArg(name, arg, argIndex)
		args = append(args, arg)
	}
	p.expect(RPAREN)

	call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later
	p.recordUserCall(call, pos)
	return call
}