From e3f7931341af8b189814636abca091af698ccdc1 Mon Sep 17 00:00:00 2001 From: Alex Palaistras Date: Sun, 7 Apr 2024 11:26:35 +0100 Subject: [PATCH] Update dependencies, add Gitea workflow --- .gitea/workflows/container.yaml | 32 ++ play/Containerfile | 2 +- play/go.mod | 4 +- play/go.sum | 4 +- .../benhoyt/goawk/internal/ast/ast.go | 432 ++++++++++---- .../benhoyt/goawk/internal/ast/walk.go | 177 ++++++ .../goawk/internal/compiler/compiler.go | 385 ++++++++----- .../goawk/internal/compiler/disassembler.go | 34 +- .../goawk/internal/compiler/opcode_string.go | 176 +++--- .../goawk/internal/compiler/opcodes.go | 14 +- .../goawk/internal/resolver/resolve.go | 532 ++++++++++++++++++ .../{parser => internal/resolver}/toposort.go | 2 +- .../benhoyt/goawk/interp/functions.go | 32 +- .../github.com/benhoyt/goawk/interp/interp.go | 100 ++-- .../github.com/benhoyt/goawk/interp/io.go | 104 ++-- .../benhoyt/goawk/interp/iostream.go | 224 ++++++++ .../benhoyt/goawk/interp/newexecute.go | 27 +- .../github.com/benhoyt/goawk/interp/vm.go | 103 ++-- .../github.com/benhoyt/goawk/lexer/lexer.go | 32 +- .../github.com/benhoyt/goawk/lexer/token.go | 3 + .../github.com/benhoyt/goawk/parser/parser.go | 393 +++++++------ .../benhoyt/goawk/parser/resolve.go | 462 --------------- play/vendor/modules.txt | 5 +- 23 files changed, 2141 insertions(+), 1138 deletions(-) create mode 100644 .gitea/workflows/container.yaml create mode 100644 play/vendor/github.com/benhoyt/goawk/internal/ast/walk.go create mode 100644 play/vendor/github.com/benhoyt/goawk/internal/resolver/resolve.go rename play/vendor/github.com/benhoyt/goawk/{parser => internal/resolver}/toposort.go (98%) create mode 100644 play/vendor/github.com/benhoyt/goawk/interp/iostream.go delete mode 100644 play/vendor/github.com/benhoyt/goawk/parser/resolve.go diff --git a/.gitea/workflows/container.yaml b/.gitea/workflows/container.yaml new file mode 100644 index 0000000..5cd8e73 --- /dev/null +++ b/.gitea/workflows/container.yaml @@ -0,0 +1,32 @@ +name: Grawkit Play Container Build +on: + push: + paths: + - 'grawkit' + - 'play/**' +env: + CONTAINER_NAME: ${{ github.repository_owner }}/grawkit-play + CONTAINER_TAG: latest +jobs: + build: + runs-on: ubuntu-latest + name: Container Build + steps: + - name: Check Out Repository + uses: actions/checkout@v4 + - name: Login to Container Registry + uses: docker/login-action@v2 + with: + registry: ${{ vars.CONTAINER_REGISTRY_URL }} + username: ${{ secrets.CONTAINER_REGISTRY_USERNAME }} + password: ${{ secrets.CONTAINER_REGISTRY_PASSWORD }} + - name: Build and Push Container + uses: docker/build-push-action@v4 + with: + context: . + file: play/Containerfile + platforms: | + linux/amd64 + linux/arm64 + push: true + tags: ${{ vars.CONTAINER_REGISTRY_URL }}/${{ env.CONTAINER_NAME }}:${{ env.CONTAINER_TAG }} diff --git a/play/Containerfile b/play/Containerfile index ada4f9c..10b7e0e 100644 --- a/play/Containerfile +++ b/play/Containerfile @@ -1,4 +1,4 @@ -FROM docker.io/golang:1.19 AS builder +FROM docker.io/golang:1.22 AS builder WORKDIR /src COPY play/ /src/ diff --git a/play/go.mod b/play/go.mod index 880735f..6af4337 100644 --- a/play/go.mod +++ b/play/go.mod @@ -1,5 +1,5 @@ module go.deuill.org/grawkit/play -go 1.19 +go 1.20 -require github.com/benhoyt/goawk v1.20.0 +require github.com/benhoyt/goawk v1.26.0 diff --git a/play/go.sum b/play/go.sum index fdc853e..bd85e24 100644 --- a/play/go.sum +++ b/play/go.sum @@ -1,2 +1,2 @@ -github.com/benhoyt/goawk v1.20.0 h1:oz81agTfP/8Z7afMvmOwX4Ms9qTtGhZxPEzHCycIFds= -github.com/benhoyt/goawk v1.20.0/go.mod h1:Dp3jBsApuiItYR9atsCm//q/70OnqjihLh5WkU6eW7U= +github.com/benhoyt/goawk v1.26.0 h1:TuZiBi/u7Ra7092CXs+1iGd5PVd0YXicyVcFv5zWVeE= +github.com/benhoyt/goawk v1.26.0/go.mod h1:FjIAicXvrv3wbqAhSTo5bn4mIM5y1iy3lcnIynlJvoI= diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go index 8232765..303df70 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go @@ -10,14 +10,12 @@ import ( . "github.com/benhoyt/goawk/lexer" ) -// Program is an entire AWK program. +// Program is a parsed AWK program. type Program struct { Begin []Stmts - Actions []Action + Actions []*Action End []Stmts - Functions []Function - Scalars map[string]int - Arrays map[string]int + Functions []*Function } // String returns an indented, pretty-printed version of the parsed @@ -75,32 +73,137 @@ func (a *Action) String() string { return strings.Join(patterns, ", ") + sep + stmtsStr } +// Node is an interface to be satisfied by all AST elements. +// We need it to be able to work with AST in a generic way, like in ast.Walk(). +type Node interface { + node() +} + +// All these types implement the Node interface. +func (p *Program) node() {} +func (a *Action) node() {} +func (f *Function) node() {} +func (e *FieldExpr) node() {} +func (e *NamedFieldExpr) node() {} +func (e *UnaryExpr) node() {} +func (e *BinaryExpr) node() {} +func (e *InExpr) node() {} +func (e *CondExpr) node() {} +func (e *NumExpr) node() {} +func (e *StrExpr) node() {} +func (e *RegExpr) node() {} +func (e *VarExpr) node() {} +func (e *IndexExpr) node() {} +func (e *AssignExpr) node() {} +func (e *AugAssignExpr) node() {} +func (e *IncrExpr) node() {} +func (e *CallExpr) node() {} +func (e *UserCallExpr) node() {} +func (e *MultiExpr) node() {} +func (e *GetlineExpr) node() {} +func (e *GroupingExpr) node() {} +func (s *PrintStmt) node() {} +func (s *PrintfStmt) node() {} +func (s *ExprStmt) node() {} +func (s *IfStmt) node() {} +func (s *ForStmt) node() {} +func (s *ForInStmt) node() {} +func (s *WhileStmt) node() {} +func (s *DoWhileStmt) node() {} +func (s *BreakStmt) node() {} +func (s *ContinueStmt) node() {} +func (s *NextStmt) node() {} +func (s *NextfileStmt) node() {} +func (s *ExitStmt) node() {} +func (s *DeleteStmt) node() {} +func (s *ReturnStmt) node() {} +func (s *BlockStmt) node() {} + // Expr is the abstract syntax tree for any AWK expression. type Expr interface { - expr() + Node + precedence() int String() string } +// Table of operator precedence, lowest to highest +const ( + precAssign = iota + precCond + precOr + precAnd + precIn + precMatch + precCompare + precConcat + precAdd + precMul + precUnary + precPower + precPreIncr + precPostIncr + precField + precPrimary + precGrouping +) + // All these types implement the Expr interface. -func (e *FieldExpr) expr() {} -func (e *NamedFieldExpr) expr() {} -func (e *UnaryExpr) expr() {} -func (e *BinaryExpr) expr() {} -func (e *ArrayExpr) expr() {} -func (e *InExpr) expr() {} -func (e *CondExpr) expr() {} -func (e *NumExpr) expr() {} -func (e *StrExpr) expr() {} -func (e *RegExpr) expr() {} -func (e *VarExpr) expr() {} -func (e *IndexExpr) expr() {} -func (e *AssignExpr) expr() {} -func (e *AugAssignExpr) expr() {} -func (e *IncrExpr) expr() {} -func (e *CallExpr) expr() {} -func (e *UserCallExpr) expr() {} -func (e *MultiExpr) expr() {} -func (e *GetlineExpr) expr() {} +func (e *FieldExpr) precedence() int { return precField } +func (e *NamedFieldExpr) precedence() int { return precField } +func (e *UnaryExpr) precedence() int { return precUnary } +func (e *InExpr) precedence() int { return precIn } +func (e *CondExpr) precedence() int { return precCond } +func (e *NumExpr) precedence() int { return precPrimary } +func (e *StrExpr) precedence() int { return precPrimary } +func (e *RegExpr) precedence() int { return precPrimary } +func (e *VarExpr) precedence() int { return precPrimary } +func (e *IndexExpr) precedence() int { return precPrimary } +func (e *AssignExpr) precedence() int { return precAssign } +func (e *AugAssignExpr) precedence() int { return precAssign } +func (e *CallExpr) precedence() int { return precPrimary } +func (e *UserCallExpr) precedence() int { return precPrimary } +func (e *MultiExpr) precedence() int { return precPrimary } +func (e *GetlineExpr) precedence() int { return precPrimary } +func (e *GroupingExpr) precedence() int { return precGrouping } + +func (e *IncrExpr) precedence() int { + if e.Pre { + return precPreIncr + } + return precPostIncr +} + +func (e *BinaryExpr) precedence() int { + switch e.Op { + case AND: + return precAnd + case OR: + return precOr + case CONCAT: + return precConcat + case ADD, SUB: + return precAdd + case MUL, DIV, MOD: + return precMul + case EQUALS, LESS, LTE, GREATER, GTE, NOT_EQUALS: + return precCompare + case MATCH, NOT_MATCH: + return precMatch + case POW: + return precPower + default: + return precPrimary + } +} + +// parenthesize returns the string version of e, surrounding it in +// parentheses if e's precedence is lower than that of other. +func parenthesize(e, other Expr) string { + if e.precedence() < other.precedence() { + return "(" + e.String() + ")" + } + return e.String() +} // FieldExpr is an expression like $0. type FieldExpr struct { @@ -108,7 +211,7 @@ type FieldExpr struct { } func (e *FieldExpr) String() string { - return "$" + e.Index.String() + return "$" + parenthesize(e.Index, e) } // NamedFieldExpr is an expression like @"name". @@ -117,7 +220,7 @@ type NamedFieldExpr struct { } func (e *NamedFieldExpr) String() string { - return "@" + e.Field.String() + return "@" + parenthesize(e.Field, e) } // UnaryExpr is an expression like -1234. @@ -127,7 +230,7 @@ type UnaryExpr struct { } func (e *UnaryExpr) String() string { - return e.Op.String() + e.Value.String() + return e.Op.String() + parenthesize(e.Value, e) } // BinaryExpr is an expression like 1 + 2. @@ -138,43 +241,31 @@ type BinaryExpr struct { } func (e *BinaryExpr) String() string { - var opStr string + var op string if e.Op == CONCAT { - opStr = " " + op = " " } else { - opStr = " " + e.Op.String() + " " + op = " " + e.Op.String() + " " } - return "(" + e.Left.String() + opStr + e.Right.String() + ")" -} - -// ArrayExpr is an array reference. Not really a stand-alone -// expression, except as an argument to split() or a user function -// call. -type ArrayExpr struct { - Scope VarScope - Index int - Name string -} - -func (e *ArrayExpr) String() string { - return e.Name + return parenthesize(e.Left, e) + op + parenthesize(e.Right, e) } // InExpr is an expression like (index in array). type InExpr struct { - Index []Expr - Array *ArrayExpr + Index []Expr + Array string + ArrayPos Position } func (e *InExpr) String() string { if len(e.Index) == 1 { - return "(" + e.Index[0].String() + " in " + e.Array.String() + ")" + return parenthesize(e.Index[0], e) + " in " + e.Array } indices := make([]string, len(e.Index)) for i, index := range e.Index { indices[i] = index.String() } - return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")" + return "(" + strings.Join(indices, ", ") + ") in " + e.Array } // CondExpr is an expression like cond ? 1 : 0. @@ -185,7 +276,7 @@ type CondExpr struct { } func (e *CondExpr) String() string { - return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")" + return parenthesize(e.Cond, e) + " ? " + parenthesize(e.True, e) + " : " + parenthesize(e.False, e) } // NumExpr is a literal number like 1234. @@ -201,12 +292,16 @@ func (e *NumExpr) String() string { } } -// StrExpr is a literal string like "foo". +// StrExpr is a literal string like "foo" or a regex constant like /foo/. type StrExpr struct { Value string + Regex bool } func (e *StrExpr) String() string { + if e.Regex { + return formatRegex(e.Value) + } return strconv.Quote(e.Value) } @@ -217,25 +312,13 @@ type RegExpr struct { } func (e *RegExpr) String() string { - escaped := strings.Replace(e.Regex, "/", `\/`, -1) - return "/" + escaped + "/" + return formatRegex(e.Regex) } -type VarScope int - -const ( - ScopeSpecial VarScope = iota - ScopeGlobal - ScopeLocal -) - // VarExpr is a variable reference (special var, global, or local). -// Index is the resolved variable index used by the interpreter; Name -// is the original name used by String(). type VarExpr struct { - Scope VarScope - Index int - Name string + Name string + Pos Position } func (e *VarExpr) String() string { @@ -244,8 +327,9 @@ func (e *VarExpr) String() string { // IndexExpr is an expression like a[k] (rvalue or lvalue). type IndexExpr struct { - Array *ArrayExpr - Index []Expr + Array string + ArrayPos Position + Index []Expr } func (e *IndexExpr) String() string { @@ -253,7 +337,7 @@ func (e *IndexExpr) String() string { for i, index := range e.Index { indices[i] = index.String() } - return e.Array.String() + "[" + strings.Join(indices, ", ") + "]" + return e.Array + "[" + strings.Join(indices, ", ") + "]" } // AssignExpr is an expression like x = 1234. @@ -263,7 +347,7 @@ type AssignExpr struct { } func (e *AssignExpr) String() string { - return e.Left.String() + " = " + e.Right.String() + return parenthesize(e.Left, e) + " = " + parenthesize(e.Right, e) } // AugAssignExpr is an assignment expression like x += 5. @@ -274,7 +358,7 @@ type AugAssignExpr struct { } func (e *AugAssignExpr) String() string { - return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String() + return parenthesize(e.Left, e) + " " + e.Op.String() + "= " + parenthesize(e.Right, e) } // IncrExpr is an increment or decrement expression like x++ or --y. @@ -286,9 +370,9 @@ type IncrExpr struct { func (e *IncrExpr) String() string { if e.Pre { - return e.Op.String() + e.Expr.String() + return e.Op.String() + parenthesize(e.Expr, e) } else { - return e.Expr.String() + e.Op.String() + return parenthesize(e.Expr, e) + e.Op.String() } } @@ -306,15 +390,12 @@ func (e *CallExpr) String() string { return e.Func.String() + "(" + strings.Join(args, ", ") + ")" } -// UserCallExpr is a user-defined function call like my_func(1, 2, 3) -// -// Index is the resolved function index used by the interpreter; Name -// is the original name used by String(). +// UserCallExpr is a user-defined function call like my_func(1, 2, 3), +// where my_func is either AWK-defined or a native Go function. type UserCallExpr struct { - Native bool // false = AWK-defined function, true = native Go func - Index int - Name string - Args []Expr + Name string + Args []Expr + Pos Position } func (e *UserCallExpr) String() string { @@ -349,18 +430,27 @@ type GetlineExpr struct { func (e *GetlineExpr) String() string { s := "" if e.Command != nil { - s += e.Command.String() + " |" + s += parenthesize(e.Command, e) + " |" } s += "getline" if e.Target != nil { s += " " + e.Target.String() } if e.File != nil { - s += " <" + e.File.String() + s += " <" + parenthesize(e.File, e) } return s } +// GroupingExpr is a parenthesized grouping expression. +type GroupingExpr struct { + Expr Expr +} + +func (e *GroupingExpr) String() string { + return "(" + e.Expr.String() + ")" +} + // IsLValue returns true if the given expression can be used as an // lvalue (on the left-hand side of an assignment, in a ++ or -- // operation, or as the third argument to sub or gsub). @@ -373,8 +463,17 @@ func IsLValue(expr Expr) bool { } } +// formatRegex formats the regex string r. +func formatRegex(r string) string { + escaped := strings.Replace(r, "/", `\/`, -1) + return "/" + escaped + "/" +} + // Stmt is the abstract syntax tree for any AWK statement. type Stmt interface { + Node + StartPos() Position // position of first character belonging to the node + EndPos() Position // position of first character immediately after the node stmt() String() string } @@ -391,16 +490,53 @@ func (s *DoWhileStmt) stmt() {} func (s *BreakStmt) stmt() {} func (s *ContinueStmt) stmt() {} func (s *NextStmt) stmt() {} +func (s *NextfileStmt) stmt() {} func (s *ExitStmt) stmt() {} func (s *DeleteStmt) stmt() {} func (s *ReturnStmt) stmt() {} func (s *BlockStmt) stmt() {} +func (s *PrintStmt) StartPos() Position { return s.Start } +func (s *PrintfStmt) StartPos() Position { return s.Start } +func (s *ExprStmt) StartPos() Position { return s.Start } +func (s *IfStmt) StartPos() Position { return s.Start } +func (s *ForStmt) StartPos() Position { return s.Start } +func (s *ForInStmt) StartPos() Position { return s.Start } +func (s *WhileStmt) StartPos() Position { return s.Start } +func (s *DoWhileStmt) StartPos() Position { return s.Start } +func (s *BreakStmt) StartPos() Position { return s.Start } +func (s *ContinueStmt) StartPos() Position { return s.Start } +func (s *NextStmt) StartPos() Position { return s.Start } +func (s *NextfileStmt) StartPos() Position { return s.Start } +func (s *ExitStmt) StartPos() Position { return s.Start } +func (s *DeleteStmt) StartPos() Position { return s.Start } +func (s *ReturnStmt) StartPos() Position { return s.Start } +func (s *BlockStmt) StartPos() Position { return s.Start } + +func (s *PrintStmt) EndPos() Position { return s.End } +func (s *PrintfStmt) EndPos() Position { return s.End } +func (s *ExprStmt) EndPos() Position { return s.End } +func (s *IfStmt) EndPos() Position { return s.End } +func (s *ForStmt) EndPos() Position { return s.End } +func (s *ForInStmt) EndPos() Position { return s.End } +func (s *WhileStmt) EndPos() Position { return s.End } +func (s *DoWhileStmt) EndPos() Position { return s.End } +func (s *BreakStmt) EndPos() Position { return s.End } +func (s *ContinueStmt) EndPos() Position { return s.End } +func (s *NextStmt) EndPos() Position { return s.End } +func (s *NextfileStmt) EndPos() Position { return s.End } +func (s *ExitStmt) EndPos() Position { return s.End } +func (s *DeleteStmt) EndPos() Position { return s.End } +func (s *ReturnStmt) EndPos() Position { return s.End } +func (s *BlockStmt) EndPos() Position { return s.End } + // PrintStmt is a statement like print $1, $3. type PrintStmt struct { Args []Expr Redirect Token Dest Expr + Start Position + End Position } func (s *PrintStmt) String() string { @@ -424,6 +560,8 @@ type PrintfStmt struct { Args []Expr Redirect Token Dest Expr + Start Position + End Position } func (s *PrintfStmt) String() string { @@ -432,7 +570,9 @@ func (s *PrintfStmt) String() string { // ExprStmt is statement like a bare function call: my_func(x). type ExprStmt struct { - Expr Expr + Expr Expr + Start Position + End Position } func (s *ExprStmt) String() string { @@ -441,13 +581,16 @@ func (s *ExprStmt) String() string { // IfStmt is an if or if-else statement. type IfStmt struct { - Cond Expr - Body Stmts - Else Stmts + Cond Expr + BodyStart Position + Body Stmts + Else Stmts + Start Position + End Position } func (s *IfStmt) String() string { - str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" + str := "if (" + s.Cond.String() + ") {\n" + s.Body.String() + "}" if len(s.Else) > 0 { str += " else {\n" + s.Else.String() + "}" } @@ -456,10 +599,13 @@ func (s *IfStmt) String() string { // ForStmt is a C-like for loop: for (i=0; i<10; i++) print i. type ForStmt struct { - Pre Stmt - Cond Expr - Post Stmt - Body Stmts + Pre Stmt + Cond Expr + Post Stmt + BodyStart Position + Body Stmts + Start Position + End Position } func (s *ForStmt) String() string { @@ -469,7 +615,7 @@ func (s *ForStmt) String() string { } condStr := "" if s.Cond != nil { - condStr = " " + trimParens(s.Cond.String()) + condStr = " " + s.Cond.String() } postStr := "" if s.Post != nil { @@ -480,59 +626,90 @@ func (s *ForStmt) String() string { // ForInStmt is a for loop like for (k in a) print k, a[k]. type ForInStmt struct { - Var *VarExpr - Array *ArrayExpr - Body Stmts + Var string + VarPos Position + Array string + ArrayPos Position + BodyStart Position + Body Stmts + Start Position + End Position } func (s *ForInStmt) String() string { - return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}" + return "for (" + s.Var + " in " + s.Array + ") {\n" + s.Body.String() + "}" } // WhileStmt is a while loop. type WhileStmt struct { - Cond Expr - Body Stmts + Cond Expr + BodyStart Position + Body Stmts + Start Position + End Position } func (s *WhileStmt) String() string { - return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" + return "while (" + s.Cond.String() + ") {\n" + s.Body.String() + "}" } // DoWhileStmt is a do-while loop. type DoWhileStmt struct { - Body Stmts - Cond Expr + Body Stmts + Cond Expr + Start Position + End Position } func (s *DoWhileStmt) String() string { - return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")" + return "do {\n" + s.Body.String() + "} while (" + s.Cond.String() + ")" } // BreakStmt is a break statement. -type BreakStmt struct{} +type BreakStmt struct { + Start Position + End Position +} func (s *BreakStmt) String() string { return "break" } // ContinueStmt is a continue statement. -type ContinueStmt struct{} +type ContinueStmt struct { + Start Position + End Position +} func (s *ContinueStmt) String() string { return "continue" } // NextStmt is a next statement. -type NextStmt struct{} +type NextStmt struct { + Start Position + End Position +} func (s *NextStmt) String() string { return "next" } +// NextfileStmt is a nextfile statement. +type NextfileStmt struct { + Start Position + End Position +} + +func (s *NextfileStmt) String() string { + return "nextfile" +} + // ExitStmt is an exit statement. type ExitStmt struct { Status Expr + Start Position + End Position } func (s *ExitStmt) String() string { @@ -545,21 +722,29 @@ func (s *ExitStmt) String() string { // DeleteStmt is a statement like delete a[k]. type DeleteStmt struct { - Array *ArrayExpr - Index []Expr + Array string + ArrayPos Position + Index []Expr + Start Position + End Position } func (s *DeleteStmt) String() string { + if len(s.Index) == 0 { + return "delete " + s.Array + } indices := make([]string, len(s.Index)) for i, index := range s.Index { indices[i] = index.String() } - return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]" + return "delete " + s.Array + "[" + strings.Join(indices, ", ") + "]" } // ReturnStmt is a return statement. type ReturnStmt struct { Value Expr + Start Position + End Position } func (s *ReturnStmt) String() string { @@ -572,7 +757,9 @@ func (s *ReturnStmt) String() string { // BlockStmt is a stand-alone block like { print "x" }. type BlockStmt struct { - Body Stmts + Body Stmts + Start Position + End Position } func (s *BlockStmt) String() string { @@ -583,8 +770,8 @@ func (s *BlockStmt) String() string { type Function struct { Name string Params []string - Arrays []bool Body Stmts + Pos Position } func (f *Function) String() string { @@ -592,9 +779,22 @@ func (f *Function) String() string { f.Body.String() + "}" } -func trimParens(s string) string { - if strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") { - s = s[1 : len(s)-1] - } - return s +// PositionError represents an error bound to specific position in source. +type PositionError struct { + // Source line/column position where the error occurred. + Position Position + // Error message. + Message string +} + +// PosErrorf like fmt.Errorf, but with an explicit position. +func PosErrorf(pos Position, format string, args ...interface{}) error { + message := fmt.Sprintf(format, args...) + return &PositionError{pos, message} +} + +// Error returns a formatted version of the error, including the line +// and column numbers. +func (e *PositionError) Error() string { + return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message) } diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/walk.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/walk.go new file mode 100644 index 0000000..5927d5f --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/walk.go @@ -0,0 +1,177 @@ +package ast + +import "fmt" + +// Visitor has a Visit method which is invoked for each node encountered by Walk. +// If the result visitor w is not nil, Walk visits each of the children +// of node with the visitor w, followed by a call of w.Visit(nil). +type Visitor interface { + Visit(node Node) (w Visitor) +} + +// WalkExprList walks a visitor over a list of expression AST nodes +func WalkExprList(v Visitor, exprs []Expr) { + for _, expr := range exprs { + Walk(v, expr) + } +} + +// WalkStmtList walks a visitor over a list of statement AST nodes +func WalkStmtList(v Visitor, stmts []Stmt) { + for _, stmt := range stmts { + Walk(v, stmt) + } +} + +// Walk traverses an AST in depth-first order: It starts by calling +// v.Visit(node); if node is nil, it does nothing. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). +func Walk(v Visitor, node Node) { + if node == nil { + return + } + if v = v.Visit(node); v == nil { + return + } + + // walk children + // (the order of the cases matches the order + // of the corresponding node types in ast.go) + switch n := node.(type) { + + // expressions + case *FieldExpr: + Walk(v, n.Index) + + case *NamedFieldExpr: + Walk(v, n.Field) + + case *UnaryExpr: + Walk(v, n.Value) + + case *BinaryExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *InExpr: + WalkExprList(v, n.Index) + + case *CondExpr: + Walk(v, n.Cond) + Walk(v, n.True) + Walk(v, n.False) + + case *NumExpr: // leaf + case *StrExpr: // leaf + case *RegExpr: // leaf + case *VarExpr: // leaf + case *IndexExpr: + WalkExprList(v, n.Index) + + case *AssignExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *AugAssignExpr: + Walk(v, n.Left) + Walk(v, n.Right) + + case *IncrExpr: + Walk(v, n.Expr) + + case *CallExpr: + WalkExprList(v, n.Args) + + case *UserCallExpr: + WalkExprList(v, n.Args) + + case *MultiExpr: + WalkExprList(v, n.Exprs) + + case *GetlineExpr: + Walk(v, n.Command) + Walk(v, n.Target) + Walk(v, n.File) + + case *GroupingExpr: + Walk(v, n.Expr) + + // statements + case *PrintStmt: + WalkExprList(v, n.Args) + Walk(v, n.Dest) + + case *PrintfStmt: + WalkExprList(v, n.Args) + Walk(v, n.Dest) + + case *ExprStmt: + Walk(v, n.Expr) + + case *IfStmt: + Walk(v, n.Cond) + WalkStmtList(v, n.Body) + WalkStmtList(v, n.Else) + + case *ForStmt: + Walk(v, n.Pre) + Walk(v, n.Cond) + Walk(v, n.Post) + WalkStmtList(v, n.Body) + + case *ForInStmt: + WalkStmtList(v, n.Body) + + case *WhileStmt: + Walk(v, n.Cond) + WalkStmtList(v, n.Body) + + case *DoWhileStmt: + WalkStmtList(v, n.Body) + Walk(v, n.Cond) + + case *BreakStmt: // leaf + case *ContinueStmt: // leaf + case *NextStmt: // leaf + case *NextfileStmt: // leaf + case *ExitStmt: + Walk(v, n.Status) + + case *DeleteStmt: + WalkExprList(v, n.Index) + + case *ReturnStmt: + Walk(v, n.Value) + + case *BlockStmt: + WalkStmtList(v, n.Body) + + case *Program: + for _, stmts := range n.Begin { + WalkStmtList(v, stmts) + } + for _, action := range n.Actions { + Walk(v, action) + } + for _, function := range n.Functions { + Walk(v, function) + } + for _, stmts := range n.End { + WalkStmtList(v, stmts) + } + + case *Action: + WalkExprList(v, n.Pattern) + WalkStmtList(v, n.Stmts) + + case *Function: + WalkStmtList(v, n.Body) + + default: + panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n)) + } + + v.Visit(nil) +} diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go index 40b2f89..26fc01f 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go @@ -5,8 +5,10 @@ import ( "fmt" "math" "regexp" + "strconv" "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/resolver" "github.com/benhoyt/goawk/lexer" ) @@ -54,7 +56,7 @@ func (e *compileError) Error() string { } // Compile compiles an AST (parsed program) into virtual machine instructions. -func Compile(prog *ast.Program) (compiledProg *Program, err error) { +func Compile(resolved *resolver.ResolvedProgram) (compiledProg *Program, err error) { defer func() { // The compiler uses panic with a *compileError to signal compile // errors internally, and they're caught here. This avoids the @@ -77,57 +79,60 @@ func Compile(prog *ast.Program) (compiledProg *Program, err error) { // Compile functions. For functions called before they're defined or // recursive functions, we have to set most p.Functions data first, then // compile Body afterward. - p.Functions = make([]Function, len(prog.Functions)) - for i, astFunc := range prog.Functions { + p.Functions = make([]Function, len(resolved.Functions)) + for i, astFunc := range resolved.Functions { + arrays := make([]bool, len(astFunc.Params)) numArrays := 0 - for _, a := range astFunc.Arrays { - if a { + for j, param := range astFunc.Params { + _, info, _ := resolved.LookupVar(astFunc.Name, param) + if info.Type == resolver.Array { + arrays[j] = true numArrays++ } } compiledFunc := Function{ Name: astFunc.Name, Params: astFunc.Params, - Arrays: astFunc.Arrays, - NumScalars: len(astFunc.Arrays) - numArrays, + Arrays: arrays, + NumScalars: len(astFunc.Params) - numArrays, NumArrays: numArrays, } p.Functions[i] = compiledFunc } - for i, astFunc := range prog.Functions { - c := &compiler{program: p, indexes: indexes} + for i, astFunc := range resolved.Functions { + c := compiler{resolved: resolved, program: p, indexes: indexes, funcName: astFunc.Name} c.stmts(astFunc.Body) p.Functions[i].Body = c.finish() } // Compile BEGIN blocks. - for _, stmts := range prog.Begin { - c := &compiler{program: p, indexes: indexes} + for _, stmts := range resolved.Begin { + c := compiler{resolved: resolved, program: p, indexes: indexes} c.stmts(stmts) p.Begin = append(p.Begin, c.finish()...) } // Compile pattern-action blocks. - for _, action := range prog.Actions { + for _, action := range resolved.Actions { var pattern [][]Opcode switch len(action.Pattern) { case 0: // Always considered a match case 1: - c := &compiler{program: p, indexes: indexes} + c := compiler{resolved: resolved, program: p, indexes: indexes} c.expr(action.Pattern[0]) pattern = [][]Opcode{c.finish()} case 2: - c := &compiler{program: p, indexes: indexes} + c := compiler{resolved: resolved, program: p, indexes: indexes} c.expr(action.Pattern[0]) pattern = append(pattern, c.finish()) - c = &compiler{program: p, indexes: indexes} + c = compiler{resolved: resolved, program: p, indexes: indexes} c.expr(action.Pattern[1]) pattern = append(pattern, c.finish()) } var body []Opcode if len(action.Stmts) > 0 { - c := &compiler{program: p, indexes: indexes} + c := compiler{resolved: resolved, program: p, indexes: indexes} c.stmts(action.Stmts) body = c.finish() } @@ -138,21 +143,33 @@ func Compile(prog *ast.Program) (compiledProg *Program, err error) { } // Compile END blocks. - for _, stmts := range prog.End { - c := &compiler{program: p, indexes: indexes} + for _, stmts := range resolved.End { + c := compiler{resolved: resolved, program: p, indexes: indexes} c.stmts(stmts) p.End = append(p.End, c.finish()...) } + // Build slices that map indexes to names (for variables and functions). // These are only used for disassembly, but set them up here. - p.scalarNames = make([]string, len(prog.Scalars)) - for name, index := range prog.Scalars { - p.scalarNames[index] = name - } - p.arrayNames = make([]string, len(prog.Arrays)) - for name, index := range prog.Arrays { - p.arrayNames[index] = name - } + resolved.IterVars("", func(name string, info resolver.VarInfo) { + if info.Type == resolver.Array { + for len(p.arrayNames) <= info.Index { + p.arrayNames = append(p.arrayNames, "") + } + p.arrayNames[info.Index] = name + } else { + for len(p.scalarNames) <= info.Index { + p.scalarNames = append(p.scalarNames, "") + } + p.scalarNames[info.Index] = name + } + }) + resolved.IterFuncs(func(name string, info resolver.FuncInfo) { + for len(p.nativeFuncNames) <= info.Index { + p.nativeFuncNames = append(p.nativeFuncNames, "") + } + p.nativeFuncNames[info.Index] = name + }) return p, nil } @@ -166,13 +183,31 @@ type constantIndexes struct { // Holds the compilation state. type compiler struct { + resolved *resolver.ResolvedProgram program *Program indexes constantIndexes + funcName string code []Opcode breaks [][]int continues [][]int } +func (c *compiler) scalarInfo(name string) (scope resolver.Scope, index int) { + scope, info, _ := c.resolved.LookupVar(c.funcName, name) + if info.Type != resolver.Scalar { + panic(fmt.Sprintf("internal error: found %s when expecting scalar %q", info.Type, name)) + } + return scope, info.Index +} + +func (c *compiler) arrayInfo(name string) (scope resolver.Scope, index int) { + scope, info, _ := c.resolved.LookupVar(c.funcName, name) + if info.Type != resolver.Array { + panic(fmt.Sprintf("internal error: found %s when expecting array %q", info.Type, name)) + } + return scope, info.Index +} + func (c *compiler) add(ops ...Opcode) { c.code = append(c.code, ops...) } @@ -201,24 +236,26 @@ func (c *compiler) stmt(stmt ast.Stmt) { // Pre or post doesn't matter for an assignment expression switch target := expr.Expr.(type) { case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(target.Index)) + scope, index := c.scalarInfo(target.Name) + switch scope { + case resolver.Global: + c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(index)) + case resolver.Local: + c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(index)) default: // ScopeSpecial - c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(target.Index)) + c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(index)) } case *ast.FieldExpr: c.expr(target.Index) c.add(IncrField, incrAmount(expr.Op)) case *ast.IndexExpr: c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) + scope, index := c.arrayInfo(target.Array) + switch scope { + case resolver.Global: + c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(index)) default: // ScopeLocal - c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) + c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(index)) } } return @@ -244,24 +281,26 @@ func (c *compiler) stmt(stmt ast.Stmt) { switch target := expr.Left.(type) { case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(AugAssignLocal, Opcode(augOp), opcodeInt(target.Index)) + scope, index := c.scalarInfo(target.Name) + switch scope { + case resolver.Global: + c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(index)) + case resolver.Local: + c.add(AugAssignLocal, Opcode(augOp), opcodeInt(index)) default: // ScopeSpecial - c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(target.Index)) + c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(index)) } case *ast.FieldExpr: c.expr(target.Index) c.add(AugAssignField, Opcode(augOp)) case *ast.IndexExpr: c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(target.Array.Index)) + scope, index := c.arrayInfo(target.Array) + switch scope { + case resolver.Global: + c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(index)) default: // ScopeLocal - c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(target.Array.Index)) + c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(index)) } } return @@ -345,8 +384,10 @@ func (c *compiler) stmt(stmt ast.Stmt) { // Otherwise we'd need to build a slice of all keys rather than // iterating, or write our own hash table that has a more flexible // iterator. - mark := c.jumpForward(ForIn, opcodeInt(int(s.Var.Scope)), opcodeInt(s.Var.Index), - Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + varScope, varIndex := c.scalarInfo(s.Var) + arrayScope, arrayIndex := c.arrayInfo(s.Array) + mark := c.jumpForward(ForIn, opcodeInt(int(varScope)), opcodeInt(varIndex), + Opcode(arrayScope), opcodeInt(arrayIndex)) c.breaks = append(c.breaks, nil) // nil tells BreakStmt it's a for-in loop c.continues = append(c.continues, []int{}) @@ -415,20 +456,24 @@ func (c *compiler) stmt(stmt ast.Stmt) { case *ast.NextStmt: c.add(Next) + case *ast.NextfileStmt: + c.add(Nextfile) + case *ast.ExitStmt: if s.Status != nil { c.expr(s.Status) + c.add(ExitStatus) } else { - c.expr(&ast.NumExpr{0}) + c.add(Exit) } - c.add(Exit) case *ast.DeleteStmt: + scope, index := c.arrayInfo(s.Array) if len(s.Index) > 0 { c.index(s.Index) - c.add(Delete, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + c.add(Delete, Opcode(scope), opcodeInt(index)) } else { - c.add(DeleteAll, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + c.add(DeleteAll, Opcode(scope), opcodeInt(index)) } case *ast.BlockStmt: @@ -451,27 +496,48 @@ func incrAmount(op lexer.Token) Opcode { // Generate opcodes for an assignment. func (c *compiler) assign(target ast.Expr) { - switch target := target.(type) { + switch t := target.(type) { case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(AssignGlobal, opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(AssignLocal, opcodeInt(target.Index)) - case ast.ScopeSpecial: - c.add(AssignSpecial, opcodeInt(target.Index)) + scope, index := c.scalarInfo(t.Name) + switch scope { + case resolver.Global: + c.add(AssignGlobal, opcodeInt(index)) + case resolver.Local: + c.add(AssignLocal, opcodeInt(index)) + case resolver.Special: + c.add(AssignSpecial, opcodeInt(index)) } case *ast.FieldExpr: - c.expr(target.Index) + c.expr(t.Index) c.add(AssignField) case *ast.IndexExpr: - c.index(target.Index) - switch target.Array.Scope { - case ast.ScopeGlobal: - c.add(AssignArrayGlobal, opcodeInt(target.Array.Index)) - case ast.ScopeLocal: - c.add(AssignArrayLocal, opcodeInt(target.Array.Index)) - } + c.index(t.Index) + c.assignIndexExpr(t) + } +} + +func (c *compiler) assignIndexExpr(target *ast.IndexExpr) { + scope, index := c.arrayInfo(target.Array) + switch scope { + case resolver.Global: + c.add(AssignArrayGlobal, opcodeInt(index)) + case resolver.Local: + c.add(AssignArrayLocal, opcodeInt(index)) + } +} + +// Assign to target, but instead of evaluating the index, rotate it to the top +// of the stack first (for applicable target types). +func (c *compiler) assignRoteIndex(target ast.Expr) { + switch t := target.(type) { + case *ast.VarExpr: + c.assign(target) // no index for VarExpr, just call assign + case *ast.FieldExpr: + c.add(Rote) + c.add(AssignField) + case *ast.IndexExpr: + c.add(Rote) + c.assignIndexExpr(t) } } @@ -611,13 +677,14 @@ func (c *compiler) expr(expr ast.Expr) { c.add(FieldByName) case *ast.VarExpr: - switch e.Scope { - case ast.ScopeGlobal: - c.add(Global, opcodeInt(e.Index)) - case ast.ScopeLocal: - c.add(Local, opcodeInt(e.Index)) - case ast.ScopeSpecial: - c.add(Special, opcodeInt(e.Index)) + scope, index := c.scalarInfo(e.Name) + switch scope { + case resolver.Global: + c.add(Global, opcodeInt(index)) + case resolver.Local: + c.add(Local, opcodeInt(index)) + case resolver.Special: + c.add(Special, opcodeInt(index)) } case *ast.RegExpr: @@ -658,19 +725,20 @@ func (c *compiler) expr(expr ast.Expr) { op = Subtract } if e.Pre { - c.expr(e.Expr) + c.dupeIndexLValue(e.Expr) c.expr(&ast.NumExpr{1}) c.add(op) c.add(Dupe) + c.assignRoteIndex(e.Expr) } else { - c.expr(e.Expr) - c.expr(&ast.NumExpr{0}) + c.dupeIndexLValue(e.Expr) + c.expr(&ast.NumExpr{0}) // add 0 to coerce result to number c.add(Add) c.add(Dupe) c.expr(&ast.NumExpr{1}) c.add(op) + c.assignRoteIndex(e.Expr) } - c.assign(e.Expr) case *ast.AssignExpr: // Most AssignExpr (standalone) will be handled by the ExprStmt special case @@ -680,12 +748,22 @@ func (c *compiler) expr(expr ast.Expr) { case *ast.AugAssignExpr: // Most AugAssignExpr (standalone) will be handled by the ExprStmt special case - c.expr(e.Right) - c.expr(e.Left) - c.add(Swap) - c.binaryOp(e.Op) - c.add(Dupe) - c.assign(e.Left) + switch e.Left.(type) { + case *ast.FieldExpr, *ast.IndexExpr: + c.expr(e.Right) + c.dupeIndexLValue(e.Left) + c.add(Rote) + c.binaryOp(e.Op) + c.add(Dupe) + c.assignRoteIndex(e.Left) + case *ast.VarExpr: + c.expr(e.Right) + c.expr(e.Left) + c.add(Swap) + c.binaryOp(e.Op) + c.add(Dupe) + c.assign(e.Left) + } case *ast.CondExpr: jump := c.condition(e.Cond, true) @@ -698,24 +776,20 @@ func (c *compiler) expr(expr ast.Expr) { case *ast.IndexExpr: c.index(e.Index) - switch e.Array.Scope { - case ast.ScopeGlobal: - c.add(ArrayGlobal, opcodeInt(e.Array.Index)) - case ast.ScopeLocal: - c.add(ArrayLocal, opcodeInt(e.Array.Index)) - } + c.indexExpr(e) case *ast.CallExpr: // split and sub/gsub require special cases as they have lvalue arguments switch e.Func { case lexer.F_SPLIT: c.expr(e.Args[0]) - arrayExpr := e.Args[1].(*ast.ArrayExpr) + varExpr := e.Args[1].(*ast.VarExpr) // split()'s 2nd arg is always an array + scope, index := c.arrayInfo(varExpr.Name) if len(e.Args) > 2 { c.expr(e.Args[2]) - c.add(CallSplitSep, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) + c.add(CallSplitSep, Opcode(scope), opcodeInt(index)) } else { - c.add(CallSplit, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) + c.add(CallSplit, Opcode(scope), opcodeInt(index)) } return case lexer.F_SUB, lexer.F_GSUB: @@ -727,11 +801,38 @@ func (c *compiler) expr(expr ast.Expr) { if len(e.Args) == 3 { target = e.Args[2] } - c.expr(e.Args[0]) - c.expr(e.Args[1]) - c.expr(target) - c.add(CallBuiltin, Opcode(op)) - c.assign(target) + switch target.(type) { + case *ast.FieldExpr, *ast.IndexExpr: + c.dupeIndexLValue(target) + c.expr(e.Args[0]) + c.expr(e.Args[1]) + c.add(Rote) + c.add(CallBuiltin, Opcode(op)) + c.assignRoteIndex(target) + case *ast.VarExpr: + c.expr(e.Args[0]) + c.expr(e.Args[1]) + c.expr(target) + c.add(CallBuiltin, Opcode(op)) + c.assign(target) + } + return + + case lexer.F_LENGTH: + if len(e.Args) > 0 { + // Determine if the call is length(arrayVar) or length(stringExpr). + if varExpr, ok := e.Args[0].(*ast.VarExpr); ok { + scope, info, _ := c.resolved.LookupVar(c.funcName, varExpr.Name) + if info.Type == resolver.Array { + c.add(CallLengthArray, Opcode(scope), opcodeInt(info.Index)) + return + } + } + c.expr(e.Args[0]) + c.add(CallBuiltin, Opcode(BuiltinLengthArg)) + } else { + c.add(CallBuiltin, Opcode(BuiltinLength)) + } return } @@ -757,12 +858,6 @@ func (c *compiler) expr(expr ast.Expr) { c.add(CallBuiltin, Opcode(BuiltinIndex)) case lexer.F_INT: c.add(CallBuiltin, Opcode(BuiltinInt)) - case lexer.F_LENGTH: - if len(e.Args) > 0 { - c.add(CallBuiltin, Opcode(BuiltinLengthArg)) - } else { - c.add(CallBuiltin, Opcode(BuiltinLength)) - } case lexer.F_LOG: c.add(CallBuiltin, Opcode(BuiltinLog)) case lexer.F_MATCH: @@ -810,31 +905,30 @@ func (c *compiler) expr(expr ast.Expr) { case *ast.InExpr: c.index(e.Index) - switch e.Array.Scope { - case ast.ScopeGlobal: - c.add(InGlobal, opcodeInt(e.Array.Index)) + scope, index := c.arrayInfo(e.Array) + switch scope { + case resolver.Global: + c.add(InGlobal, opcodeInt(index)) default: // ScopeLocal - c.add(InLocal, opcodeInt(e.Array.Index)) + c.add(InLocal, opcodeInt(index)) } case *ast.UserCallExpr: - if e.Native { + funcInfo, _ := c.resolved.LookupFunc(e.Name) + if funcInfo.Native { for _, arg := range e.Args { c.expr(arg) } - c.add(CallNative, opcodeInt(e.Index), opcodeInt(len(e.Args))) - for len(c.program.nativeFuncNames) <= e.Index { - c.program.nativeFuncNames = append(c.program.nativeFuncNames, "") - } - c.program.nativeFuncNames[e.Index] = e.Name + c.add(CallNative, opcodeInt(funcInfo.Index), opcodeInt(len(e.Args))) } else { - f := c.program.Functions[e.Index] + f := c.program.Functions[funcInfo.Index] var arrayOpcodes []Opcode numScalarArgs := 0 for i, arg := range e.Args { if f.Arrays[i] { a := arg.(*ast.VarExpr) - arrayOpcodes = append(arrayOpcodes, Opcode(a.Scope), opcodeInt(a.Index)) + scope, index := c.arrayInfo(a.Name) + arrayOpcodes = append(arrayOpcodes, Opcode(scope), opcodeInt(index)) } else { c.expr(arg) numScalarArgs++ @@ -843,7 +937,7 @@ func (c *compiler) expr(expr ast.Expr) { if numScalarArgs < f.NumScalars { c.add(Nulls, opcodeInt(f.NumScalars-numScalarArgs)) } - c.add(CallUser, opcodeInt(e.Index), opcodeInt(len(arrayOpcodes)/2)) + c.add(CallUser, opcodeInt(funcInfo.Index), opcodeInt(len(arrayOpcodes)/2)) c.add(arrayOpcodes...) } @@ -862,30 +956,62 @@ func (c *compiler) expr(expr ast.Expr) { } switch target := e.Target.(type) { case *ast.VarExpr: - switch target.Scope { - case ast.ScopeGlobal: - c.add(GetlineGlobal, redirect(), opcodeInt(target.Index)) - case ast.ScopeLocal: - c.add(GetlineLocal, redirect(), opcodeInt(target.Index)) - case ast.ScopeSpecial: - c.add(GetlineSpecial, redirect(), opcodeInt(target.Index)) + scope, index := c.scalarInfo(target.Name) + switch scope { + case resolver.Global: + c.add(GetlineGlobal, redirect(), opcodeInt(index)) + case resolver.Local: + c.add(GetlineLocal, redirect(), opcodeInt(index)) + case resolver.Special: + c.add(GetlineSpecial, redirect(), opcodeInt(index)) } case *ast.FieldExpr: c.expr(target.Index) c.add(GetlineField, redirect()) case *ast.IndexExpr: c.index(target.Index) - c.add(GetlineArray, redirect(), Opcode(target.Array.Scope), opcodeInt(target.Array.Index)) + scope, index := c.arrayInfo(target.Array) + c.add(GetlineArray, redirect(), Opcode(scope), opcodeInt(index)) default: c.add(Getline, redirect()) } + case *ast.GroupingExpr: + c.expr(e.Expr) + default: // Should never happen panic(fmt.Sprintf("unexpected expr type: %T", expr)) } } +func (c *compiler) indexExpr(e *ast.IndexExpr) { + scope, index := c.arrayInfo(e.Array) + switch scope { + case resolver.Global: + c.add(ArrayGlobal, opcodeInt(index)) + case resolver.Local: + c.add(ArrayLocal, opcodeInt(index)) + } +} + +// Compile an lvalue expression, but Dupe the index for applicable expr types +// so it can be used later for assignIndexExpr (without evaluating it again). +func (c *compiler) dupeIndexLValue(expr ast.Expr) { + switch e := expr.(type) { + case *ast.VarExpr: + c.expr(expr) // VarExpr has no index, so Dupe is not needed + case *ast.FieldExpr: + c.expr(e.Index) + c.add(Dupe) + c.add(Field) + case *ast.IndexExpr: + c.index(e.Index) + c.add(Dupe) + c.indexExpr(e) + } +} + // Generate a Concat opcode or, if possible, compact multiple Concats into one // ConcatMulti opcode. func (c *compiler) concatOp(expr *ast.BinaryExpr) { @@ -997,6 +1123,13 @@ func (c *compiler) binaryOp(op lexer.Token) { // Generate an array index, handling multi-indexes properly. func (c *compiler) index(index []ast.Expr) { for _, expr := range index { + if e, ok := expr.(*ast.NumExpr); ok && e.Value == float64(int(e.Value)) { + // If index expression is integer constant, optimize to string "n" + // to avoid toString() at runtime. + s := strconv.Itoa(int(e.Value)) + c.expr(&ast.StrExpr{Value: s}) + continue + } c.expr(expr) } if len(index) > 1 { diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go index d5dc959..8f944a3 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/resolver" "github.com/benhoyt/goawk/lexer" ) @@ -200,12 +201,12 @@ func (d *disassembler) disassemble(prefix string) error { d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex)) case Delete: - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex)) case DeleteAll: - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex)) @@ -316,9 +317,9 @@ func (d *disassembler) disassemble(prefix string) error { d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset)) case ForIn: - varScope := ast.VarScope(d.fetch()) + varScope := resolver.Scope(d.fetch()) varIndex := int(d.fetch()) - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) offset := d.fetch() d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset)) @@ -327,13 +328,18 @@ func (d *disassembler) disassemble(prefix string) error { builtinOp := BuiltinOp(d.fetch()) d.writeOpf("CallBuiltin %s", builtinOp) + case CallLengthArray: + arrayScope := resolver.Scope(d.fetch()) + arrayIndex := int(d.fetch()) + d.writeOpf("CallLengthArray %s", d.arrayName(arrayScope, arrayIndex)) + case CallSplit: - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex)) case CallSplitSep: - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex)) @@ -346,7 +352,7 @@ func (d *disassembler) disassemble(prefix string) error { numArrayArgs := int(d.fetch()) var arrayArgs []string for i := 0; i < numArrayArgs; i++ { - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex)) } @@ -404,7 +410,7 @@ func (d *disassembler) disassemble(prefix string) error { case GetlineArray: redirect := lexer.Token(d.fetch()) - arrayScope := ast.VarScope(d.fetch()) + arrayScope := resolver.Scope(d.fetch()) arrayIndex := int(d.fetch()) d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex)) @@ -443,13 +449,13 @@ func (d *disassembler) writeOpf(format string, args ...interface{}) { } // Return the scalar variable name described by scope and index. -func (d *disassembler) varName(scope ast.VarScope, index int) string { +func (d *disassembler) varName(scope resolver.Scope, index int) string { switch scope { - case ast.ScopeGlobal: + case resolver.Global: return d.program.scalarNames[index] - case ast.ScopeLocal: + case resolver.Local: return d.localName(index) - default: // ScopeSpecial + default: // resolver.Special return ast.SpecialVarName(index) } } @@ -471,8 +477,8 @@ func (d *disassembler) localName(index int) string { } // Return the array variable name describes by scope and index. -func (d *disassembler) arrayName(scope ast.VarScope, index int) string { - if scope == ast.ScopeLocal { +func (d *disassembler) arrayName(scope resolver.Scope, index int) string { + if scope == resolver.Local { return d.localArrayName(index) } return d.program.arrayNames[index] diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go index bfa2f0c..030e8b3 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go @@ -14,95 +14,99 @@ func _() { _ = x[Dupe-3] _ = x[Drop-4] _ = x[Swap-5] - _ = x[Field-6] - _ = x[FieldInt-7] - _ = x[FieldByName-8] - _ = x[FieldByNameStr-9] - _ = x[Global-10] - _ = x[Local-11] - _ = x[Special-12] - _ = x[ArrayGlobal-13] - _ = x[ArrayLocal-14] - _ = x[InGlobal-15] - _ = x[InLocal-16] - _ = x[AssignField-17] - _ = x[AssignGlobal-18] - _ = x[AssignLocal-19] - _ = x[AssignSpecial-20] - _ = x[AssignArrayGlobal-21] - _ = x[AssignArrayLocal-22] - _ = x[Delete-23] - _ = x[DeleteAll-24] - _ = x[IncrField-25] - _ = x[IncrGlobal-26] - _ = x[IncrLocal-27] - _ = x[IncrSpecial-28] - _ = x[IncrArrayGlobal-29] - _ = x[IncrArrayLocal-30] - _ = x[AugAssignField-31] - _ = x[AugAssignGlobal-32] - _ = x[AugAssignLocal-33] - _ = x[AugAssignSpecial-34] - _ = x[AugAssignArrayGlobal-35] - _ = x[AugAssignArrayLocal-36] - _ = x[Regex-37] - _ = x[IndexMulti-38] - _ = x[ConcatMulti-39] - _ = x[Add-40] - _ = x[Subtract-41] - _ = x[Multiply-42] - _ = x[Divide-43] - _ = x[Power-44] - _ = x[Modulo-45] - _ = x[Equals-46] - _ = x[NotEquals-47] - _ = x[Less-48] - _ = x[Greater-49] - _ = x[LessOrEqual-50] - _ = x[GreaterOrEqual-51] - _ = x[Concat-52] - _ = x[Match-53] - _ = x[NotMatch-54] - _ = x[Not-55] - _ = x[UnaryMinus-56] - _ = x[UnaryPlus-57] - _ = x[Boolean-58] - _ = x[Jump-59] - _ = x[JumpFalse-60] - _ = x[JumpTrue-61] - _ = x[JumpEquals-62] - _ = x[JumpNotEquals-63] - _ = x[JumpLess-64] - _ = x[JumpGreater-65] - _ = x[JumpLessOrEqual-66] - _ = x[JumpGreaterOrEqual-67] - _ = x[Next-68] - _ = x[Exit-69] - _ = x[ForIn-70] - _ = x[BreakForIn-71] - _ = x[CallBuiltin-72] - _ = x[CallSplit-73] - _ = x[CallSplitSep-74] - _ = x[CallSprintf-75] - _ = x[CallUser-76] - _ = x[CallNative-77] - _ = x[Return-78] - _ = x[ReturnNull-79] - _ = x[Nulls-80] - _ = x[Print-81] - _ = x[Printf-82] - _ = x[Getline-83] - _ = x[GetlineField-84] - _ = x[GetlineGlobal-85] - _ = x[GetlineLocal-86] - _ = x[GetlineSpecial-87] - _ = x[GetlineArray-88] - _ = x[EndOpcode-89] + _ = x[Rote-6] + _ = x[Field-7] + _ = x[FieldInt-8] + _ = x[FieldByName-9] + _ = x[FieldByNameStr-10] + _ = x[Global-11] + _ = x[Local-12] + _ = x[Special-13] + _ = x[ArrayGlobal-14] + _ = x[ArrayLocal-15] + _ = x[InGlobal-16] + _ = x[InLocal-17] + _ = x[AssignField-18] + _ = x[AssignGlobal-19] + _ = x[AssignLocal-20] + _ = x[AssignSpecial-21] + _ = x[AssignArrayGlobal-22] + _ = x[AssignArrayLocal-23] + _ = x[Delete-24] + _ = x[DeleteAll-25] + _ = x[IncrField-26] + _ = x[IncrGlobal-27] + _ = x[IncrLocal-28] + _ = x[IncrSpecial-29] + _ = x[IncrArrayGlobal-30] + _ = x[IncrArrayLocal-31] + _ = x[AugAssignField-32] + _ = x[AugAssignGlobal-33] + _ = x[AugAssignLocal-34] + _ = x[AugAssignSpecial-35] + _ = x[AugAssignArrayGlobal-36] + _ = x[AugAssignArrayLocal-37] + _ = x[Regex-38] + _ = x[IndexMulti-39] + _ = x[ConcatMulti-40] + _ = x[Add-41] + _ = x[Subtract-42] + _ = x[Multiply-43] + _ = x[Divide-44] + _ = x[Power-45] + _ = x[Modulo-46] + _ = x[Equals-47] + _ = x[NotEquals-48] + _ = x[Less-49] + _ = x[Greater-50] + _ = x[LessOrEqual-51] + _ = x[GreaterOrEqual-52] + _ = x[Concat-53] + _ = x[Match-54] + _ = x[NotMatch-55] + _ = x[Not-56] + _ = x[UnaryMinus-57] + _ = x[UnaryPlus-58] + _ = x[Boolean-59] + _ = x[Jump-60] + _ = x[JumpFalse-61] + _ = x[JumpTrue-62] + _ = x[JumpEquals-63] + _ = x[JumpNotEquals-64] + _ = x[JumpLess-65] + _ = x[JumpGreater-66] + _ = x[JumpLessOrEqual-67] + _ = x[JumpGreaterOrEqual-68] + _ = x[Next-69] + _ = x[Nextfile-70] + _ = x[Exit-71] + _ = x[ExitStatus-72] + _ = x[ForIn-73] + _ = x[BreakForIn-74] + _ = x[CallBuiltin-75] + _ = x[CallLengthArray-76] + _ = x[CallSplit-77] + _ = x[CallSplitSep-78] + _ = x[CallSprintf-79] + _ = x[CallUser-80] + _ = x[CallNative-81] + _ = x[Return-82] + _ = x[ReturnNull-83] + _ = x[Nulls-84] + _ = x[Print-85] + _ = x[Printf-86] + _ = x[Getline-87] + _ = x[GetlineField-88] + _ = x[GetlineGlobal-89] + _ = x[GetlineLocal-90] + _ = x[GetlineSpecial-91] + _ = x[GetlineArray-92] + _ = x[EndOpcode-93] } -const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode" +const _Opcode_name = "NopNumStrDupeDropSwapRoteFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextNextfileExitExitStatusForInBreakForInCallBuiltinCallLengthArrayCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode" -var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826} +var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 25, 30, 38, 49, 63, 69, 74, 81, 92, 102, 110, 117, 128, 140, 151, 164, 181, 197, 203, 212, 221, 231, 240, 251, 266, 280, 294, 309, 323, 339, 359, 378, 383, 393, 404, 407, 415, 423, 429, 434, 440, 446, 455, 459, 466, 477, 491, 497, 502, 510, 513, 523, 532, 539, 543, 552, 560, 570, 583, 591, 602, 617, 635, 639, 647, 651, 661, 666, 676, 687, 702, 711, 723, 734, 742, 752, 758, 768, 773, 778, 784, 791, 803, 816, 828, 842, 854, 863} func (i Opcode) String() string { if i < 0 || i >= Opcode(len(_Opcode_index)-1) { diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go index 36c4c93..da9ee6c 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go @@ -1,6 +1,6 @@ package compiler -//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp +//go:generate go run golang.org/x/tools/cmd/stringer@v0.10.0 -type=Opcode,AugOp,BuiltinOp // Opcode represents a single virtual machine instruction (or argument). The // comments beside each opcode show any arguments that instruction consumes. @@ -22,6 +22,7 @@ const ( Dupe Drop Swap + Rote // Fetch a field, variable, or array item Field @@ -107,15 +108,18 @@ const ( JumpLessOrEqual // offset JumpGreaterOrEqual // offset Next + Nextfile Exit + ExitStatus ForIn // varScope varIndex arrayScope arrayIndex offset BreakForIn // Builtin functions - CallBuiltin // builtinOp - CallSplit // arrayScope arrayIndex - CallSplitSep // arrayScope arrayIndex - CallSprintf // numArgs + CallBuiltin // builtinOp + CallLengthArray // arrayScope arrayIndex + CallSplit // arrayScope arrayIndex + CallSplitSep // arrayScope arrayIndex + CallSprintf // numArgs // User and native functions CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...] diff --git a/play/vendor/github.com/benhoyt/goawk/internal/resolver/resolve.go b/play/vendor/github.com/benhoyt/goawk/internal/resolver/resolve.go new file mode 100644 index 0000000..23f7efe --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/resolver/resolve.go @@ -0,0 +1,532 @@ +// Package resolver assigns integer indexes to functions and variables, as +// well as determining and checking their types (scalar or array). +package resolver + +import ( + "fmt" + "io" + "reflect" + "sort" + "strings" + + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/lexer" +) + +// ResolvedProgram is a parsed AWK program plus variable scope and type data +// prepared by the resolver that is needed for subsequent interpretation. +type ResolvedProgram struct { + ast.Program + resolver *resolver +} + +// LookupVar looks up a (possibly-local) variable by function name and +// variable name, returning its scope, info, and whether it exists. +func (r *ResolvedProgram) LookupVar(funcName, name string) (Scope, VarInfo, bool) { + scope, info, _, exists := r.resolver.lookupVar(funcName, name) + return scope, info, exists +} + +// IterVars iterates over the variables from the given function ("" to iterate +// globals), calling f for each variable. +func (r *ResolvedProgram) IterVars(funcName string, f func(name string, info VarInfo)) { + for name, info := range r.resolver.varInfo[funcName] { + f(name, info) + } +} + +// LookupFunc looks up a function by name, returning its info and whether it +// exists. +func (r *ResolvedProgram) LookupFunc(name string) (FuncInfo, bool) { + info, ok := r.resolver.funcInfo[name] + return info, ok +} + +// IterFuncs iterates over all the functions, including native (Go-defined) +// ones, calling f for each function. +func (r *ResolvedProgram) IterFuncs(f func(name string, info FuncInfo)) { + for name, info := range r.resolver.funcInfo { + f(name, info) + } +} + +// VarInfo holds resolved information about a variable. +type VarInfo struct { + Type Type + Index int +} + +// FuncInfo holds resolved information about a function. +type FuncInfo struct { + Native bool // true if function is a native (Go-defined) function + Index int + Params []string // list of parameter names +} + +// Scope represents the scope of a variable. +type Scope int + +const ( + Local Scope = iota + 1 // locals (function parameters) + Special // special variables (such as NF) + Global // globals +) + +func (s Scope) String() string { + switch s { + case Local: + return "local" + case Special: + return "special" + case Global: + return "global" + default: + return "unknown scope" + } +} + +// Type represents the type of a variable: scalar or array. +type Type int + +const ( + unknown Type = iota + Scalar + Array +) + +func (t Type) String() string { + switch t { + case Scalar: + return "scalar" + case Array: + return "array" + default: + return "unknown type" + } +} + +// Config holds resolver configuration. +type Config struct { + // Enable printing of type information + DebugTypes bool + + // io.Writer to print type information on (for example, os.Stderr) + DebugWriter io.Writer + + // Map of named Go functions to allow calling from AWK. See docs + // on interp.Config.Funcs for details. + Funcs map[string]interface{} +} + +// Resolve assigns integer indexes to functions and variables, as well as +// determining and checking their types (scalar or array). +func Resolve(prog *ast.Program, config *Config) *ResolvedProgram { + if config == nil { + config = &Config{} + } + + // Assign indexes to native (Go-defined) functions, in order of name. + // Do this before our first pass, so that AWK-defined functions override + // Go-defined ones and take precedence. + funcInfo := make(map[string]FuncInfo) + var nativeNames []string + for name := range config.Funcs { + nativeNames = append(nativeNames, name) + } + sort.Strings(nativeNames) + for i, name := range nativeNames { + funcInfo[name] = FuncInfo{Native: true, Index: i} + } + + // First pass determines call graph so we can process functions in + // topological order: e.g., if f() calls g(), process g first, then f. + callGraph := callGraphVisitor{ + calls: make(map[string]map[string]struct{}), + funcs: make(map[string]*ast.Function), + funcInfo: funcInfo, + } + ast.Walk(&callGraph, prog) + orderedFuncs := topoSort(callGraph.calls) + + // Ensure functions that weren't called are added to the orderedFuncs list + // (order of those doesn't matter, so add them at the end). + called := make(map[string]struct{}, len(orderedFuncs)) + for _, name := range orderedFuncs { + called[name] = struct{}{} + } + for name := range callGraph.funcs { + if _, ok := called[name]; !ok { + orderedFuncs = append(orderedFuncs, name) + } + } + + // Define the local variable names (we don't know their types yet). + varInfo := make(map[string]map[string]VarInfo) + for funcName, info := range funcInfo { + if info.Native { + continue + } + varInfo[funcName] = make(map[string]VarInfo) + for _, param := range info.Params { + varInfo[funcName][param] = VarInfo{} + } + } + + // Create our type resolver. + r := resolver{varInfo: varInfo, funcInfo: funcInfo, funcs: callGraph.funcs} + r.varInfo[""] = make(map[string]VarInfo) // func of "" stores global vars + + // Interpreter relies on ARGV and other built-in arrays being present. + r.recordVar("", "ARGV", Array, lexer.Position{1, 1}) + r.recordVar("", "ENVIRON", Array, lexer.Position{1, 1}) + r.recordVar("", "FIELDS", Array, lexer.Position{1, 1}) + + // Main resolver pass: determine types of variables and find function + // information. Can't call ast.Walk on prog directly, as it will not + // iterate through functions in topological (call graph) order. + main := mainVisitor{r: &r, nativeFuncs: config.Funcs} + updates := r.updates + main.walkOrdered(prog, orderedFuncs) + + // Do additional passes while we're still making type updates. Topological + // sorting takes care of ordinary call graphs, but additional passes are + // needed for at least these two cases: + // + // 1. Functions which don't use their parameters, such as f1's A parameter + // in this example: + // + // function f1(A) {} function f2(x, A) { x[0]; f1(a); f2(a) } + // + // 2. For complex mutually-recursive functions, such as this example: + // + // function f1(a) { if (0) f5(z1); f2(a) } + // function f2(b) { if (0) f4(z2); f3(b) } + // function f3(c) { if (0) f3(z3); f4(c) } + // function f4(d) { if (0) f2(z4); f5(d) } + // function f5(i) { if (0) f1(z5); i[1]=42 } + // BEGIN { x[1]=3; f5(x); print x[1] } + // + // Limit it to a sensible maximum number of iterations that almost + // certainly won't happen in the real world. + for i := 0; r.updates != updates; i++ { + updates = r.updates + main.walkOrdered(prog, orderedFuncs) + if i >= 100 { + panic(ast.PosErrorf(lexer.Position{1, 1}, + "too many iterations trying to resolve variable types")) + } + } + + // For any variables that are still unknown, set their type to scalar. + // This can happen for unused variables, such as in the following: + // { f(z) } function f(x) { print NR } + for _, infos := range r.varInfo { + for varName, info := range infos { + if info.Type == unknown { + infos[varName] = VarInfo{Type: Scalar, Index: info.Index} + } + } + } + + // Assign indexes to globals and locals (separate for scalars and arrays). + for funcName, infos := range r.varInfo { + var names []string + if funcName == "" { + // For global vars, order indexes by name. + for name := range infos { + names = append(names, name) + } + sort.Strings(names) + } else { + // For local vars, order indexes by parameter order. + names = r.funcInfo[funcName].Params + } + scalar := 0 + array := 0 + for _, name := range names { + info := infos[name] + if info.Type == Array { + infos[name] = VarInfo{Type: info.Type, Index: array} + array++ + } else { + infos[name] = VarInfo{Type: info.Type, Index: scalar} + scalar++ + } + } + } + + if config.DebugTypes { + printVarTypes(config.DebugWriter, r.varInfo, r.funcInfo) + } + + return &ResolvedProgram{ + Program: *prog, + resolver: &r, + } +} + +// Print variable type information (for debugging) on given writer. +func printVarTypes(w io.Writer, varInfo map[string]map[string]VarInfo, funcInfo map[string]FuncInfo) { + var funcNames []string + for funcName := range varInfo { + funcNames = append(funcNames, funcName) + } + sort.Strings(funcNames) + for _, funcName := range funcNames { + if funcName != "" { + info := funcInfo[funcName] + fmt.Fprintf(w, "function %s(%s) # index %d\n", + funcName, strings.Join(info.Params, ", "), info.Index) + } else { + fmt.Fprintln(w, "globals") + } + var varNames []string + for name := range varInfo[funcName] { + varNames = append(varNames, name) + } + sort.Strings(varNames) + for _, name := range varNames { + info := varInfo[funcName][name] + fmt.Fprintf(w, " %s: %s %d\n", name, info.Type, info.Index) + } + } +} + +// resolver tracks variable scopes and types as well as function information. +type resolver struct { + varInfo map[string]map[string]VarInfo + funcInfo map[string]FuncInfo + funcs map[string]*ast.Function + updates int +} + +// Look up variable from function funcName and return its scope and type +// information, the function it was defined in, and whether it exists. +func (r *resolver) lookupVar(funcName, varName string) (scope Scope, info VarInfo, varFunc string, exists bool) { + // If inside a function, try looking for a local variable first. + if funcName != "" { + if info, exists = r.varInfo[funcName][varName]; exists { + return Local, info, funcName, true + } + } + // Next try looking for a special variable (such as NR). + index := ast.SpecialVarIndex(varName) + if index > 0 { + // Special variables are all scalar (ARGV and similar are done as + // regular arrays). + return Special, VarInfo{Type: Scalar, Index: index}, "", true + } + // Then try looking for a global variable. + if info, exists = r.varInfo[""][varName]; exists { + return Global, info, "", true + } + return 0, VarInfo{}, "", false // not defined at all +} + +// Record that the given variable (in function funcName) is of the given type. +func (r *resolver) recordVar(funcName, varName string, typ Type, pos lexer.Position) { + _, info, varFunc, exists := r.lookupVar(funcName, varName) + if !exists { + // Doesn't exist as a local or a global, add it as a new global. + r.varInfo[""][varName] = VarInfo{Type: typ} + r.updates++ + if _, isFunc := r.funcs[varName]; isFunc { + panic(ast.PosErrorf(pos, "global var %q can't also be a function", varName)) + } + return + } + if info.Type != typ && info.Type != unknown && typ != unknown { + panic(ast.PosErrorf(pos, "can't use %s %q as %s", info.Type, varName, typ)) + } + if info.Type == unknown && typ != unknown { + r.varInfo[varFunc][varName] = VarInfo{Type: typ, Index: info.Index} + r.updates++ + } +} + +// callGraphVisitor records what functions are called by the current function +// to build our call graph. +type callGraphVisitor struct { + calls map[string]map[string]struct{} // map of current function to called function + funcs map[string]*ast.Function + funcInfo map[string]FuncInfo + curFunc string +} + +func (v *callGraphVisitor) Visit(node ast.Node) ast.Visitor { + switch n := node.(type) { + case *ast.Function: + if _, ok := v.funcs[n.Name]; ok { + panic(ast.PosErrorf(n.Pos, "function %q already defined", n.Name)) + } + v.funcInfo[n.Name] = FuncInfo{Index: len(v.funcs), Params: n.Params} + v.funcs[n.Name] = n + v.curFunc = n.Name + ast.WalkStmtList(v, n.Body) + v.curFunc = "" + + case *ast.UserCallExpr: + if _, ok := v.calls[v.curFunc]; !ok { + v.calls[v.curFunc] = make(map[string]struct{}) + } + v.calls[v.curFunc][n.Name] = struct{}{} + ast.WalkExprList(v, n.Args) + + default: + return v + } + return nil +} + +// mainVisitor records types of variables and performs various checks. +type mainVisitor struct { + r *resolver + nativeFuncs map[string]interface{} + curFunc string +} + +// Walk prog's AST, with functions walked as ordered by orderedFuncs. +func (v *mainVisitor) walkOrdered(prog *ast.Program, orderedFuncs []string) { + for _, funcName := range orderedFuncs { + if funcName == "" { + continue // BEGIN, END, and actions are processed below + } + function, exists := v.r.funcs[funcName] + if !exists { + // Happens in the case where someone tries to call a local + // variable as a function: function f(x) { x() }. That is checked + // and flagged as an error in the visitor. + continue + } + v.curFunc = funcName + ast.WalkStmtList(v, function.Body) + v.curFunc = "" + } + for _, stmts := range prog.Begin { + ast.WalkStmtList(v, stmts) + } + for _, action := range prog.Actions { + ast.Walk(v, action) + } + for _, stmts := range prog.End { + ast.WalkStmtList(v, stmts) + } +} + +func (v *mainVisitor) Visit(node ast.Node) ast.Visitor { + switch n := node.(type) { + case *ast.VarExpr: + v.r.recordVar(v.curFunc, n.Name, Scalar, n.Pos) + + case *ast.ForInStmt: + v.r.recordVar(v.curFunc, n.Var, Scalar, n.VarPos) + v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos) + ast.WalkStmtList(v, n.Body) + + case *ast.IndexExpr: + ast.WalkExprList(v, n.Index) + v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos) + + case *ast.InExpr: + ast.WalkExprList(v, n.Index) + v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos) + + case *ast.DeleteStmt: + v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos) + ast.WalkExprList(v, n.Index) + + case *ast.CallExpr: + switch n.Func { + case lexer.F_SPLIT: + ast.Walk(v, n.Args[0]) + varExpr := n.Args[1].(*ast.VarExpr) // split()'s 2nd arg is always an array + v.r.recordVar(v.curFunc, varExpr.Name, Array, varExpr.Pos) + ast.WalkExprList(v, n.Args[2:]) + + case lexer.F_LENGTH: + if len(n.Args) > 0 { + if varExpr, ok := n.Args[0].(*ast.VarExpr); ok { + // In a call to length(x), x may be a scalar or an array, + // so set it to unknown for now. + v.r.recordVar(v.curFunc, varExpr.Name, unknown, varExpr.Pos) + return nil + } + } + ast.WalkExprList(v, n.Args) + + default: + ast.WalkExprList(v, n.Args) + } + + case *ast.UserCallExpr: + _, _, varFunc, exists := v.r.lookupVar(v.curFunc, n.Name) + if varFunc != "" && exists { + panic(ast.PosErrorf(n.Pos, "can't call local variable %q as function", n.Name)) + } + + funcInfo, exists := v.r.funcInfo[n.Name] + if !exists { + panic(ast.PosErrorf(n.Pos, "undefined function %q", n.Name)) + } + + numParams := len(funcInfo.Params) + if funcInfo.Native { + typ := reflect.TypeOf(v.nativeFuncs[n.Name]) + numParams = typ.NumIn() + if typ.IsVariadic() { + numParams = 1000000000 // bigger than any reasonable len(n.Args) value! + } + } + if len(n.Args) > numParams { + panic(ast.PosErrorf(n.Pos, "%q called with more arguments than declared", n.Name)) + } + + for i, arg := range n.Args { + varExpr, ok := arg.(*ast.VarExpr) + if !ok { + // Argument is not a variable, process normally. + if !funcInfo.Native { + paramInfo := v.r.varInfo[n.Name][funcInfo.Params[i]] // type info of corresponding parameter + if paramInfo.Type == Array { + panic(ast.PosErrorf(n.Pos, "can't pass scalar %s as array param", arg)) + } + } + ast.Walk(v, arg) + continue + } + + if funcInfo.Native { + // Arguments to native function can only be scalar. + v.r.recordVar(v.curFunc, varExpr.Name, Scalar, varExpr.Pos) + continue + } + + // Variable passed to AWK-defined function may be scalar or array, + // determine from how it was used elsewhere. + paramName := funcInfo.Params[i] // name of corresponding parameter + paramInfo := v.r.varInfo[n.Name][paramName] // type info of parameter + _, varInfo, _, _ := v.r.lookupVar(v.curFunc, varExpr.Name) + switch { + case varInfo.Type == unknown && paramInfo.Type != unknown: + // Variable's type is not known but param type is, set variable type. + v.r.recordVar(v.curFunc, varExpr.Name, paramInfo.Type, varExpr.Pos) + case varInfo.Type != unknown && paramInfo.Type == unknown: + // Variable's type is known but param type is not, set param type. + funcPos := v.r.funcs[n.Name].Pos // best position we have at this point + v.r.recordVar(n.Name, paramName, varInfo.Type, funcPos) + case varInfo.Type != paramInfo.Type && varInfo.Type != unknown && paramInfo.Type != unknown: + // Both types are known but don't match -- type error! + panic(ast.PosErrorf(varExpr.Pos, "can't pass %s %q as %s param", + varInfo.Type, varExpr.Name, paramInfo.Type)) + default: + // Ensure variable references are recorded, even if the type + // is not yet known. + v.r.recordVar(v.curFunc, varExpr.Name, unknown, varExpr.Pos) + } + } + + default: + return v + } + return nil +} diff --git a/play/vendor/github.com/benhoyt/goawk/parser/toposort.go b/play/vendor/github.com/benhoyt/goawk/internal/resolver/toposort.go similarity index 98% rename from play/vendor/github.com/benhoyt/goawk/parser/toposort.go rename to play/vendor/github.com/benhoyt/goawk/internal/resolver/toposort.go index 90b71fa..745b012 100644 --- a/play/vendor/github.com/benhoyt/goawk/parser/toposort.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/resolver/toposort.go @@ -1,6 +1,6 @@ // Topological sorting -package parser +package resolver /* This algorithm is taken from: diff --git a/play/vendor/github.com/benhoyt/goawk/interp/functions.go b/play/vendor/github.com/benhoyt/goawk/interp/functions.go index 4eff792..8e12547 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/functions.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/functions.go @@ -3,6 +3,7 @@ package interp import ( + "bufio" "bytes" "errors" "fmt" @@ -12,7 +13,7 @@ import ( "strings" "unicode/utf8" - "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/resolver" . "github.com/benhoyt/goawk/lexer" ) @@ -242,15 +243,34 @@ func validNativeType(typ reflect.Type) bool { } // Guts of the split() function -func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) { +func (p *interp) split(s string, scope resolver.Scope, index int, fs string, mode IOMode) (int, error) { var parts []string - if fs == " " { + switch { + case mode == CSVMode || mode == TSVMode: + // Set up for parsing a CSV/TSV record + splitter := csvSplitter{ + separator: p.csvInputConfig.Separator, + sepLen: utf8.RuneLen(p.csvInputConfig.Separator), + comment: p.csvInputConfig.Comment, + fields: &parts, + } + scanner := bufio.NewScanner(strings.NewReader(s)) + scanner.Split(splitter.scan) + if p.splitBuffer == nil { + p.splitBuffer = make([]byte, inputBufSize) + } + scanner.Buffer(p.splitBuffer, maxRecordLength) + + // Parse one record. Errors shouldn't happen, but if there is one, + // len(parts) will be 0. + scanner.Scan() + case fs == " ": parts = strings.Fields(s) - } else if s == "" { + case s == "": // Leave parts 0 length on empty string - } else if utf8.RuneCountInString(fs) <= 1 { + case utf8.RuneCountInString(fs) <= 1: parts = strings.Split(s, fs) - } else { + default: re, err := p.compileRegex(fs) if err != nil { return 0, err diff --git a/play/vendor/github.com/benhoyt/goawk/interp/interp.go b/play/vendor/github.com/benhoyt/goawk/interp/interp.go index af97cf1..7f5c8c3 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/interp.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/interp.go @@ -17,11 +17,9 @@ import ( "errors" "fmt" "io" - "io/ioutil" "math" "math/rand" "os" - "os/exec" "regexp" "runtime" "strconv" @@ -30,13 +28,15 @@ import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/internal/resolver" "github.com/benhoyt/goawk/parser" ) var ( - errExit = errors.New("exit") - errBreak = errors.New("break") - errNext = errors.New("next") + errExit = errors.New("exit") + errBreak = errors.New("break") + errNext = errors.New("next") + errNextfile = errors.New("nextfile") errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter") @@ -79,25 +79,27 @@ type interp struct { hadFiles bool input io.Reader inputBuffer []byte - inputStreams map[string]io.ReadCloser - outputStreams map[string]io.WriteCloser - commands map[string]*exec.Cmd + inputStreams map[string]inputStream + outputStreams map[string]outputStream noExec bool noFileWrites bool noFileReads bool shellCommand []string csvOutput *bufio.Writer noArgVars bool + splitBuffer []byte // Scalars, arrays, and function state - globals []value - stack []value - sp int - frame []value - arrays []map[string]value - localArrays [][]int - callDepth int - nativeFuncs []nativeFunc + globals []value + stack []value + sp int + frame []value + arrays []map[string]value + localArrays [][]int + callDepth int + nativeFuncs []nativeFunc + scalarIndexes map[string]int + arrayIndexes map[string]int // File, line, and field handling filename value @@ -256,8 +258,8 @@ type Config struct { // You can also enable CSV or TSV input mode by setting INPUTMODE to "csv" // or "tsv" in Vars or in the BEGIN block (those override this setting). // - // For further documentation about GoAWK's CSV support, see the full docs: - // https://github.com/benhoyt/goawk/blob/master/csv.md + // For further documentation about GoAWK's CSV support, see the full docs + // in "../docs/csv.md". InputMode IOMode // Additional options if InputMode is CSVMode or TSVMode. The zero value @@ -358,10 +360,19 @@ func newInterp(program *parser.Program) *interp { } // Allocate memory for variables and virtual machine stack - p.globals = make([]value, len(program.Scalars)) + p.scalarIndexes = make(map[string]int) + p.arrayIndexes = make(map[string]int) + program.IterVars("", func(name string, info resolver.VarInfo) { + if info.Type == resolver.Array { + p.arrayIndexes[name] = info.Index + } else { + p.scalarIndexes[name] = info.Index + } + }) + p.globals = make([]value, len(p.scalarIndexes)) p.stack = make([]value, initialStackSize) - p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize) - for i := 0; i < len(program.Arrays); i++ { + p.arrays = make([]map[string]value, len(p.arrayIndexes), len(p.arrayIndexes)+initialStackSize) + for i := 0; i < len(p.arrayIndexes); i++ { p.arrays[i] = make(map[string]value) } @@ -379,9 +390,8 @@ func newInterp(program *parser.Program) *interp { p.outputRecordSep = "\n" p.subscriptSep = "\x1c" - p.inputStreams = make(map[string]io.ReadCloser) - p.outputStreams = make(map[string]io.WriteCloser) - p.commands = make(map[string]*exec.Cmd) + p.inputStreams = make(map[string]inputStream) + p.outputStreams = make(map[string]outputStream) p.scanners = make(map[string]*bufio.Scanner) return p @@ -433,11 +443,11 @@ func (p *interp) setExecuteConfig(config *Config) error { } // Set up ARGV and other variables from config - argvIndex := p.program.Arrays["ARGV"] - p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0)) + argvIndex := p.arrayIndexes["ARGV"] + p.setArrayValue(resolver.Global, argvIndex, "0", str(config.Argv0)) p.argc = len(config.Args) + 1 for i, arg := range config.Args { - p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg)) + p.setArrayValue(resolver.Global, argvIndex, strconv.Itoa(i+1), numStr(arg)) } p.noArgVars = config.NoArgVars p.filenameIndex = 1 @@ -460,16 +470,16 @@ func (p *interp) setExecuteConfig(config *Config) error { } // Set up ENVIRON from config or environment variables - environIndex := p.program.Arrays["ENVIRON"] + environIndex := p.arrayIndexes["ENVIRON"] if config.Environ != nil { for i := 0; i < len(config.Environ); i += 2 { - p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1])) + p.setArrayValue(resolver.Global, environIndex, config.Environ[i], numStr(config.Environ[i+1])) } } else { for _, kv := range os.Environ() { eq := strings.IndexByte(kv, '=') if eq >= 0 { - p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:])) + p.setArrayValue(resolver.Global, environIndex, kv[:eq], numStr(kv[eq+1:])) } } } @@ -548,7 +558,7 @@ func (p *interp) executeAll() (int, error) { } return 0, err } - if p.program.Actions == nil && p.program.End == nil { + if len(p.program.Compiled.Actions) == 0 && len(p.program.Compiled.End) == 0 { return p.exitStatus, nil // only BEGIN specified, don't process input } if err != errExit { @@ -588,7 +598,7 @@ func Exec(source, fieldSep string, input io.Reader, output io.Writer) error { config := &Config{ Stdin: input, Output: output, - Error: ioutil.Discard, + Error: io.Discard, Vars: []string{"FS", fieldSep}, } _, err = ExecProgram(prog, config) @@ -662,11 +672,15 @@ lineLoop: // Execute the body statements err := p.execute(action.Body) - if err == errNext { + switch { + case err == errNext: // "next" statement skips straight to next line continue lineLoop - } - if err != nil { + case err == errNextfile: + // Tell nextLine to move on to next file + p.scanner = nil + continue lineLoop + case err != nil: return err } } @@ -723,7 +737,7 @@ func (p *interp) setVarByName(name, value string) error { if index > 0 { return p.setSpecial(index, numStr(value)) } - index, ok := p.program.Scalars[name] + index, ok := p.scalarIndexes[name] if ok { p.globals[index] = numStr(value) return nil @@ -764,7 +778,11 @@ func (p *interp) setSpecial(index int, v value) error { case ast.V_FNR: p.fileLineNum = int(v.num()) case ast.V_ARGC: - p.argc = int(v.num()) + argc := int(v.num()) + if argc > maxFieldIndex { + return newError("ARGC set too large: %d", argc) + } + p.argc = argc case ast.V_CONVFMT: p.convertFormat = p.toString(v) case ast.V_FILENAME: @@ -833,8 +851,8 @@ func (p *interp) setSpecial(index int, v value) error { // Determine the index of given array into the p.arrays slice. Global // arrays are just at p.arrays[index], local arrays have to be looked // up indirectly. -func (p *interp) arrayIndex(scope ast.VarScope, index int) int { - if scope == ast.ScopeGlobal { +func (p *interp) arrayIndex(scope resolver.Scope, index int) int { + if scope == resolver.Global { return index } else { return p.localArrays[len(p.localArrays)-1][index] @@ -842,7 +860,7 @@ func (p *interp) arrayIndex(scope ast.VarScope, index int) int { } // Return array with given scope and index. -func (p *interp) array(scope ast.VarScope, index int) map[string]value { +func (p *interp) array(scope resolver.Scope, index int) map[string]value { return p.arrays[p.arrayIndex(scope, index)] } @@ -852,7 +870,7 @@ func (p *interp) localArray(index int) map[string]value { } // Set a value in given array by key (index) -func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) { +func (p *interp) setArrayValue(scope resolver.Scope, arrayIndex int, index string, v value) { array := p.array(scope, arrayIndex) array[index] = v } diff --git a/play/vendor/github.com/benhoyt/goawk/interp/io.go b/play/vendor/github.com/benhoyt/goawk/interp/io.go index ec23c21..2652ae5 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/io.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/io.go @@ -8,7 +8,6 @@ import ( "encoding/csv" "fmt" "io" - "io/ioutil" "os" "os/exec" "regexp" @@ -17,7 +16,7 @@ import ( "strings" "unicode/utf8" - "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/resolver" . "github.com/benhoyt/goawk/lexer" ) @@ -97,26 +96,6 @@ func (p *interp) writeCSV(output io.Writer, fields []string) error { return nil } -// Implement a buffered version of WriteCloser so output is buffered -// when redirecting to a file (eg: print >"out") -type bufferedWriteCloser struct { - *bufio.Writer - io.Closer -} - -func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser { - writer := bufio.NewWriterSize(w, outputBufSize) - return &bufferedWriteCloser{writer, w} -} - -func (wc *bufferedWriteCloser) Close() error { - err := wc.Writer.Flush() - if err != nil { - return err - } - return wc.Closer.Close() -} - // Determine the output stream for given redirect token and // destination (file or pipe name) func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) { @@ -145,13 +124,13 @@ func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, er } else { flags |= os.O_APPEND } - w, err := os.OpenFile(name, flags, 0644) + f, err := os.OpenFile(name, flags, 0644) if err != nil { return nil, newError("output redirection error: %s", err) } - buffered := newBufferedWriteCloser(w) - p.outputStreams[name] = buffered - return buffered, nil + out := newOutFileStream(f, outputBufSize) + p.outputStreams[name] = out + return out, nil case PIPE: // Pipe to command @@ -159,22 +138,16 @@ func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, er return nil, newError("can't write to pipe due to NoExec") } cmd := p.execShell(name) - w, err := cmd.StdinPipe() - if err != nil { - return nil, newError("error connecting to stdin pipe: %v", err) - } cmd.Stdout = p.output cmd.Stderr = p.errorOutput p.flushOutputAndError() // ensure synchronization - err = cmd.Start() + out, err := newOutCmdStream(cmd) if err != nil { p.printErrorf("%s\n", err) - return ioutil.Discard, nil + out = newOutNullStream() } - p.commands[name] = cmd - buffered := newBufferedWriteCloser(w) - p.outputStreams[name] = buffered - return buffered, nil + p.outputStreams[name] = out + return out, nil default: // Should never happen @@ -214,13 +187,14 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { if p.noFileReads { return nil, newError("can't read from file due to NoFileReads") } - r, err := os.Open(name) + f, err := os.Open(name) if err != nil { return nil, err // *os.PathError is handled by caller (getline returns -1) } - scanner := p.newScanner(r, make([]byte, inputBufSize)) + in := newInFileStream(f) + scanner := p.newScanner(in, make([]byte, inputBufSize)) p.scanners[name] = scanner - p.inputStreams[name] = r + p.inputStreams[name] = in return scanner, nil } @@ -238,19 +212,15 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) { cmd := p.execShell(name) cmd.Stdin = p.stdin cmd.Stderr = p.errorOutput - r, err := cmd.StdoutPipe() - if err != nil { - return nil, newError("error connecting to stdout pipe: %v", err) - } p.flushOutputAndError() // ensure synchronization - err = cmd.Start() + in, err := newInCmdStream(cmd) if err != nil { p.printErrorf("%s\n", err) return bufio.NewScanner(strings.NewReader("")), nil } - scanner := p.newScanner(r, make([]byte, inputBufSize)) - p.commands[name] = cmd - p.inputStreams[name] = r + + scanner := p.newScanner(in, make([]byte, inputBufSize)) + p.inputStreams[name] = in p.scanners[name] = scanner return scanner, nil } @@ -294,7 +264,7 @@ func (p *interp) setFieldNames(names []string) { p.fieldIndexes = nil // clear name-to-index cache // Populate FIELDS array (mapping of field indexes to field names). - fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"]) + fieldsArray := p.array(resolver.Global, p.arrayIndexes["FIELDS"]) for k := range fieldsArray { delete(fieldsArray, k) } @@ -648,6 +618,24 @@ func (p *interp) setLine(line string, isTrueStr bool) { p.reparseCSV = true } +// Splits on FS as a regex, appending each field to fields and returning the +// new slice (for efficiency). +func (p *interp) splitOnFieldSepRegex(fields []string, line string) []string { + indices := p.fieldSepRegex.FindAllStringIndex(line, -1) + prevIndex := 0 + for _, match := range indices { + start, end := match[0], match[1] + // skip empty matches (https://www.austingroupbugs.net/view.php?id=1468) + if start == end { + continue + } + fields = append(fields, line[prevIndex:start]) + prevIndex = end + } + fields = append(fields, line[prevIndex:]) + return fields +} + // Ensure that the current line is parsed into fields, splitting it // into fields if it hasn't been already func (p *interp) ensureFields() { @@ -684,7 +672,7 @@ func (p *interp) ensureFields() { p.fields = strings.Split(p.line, p.fieldSep) default: // Split on FS as a regex - p.fields = p.fieldSepRegex.Split(p.line, -1) + p.fields = p.splitOnFieldSepRegex(p.fields[:0], p.line) } // Special case for when RS=="" and FS is single character, @@ -732,8 +720,8 @@ func (p *interp) nextLine() (string, error) { // getArrayValue() here as it would set the value if // not present index := strconv.Itoa(p.filenameIndex) - argvIndex := p.program.Arrays["ARGV"] - argvArray := p.array(ast.ScopeGlobal, argvIndex) + argvIndex := p.arrayIndexes["ARGV"] + argvArray := p.array(resolver.Global, argvIndex) filename := p.toString(argvArray[index]) p.filenameIndex++ @@ -814,7 +802,7 @@ func writeOutput(w io.Writer, s string) error { return err } -// Close all streams, commands, and so on (after program execution). +// Close all streams and so on (after program execution). func (p *interp) closeAll() { if prevInput, ok := p.input.(io.Closer); ok { _ = prevInput.Close() @@ -825,9 +813,6 @@ func (p *interp) closeAll() { for _, w := range p.outputStreams { _ = w.Close() } - for _, cmd := range p.commands { - _ = cmd.Wait() - } if f, ok := p.output.(flusher); ok { _ = f.Flush() } @@ -841,11 +826,12 @@ func (p *interp) closeAll() { func (p *interp) flushAll() bool { allGood := true for name, writer := range p.outputStreams { - allGood = allGood && p.flushWriter(name, writer) + if !p.flushWriter(name, writer) { + allGood = false + } } - if _, ok := p.output.(flusher); ok { - // User-provided output may or may not be flushable - allGood = allGood && p.flushWriter("stdout", p.output) + if !p.flushWriter("stdout", p.output) { + allGood = false } return allGood } diff --git a/play/vendor/github.com/benhoyt/goawk/interp/iostream.go b/play/vendor/github.com/benhoyt/goawk/interp/iostream.go new file mode 100644 index 0000000..572cdf6 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/iostream.go @@ -0,0 +1,224 @@ +package interp + +// I/O streams are interfaces which allow file redirects and command pipelines to be treated +// equivalently. + +import ( + "bufio" + "errors" + "io" + "os/exec" + "syscall" +) + +const ( + notClosedExitCode = -127 +) + +var ( + doubleCloseError = errors.New("close: stream already closed") +) + +// firstError returns the first non-nil error or nil if all errors are nil. +func firstError(errs ...error) error { + for _, err := range errs { + if err != nil { + return err + } + } + return nil +} + +// Close the cmd and convert the error result into the result returned from goawk builtin functions. +// A nil error is returned if that error describes a non-zero exit status or an unhandled signal. +// Any other type of error returns -1 and err. +// +// The result mimicks gawk for expected child process errors: +// 1. Returns the exit status of the child process and nil error on normal process exit. +// 2. Returns 256 + signal on unhandled signal exit. +// 3. Returns 512 + signal on unhandled signal exit which caused a core dump. +func waitExitCode(cmd *exec.Cmd) (int, error) { + err := cmd.Wait() + if err == nil { + return 0, nil + } + ee, ok := err.(*exec.ExitError) + if !ok { + // Wait() returned an io error. + return -1, err + } + status, ok := ee.ProcessState.Sys().(syscall.WaitStatus) + if !ok { + // Maybe not all platforms support WaitStatus? + return -1, err + } + switch { + case status.CoreDump(): + return 512 + int(status.Signal()), nil + case status.Signaled(): + return 256 + int(status.Signal()), nil + case status.Exited(): + return status.ExitStatus(), nil + default: + return -1, err + } +} + +type inputStream interface { + io.ReadCloser + ExitCode() int +} + +type outputStream interface { + io.WriteCloser + Flush() error + ExitCode() int +} + +type outFileStream struct { + *bufio.Writer + closer io.Closer + exitCode int + closed bool +} + +func newOutFileStream(wc io.WriteCloser, size int) outputStream { + b := bufio.NewWriterSize(wc, size) + return &outFileStream{b, wc, notClosedExitCode, false} +} + +func (s *outFileStream) Close() error { + if s.closed { + return doubleCloseError + } + s.closed = true + flushErr := s.Writer.Flush() + closeErr := s.closer.Close() + if err := firstError(flushErr, closeErr); err != nil { + s.exitCode = -1 + return err + } + s.exitCode = 0 + return nil +} + +func (s *outFileStream) ExitCode() int { + return s.exitCode +} + +type outCmdStream struct { + *bufio.Writer + closer io.Closer + cmd *exec.Cmd + exitCode int + closed bool +} + +func newOutCmdStream(cmd *exec.Cmd) (outputStream, error) { + w, err := cmd.StdinPipe() + if err != nil { + return nil, newError("error connecting to stdin pipe: %v", err) + } + err = cmd.Start() + if err != nil { + w.Close() + return nil, err + } + out := &outCmdStream{bufio.NewWriterSize(w, outputBufSize), w, cmd, notClosedExitCode, false} + return out, nil +} + +func (s *outCmdStream) Close() error { + if s.closed { + return doubleCloseError + } + s.closed = true + flushErr := s.Writer.Flush() + closeErr := s.closer.Close() + var waitErr error + s.exitCode, waitErr = waitExitCode(s.cmd) + return firstError(waitErr, flushErr, closeErr) +} + +func (s *outCmdStream) ExitCode() int { + return s.exitCode +} + +// An outNullStream allows writes to not do anything while fulfilling the outputStream interface. +type outNullStream struct { + io.Writer + closed bool +} + +func newOutNullStream() outputStream { return &outNullStream{io.Discard, false} } +func (s outNullStream) Flush() error { return nil } +func (s *outNullStream) Close() error { + if s.closed { + return doubleCloseError + } + s.closed = true + return nil +} +func (s outNullStream) ExitCode() int { return -1 } + +type inFileStream struct { + io.ReadCloser + exitCode int + closed bool +} + +func newInFileStream(rc io.ReadCloser) inputStream { + return &inFileStream{rc, notClosedExitCode, false} +} + +func (s *inFileStream) Close() error { + if s.closed { + return doubleCloseError + } + s.closed = true + if err := s.ReadCloser.Close(); err != nil { + s.exitCode = -1 + return err + } + s.exitCode = 0 + return nil +} + +func (s *inFileStream) ExitCode() int { + return s.exitCode +} + +type inCmdStream struct { + io.ReadCloser + cmd *exec.Cmd + exitCode int + closed bool +} + +func newInCmdStream(cmd *exec.Cmd) (inputStream, error) { + r, err := cmd.StdoutPipe() + if err != nil { + return nil, newError("error connecting to stdout pipe: %v", err) + } + err = cmd.Start() + if err != nil { + r.Close() + return nil, err + } + return &inCmdStream{r, cmd, notClosedExitCode, false}, nil +} + +func (s *inCmdStream) Close() error { + if s.closed { + return doubleCloseError + } + s.closed = true + closeErr := s.ReadCloser.Close() + var waitErr error + s.exitCode, waitErr = waitExitCode(s.cmd) + return firstError(waitErr, closeErr) +} + +func (s *inCmdStream) ExitCode() int { + return s.exitCode +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go b/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go index 438fe6d..f1b9ed6 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go @@ -6,6 +6,7 @@ import ( "context" "math" + "github.com/benhoyt/goawk/internal/resolver" "github.com/benhoyt/goawk/parser" ) @@ -60,6 +61,29 @@ func (p *Interpreter) Execute(config *Config) (int, error) { return p.interp.executeAll() } +// Array returns a map representing the items in the named AWK array. AWK +// numbers are included as type float64, strings (including "numeric strings") +// are included as type string. If the named array does not exist, return nil. +func (p *Interpreter) Array(name string) map[string]interface{} { + index, exists := p.interp.arrayIndexes[name] + if !exists { + return nil + } + array := p.interp.array(resolver.Global, index) + result := make(map[string]interface{}, len(array)) + for k, v := range array { + switch v.typ { + case typeNum: + result[k] = v.n + case typeStr, typeNumStr: + result[k] = v.s + default: + result[k] = "" + } + } + return result +} + func (p *interp) resetCore() { p.scanner = nil for k := range p.scanners { @@ -72,9 +96,6 @@ func (p *interp) resetCore() { for k := range p.outputStreams { delete(p.outputStreams, k) } - for k := range p.commands { - delete(p.commands, k) - } p.sp = 0 p.localArrays = p.localArrays[:0] diff --git a/play/vendor/github.com/benhoyt/goawk/interp/vm.go b/play/vendor/github.com/benhoyt/goawk/interp/vm.go index 49b07f5..e531afb 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/vm.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/vm.go @@ -6,12 +6,11 @@ import ( "io" "math" "os" - "os/exec" "strings" "time" - "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/internal/resolver" "github.com/benhoyt/goawk/lexer" ) @@ -61,6 +60,11 @@ func (p *interp) execute(code []compiler.Opcode) error { l, r := p.peekTwo() p.replaceTwo(r, l) + case compiler.Rote: + s := p.peekSlice(3) + v0, v1, v2 := s[0], s[1], s[2] + s[0], s[1], s[2] = v1, v2, v0 + case compiler.Field: index := p.peekTop() v := p.getField(int(index.num())) @@ -180,7 +184,7 @@ func (p *interp) execute(code []compiler.Opcode) error { arrayScope := code[ip] arrayIndex := code[ip+1] ip += 2 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) index := p.toString(p.pop()) delete(array, index) @@ -188,7 +192,7 @@ func (p *interp) execute(code []compiler.Opcode) error { arrayScope := code[ip] arrayIndex := code[ip+1] ip += 2 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) for k := range array { delete(array, k) } @@ -587,11 +591,17 @@ func (p *interp) execute(code []compiler.Opcode) error { case compiler.Next: return errNext + case compiler.Nextfile: + return errNextfile + case compiler.Exit: - p.exitStatus = int(p.pop().num()) // Return special errExit value "caught" by top-level executor return errExit + case compiler.ExitStatus: + p.exitStatus = int(p.pop().num()) + return errExit + case compiler.ForIn: varScope := code[ip] varIndex := code[ip+1] @@ -599,15 +609,15 @@ func (p *interp) execute(code []compiler.Opcode) error { arrayIndex := code[ip+3] offset := code[ip+4] ip += 5 - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) loopCode := code[ip : ip+int(offset)] for index := range array { - switch ast.VarScope(varScope) { - case ast.ScopeGlobal: + switch resolver.Scope(varScope) { + case resolver.Global: p.globals[varIndex] = str(index) - case ast.ScopeLocal: + case resolver.Local: p.frame[varIndex] = str(index) - default: // ScopeSpecial + default: // resolver.Special err := p.setSpecial(int(varIndex), str(index)) if err != nil { return err @@ -634,12 +644,19 @@ func (p *interp) execute(code []compiler.Opcode) error { return err } + case compiler.CallLengthArray: + arrayScope := code[ip] + arrayIndex := code[ip+1] + ip += 2 + array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) + p.push(num(float64(len(array)))) + case compiler.CallSplit: arrayScope := code[ip] arrayIndex := code[ip+1] ip += 2 s := p.toString(p.peekTop()) - n, err := p.split(s, ast.VarScope(arrayScope), int(arrayIndex), p.fieldSep) + n, err := p.split(s, resolver.Scope(arrayScope), int(arrayIndex), p.fieldSep, p.inputMode) if err != nil { return err } @@ -650,7 +667,8 @@ func (p *interp) execute(code []compiler.Opcode) error { arrayIndex := code[ip+1] ip += 2 s, fieldSep := p.peekPop() - n, err := p.split(p.toString(s), ast.VarScope(arrayScope), int(arrayIndex), p.toString(fieldSep)) + // 3-argument form of split() ignores input mode + n, err := p.split(p.toString(s), resolver.Scope(arrayScope), int(arrayIndex), p.toString(fieldSep), DefaultMode) if err != nil { return err } @@ -683,7 +701,7 @@ func (p *interp) execute(code []compiler.Opcode) error { // Handle array arguments var arrays []int for j := 0; j < numArrayArgs; j++ { - arrayScope := ast.VarScope(code[ip]) + arrayScope := resolver.Scope(code[ip]) arrayIndex := int(code[ip+1]) ip += 2 arrays = append(arrays, p.arrayIndex(arrayScope, arrayIndex)) @@ -880,7 +898,7 @@ func (p *interp) execute(code []compiler.Opcode) error { } index := p.toString(p.peekTop()) if ret == 1 { - array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + array := p.array(resolver.Scope(arrayScope), int(arrayIndex)) array[index] = numStr(line) } p.replaceTop(num(ret)) @@ -897,33 +915,25 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { p.replaceTop(num(math.Atan2(y.num(), x.num()))) case compiler.BuiltinClose: + var err error + code := -1 name := p.toString(p.peekTop()) - var c io.Closer = p.inputStreams[name] - if c != nil { + if stream := p.inputStreams[name]; stream != nil { // Close input stream delete(p.inputStreams, name) - err := c.Close() - if err != nil { - p.replaceTop(num(-1)) - } else { - p.replaceTop(num(0)) - } - } else { - c = p.outputStreams[name] - if c != nil { - // Close output stream - delete(p.outputStreams, name) - err := c.Close() - if err != nil { - p.replaceTop(num(-1)) - } else { - p.replaceTop(num(0)) - } - } else { - // Nothing to close - p.replaceTop(num(-1)) - } + delete(p.scanners, name) + err = stream.Close() + code = stream.ExitCode() + } else if stream := p.outputStreams[name]; stream != nil { + // Close output stream + delete(p.outputStreams, name) + err = stream.Close() + code = stream.ExitCode() } + if err != nil { + p.printErrorf("error closing %q: %v\n", name, err) + } + p.replaceTop(num(float64(code))) case compiler.BuiltinCos: p.replaceTop(num(math.Cos(p.peekTop().num()))) @@ -1071,20 +1081,21 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { cmd.Stdout = p.output cmd.Stderr = p.errorOutput _ = p.flushAll() // ensure synchronization - err := cmd.Run() - ret := 0.0 + err := cmd.Start() + if err != nil { + // Could not start the shell so skip waiting on it. + p.printErrorf("%v\n", err) + p.replaceTop(num(-1.0)) + return nil + } + exitCode, err := waitExitCode(cmd) if err != nil { if p.checkCtx && p.ctx.Err() != nil { return p.ctx.Err() } - if exitErr, ok := err.(*exec.ExitError); ok { - ret = float64(exitErr.ProcessState.ExitCode()) - } else { - p.printErrorf("%v\n", err) - ret = -1 - } + p.printErrorf("%v\n", err) } - p.replaceTop(num(ret)) + p.replaceTop(num(float64(exitCode))) case compiler.BuiltinTolower: p.replaceTop(str(strings.ToLower(p.toString(p.peekTop())))) diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go index dc3a48d..10545cc 100644 --- a/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go +++ b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go @@ -9,6 +9,8 @@ package lexer import ( "errors" + "fmt" + "unicode/utf8" ) // Lexer tokenizes a byte string of AWK source code. Use NewLexer to @@ -32,6 +34,11 @@ type Position struct { Column int } +// String returns the position in "line:col" format. +func (p Position) String() string { + return fmt.Sprintf("%d:%d", p.Line, p.Column) +} + // NewLexer creates a new lexer that will tokenize the given source // code. See the module-level example for a working example. func NewLexer(src []byte) *Lexer { @@ -461,7 +468,7 @@ func parseString(quote byte, ch func() byte, next func()) (string, error) { c = '\v' next() case 'x': - // Hex byte of one of two hex digits + // Hex byte of one or two hex digits next() digit := hexDigit(ch()) if digit < 0 { @@ -474,6 +481,29 @@ func parseString(quote byte, ch func() byte, next func()) (string, error) { c = c*16 + byte(digit) next() } + case 'u': + // Hex Unicode character of 1-8 digits + next() + r := hexDigit(ch()) + if r < 0 { + return "", errors.New("1-8 hex digits expected") + } + next() + for i := 0; i < 7; i++ { + digit := hexDigit(ch()) + if digit < 0 { + break + } + next() + r = r*16 + digit + } + if !utf8.ValidRune(rune(r)) { + return "", errors.New("invalid Unicode character") + } + runeBytes := make([]byte, utf8.UTFMax) + n := utf8.EncodeRune(runeBytes, rune(r)) + chars = append(chars, runeBytes[:n]...) + continue case '0', '1', '2', '3', '4', '5', '6', '7': // Octal byte of 1-3 octal digits c = ch() - '0' diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/token.go b/play/vendor/github.com/benhoyt/goawk/lexer/token.go index b3be569..02dae30 100644 --- a/play/vendor/github.com/benhoyt/goawk/lexer/token.go +++ b/play/vendor/github.com/benhoyt/goawk/lexer/token.go @@ -70,6 +70,7 @@ const ( IF IN NEXT + NEXTFILE PRINT PRINTF RETURN @@ -127,6 +128,7 @@ var keywordTokens = map[string]Token{ "if": IF, "in": IN, "next": NEXT, + "nextfile": NEXTFILE, "print": PRINT, "printf": PRINTF, "return": RETURN, @@ -223,6 +225,7 @@ var tokenNames = map[Token]string{ IF: "if", IN: "in", NEXT: "next", + NEXTFILE: "nextfile", PRINT: "print", PRINTF: "printf", RETURN: "return", diff --git a/play/vendor/github.com/benhoyt/goawk/parser/parser.go b/play/vendor/github.com/benhoyt/goawk/parser/parser.go index 491b346..ebd31cb 100644 --- a/play/vendor/github.com/benhoyt/goawk/parser/parser.go +++ b/play/vendor/github.com/benhoyt/goawk/parser/parser.go @@ -13,6 +13,7 @@ import ( "github.com/benhoyt/goawk/internal/ast" "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/internal/resolver" . "github.com/benhoyt/goawk/lexer" ) @@ -45,35 +46,49 @@ type ParserConfig struct { Funcs map[string]interface{} } +func (c *ParserConfig) toResolverConfig() *resolver.Config { + if c == nil { + return nil + } + return &resolver.Config{ + DebugTypes: c.DebugTypes, + DebugWriter: c.DebugWriter, + Funcs: c.Funcs, + } +} + // ParseProgram parses an entire AWK program, returning the *Program // abstract syntax tree or a *ParseError on error. "config" describes // the parser configuration (and is allowed to be nil). func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { defer func() { - // The parser uses panic with a *ParseError to signal parsing - // errors internally, and they're caught here. This - // significantly simplifies the recursive descent calls as - // we don't have to check errors everywhere. + // The parser and resolver use panic with an *ast.PositionError to signal parsing + // errors internally, and they're caught here. This significantly simplifies + // the recursive descent calls as we don't have to check errors everywhere. if r := recover(); r != nil { - // Convert to ParseError or re-panic - err = r.(*ParseError) + // Convert to PositionError or re-panic + posError := *r.(*ast.PositionError) + err = &ParseError{ + Position: posError.Position, + Message: posError.Message, + } } }() lexer := NewLexer(src) p := parser{lexer: lexer} - if config != nil { - p.debugTypes = config.DebugTypes - p.debugWriter = config.DebugWriter - p.nativeFuncs = config.Funcs - } - p.initResolve() + p.multiExprs = make(map[*ast.MultiExpr]Position, 3) + p.next() // initialize p.tok // Parse into abstract syntax tree - prog = p.program() + astProg := p.program() + + // Resolve variable scopes and types + prog = &Program{} + prog.ResolvedProgram = *resolver.Resolve(astProg, config.toResolverConfig()) // Compile to virtual machine code - prog.Compiled, err = compiler.Compile(prog.toAST()) + prog.Compiled, err = compiler.Compile(&prog.ResolvedProgram) return prog, err } @@ -83,19 +98,14 @@ type Program struct { // but are exported for the interpreter (Program itself needs to // be exported in package "parser", otherwise these could live in // "internal/ast".) - Begin []ast.Stmts - Actions []ast.Action - End []ast.Stmts - Functions []ast.Function - Scalars map[string]int - Arrays map[string]int - Compiled *compiler.Program + resolver.ResolvedProgram + Compiled *compiler.Program } // String returns an indented, pretty-printed version of the parsed // program. func (p *Program) String() string { - return p.toAST().String() + return p.ResolvedProgram.Program.String() } // Disassemble writes a human-readable form of the program's virtual machine @@ -104,18 +114,6 @@ func (p *Program) Disassemble(writer io.Writer) error { return p.Compiled.Disassemble(writer) } -// toAST converts the *Program to an *ast.Program. -func (p *Program) toAST() *ast.Program { - return &ast.Program{ - Begin: p.Begin, - Actions: p.Actions, - End: p.End, - Functions: p.Functions, - Scalars: p.Scalars, - Arrays: p.Arrays, - } -} - // Parser state type parser struct { // Lexer instance and current token values @@ -131,28 +129,36 @@ type parser struct { loopDepth int // current loop depth (0 if not in any loops) // Variable tracking and resolving - locals map[string]bool // current function's locals (for determining scope) - varTypes map[string]map[string]typeInfo // map of func name to var name to type - varRefs []varRef // all variable references (usually scalars) - arrayRefs []arrayRef // all array references - multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions - - // Function tracking - functions map[string]int // map of function name to index - userCalls []userCall // record calls so we can resolve them later - nativeFuncs map[string]interface{} - - // Configuration and debugging - debugTypes bool // show variable types for debugging - debugWriter io.Writer // where the debug output goes + multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions } // Parse an entire AWK program. -func (p *parser) program() *Program { - prog := &Program{} - p.optionalNewlines() +func (p *parser) program() *ast.Program { + prog := &ast.Program{} + + // Terminator "(SEMICOLON|NEWLINE) NEWLINE*" is required after each item + // with two exceptions where it is optional: + // + // 1. after the last item, or + // 2. when the previous item ended with a closing brace. + // + // NOTE: The second exception does not seem to be correct according to + // the POSIX grammar definition, but it is the common behaviour for the + // major AWK implementations. + needsTerminator := false + for p.tok != EOF { + if needsTerminator { + if !p.matches(NEWLINE, SEMICOLON) { + panic(p.errorf("expected ; or newline between items")) + } + p.next() + needsTerminator = false + } + p.optionalNewlines() switch p.tok { + case EOF: + break case BEGIN: p.next() prog.Begin = append(prog.Begin, p.stmtsBrace()) @@ -161,7 +167,6 @@ func (p *parser) program() *Program { prog.End = append(prog.End, p.stmtsBrace()) case FUNCTION: function := p.function() - p.addFunction(function.Name, len(prog.Functions)) prog.Functions = append(prog.Functions, function) default: p.inAction = true @@ -170,23 +175,22 @@ func (p *parser) program() *Program { if !p.matches(LBRACE, EOF) { pattern = append(pattern, p.expr()) } - if !p.matches(LBRACE, EOF, NEWLINE) { + if !p.matches(LBRACE, EOF, NEWLINE, SEMICOLON) { p.commaNewlines() pattern = append(pattern, p.expr()) } // Or an empty action (equivalent to { print $0 }) - action := ast.Action{pattern, nil} + action := &ast.Action{pattern, nil} if p.tok == LBRACE { action.Stmts = p.stmtsBrace() + } else { + needsTerminator = true } prog.Actions = append(prog.Actions, action) p.inAction = false } - p.optionalNewlines() } - p.resolveUserCalls(prog) - p.resolveVars(prog) p.checkMultiExprs() return prog @@ -213,6 +217,10 @@ func (p *parser) stmtsBrace() ast.Stmts { p.optionalNewlines() ss := []ast.Stmt{} for p.tok != RBRACE && p.tok != EOF { + if p.matches(SEMICOLON, NEWLINE) { + p.next() + continue + } ss = append(ss, p.stmt()) } p.expect(RBRACE) @@ -224,6 +232,7 @@ func (p *parser) stmtsBrace() ast.Stmts { // Parse a "simple" statement (eg: allowed in a for loop init clause). func (p *parser) simpleStmt() ast.Stmt { + startPos := p.pos switch p.tok { case PRINT, PRINTF: op := p.tok @@ -244,17 +253,16 @@ func (p *parser) simpleStmt() ast.Stmt { dest = p.expr() } if op == PRINT { - return &ast.PrintStmt{args, redirect, dest} + return &ast.PrintStmt{args, redirect, dest, startPos, p.pos} } else { if len(args) == 0 { panic(p.errorf("expected printf args, got none")) } - return &ast.PrintfStmt{args, redirect, dest} + return &ast.PrintfStmt{args, redirect, dest, startPos, p.pos} } case DELETE: p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) + name, namePos := p.expectName() var index []ast.Expr if p.tok == LBRACKET { p.next() @@ -264,20 +272,18 @@ func (p *parser) simpleStmt() ast.Stmt { } p.expect(RBRACKET) } - return &ast.DeleteStmt{ref, index} - case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN: + return &ast.DeleteStmt{name, namePos, index, startPos, p.pos} + case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, NEXTFILE, EXIT, RETURN: panic(p.errorf("expected print/printf, delete, or expression")) default: - return &ast.ExprStmt{p.expr()} + return &ast.ExprStmt{p.expr(), startPos, p.pos} } } // Parse any top-level statement. func (p *parser) stmt() ast.Stmt { - for p.matches(SEMICOLON, NEWLINE) { - p.next() - } var s ast.Stmt + startPos := p.pos switch p.tok { case IF: p.next() @@ -285,6 +291,7 @@ func (p *parser) stmt() ast.Stmt { cond := p.expr() p.expect(RPAREN) p.optionalNewlines() + bodyStart := p.pos body := p.stmts() p.optionalNewlines() var elseBody ast.Stmts @@ -293,7 +300,7 @@ func (p *parser) stmt() ast.Stmt { p.optionalNewlines() elseBody = p.stmts() } - s = &ast.IfStmt{cond, body, elseBody} + s = &ast.IfStmt{cond, bodyStart, body, elseBody, startPos, p.pos} case FOR: // Parse for statement, either "for in" or C-like for loop. // @@ -327,8 +334,18 @@ func (p *parser) stmt() ast.Stmt { if !ok { panic(p.errorf("expected 'for (var in array) ...'")) } + bodyStart := p.pos body := p.loopStmts() - s = &ast.ForInStmt{varExpr, inExpr.Array, body} + s = &ast.ForInStmt{ + Var: varExpr.Name, + VarPos: varExpr.Pos, + Array: inExpr.Array, + ArrayPos: inExpr.ArrayPos, + BodyStart: bodyStart, + Body: body, + Start: startPos, + End: p.pos, + } } else { // Match: for ([pre]; [cond]; [post]) body p.expect(SEMICOLON) @@ -345,8 +362,9 @@ func (p *parser) stmt() ast.Stmt { } p.expect(RPAREN) p.optionalNewlines() + bodyStart := p.pos body := p.loopStmts() - s = &ast.ForStmt{pre, cond, post, body} + s = &ast.ForStmt{pre, cond, post, bodyStart, body, startPos, p.pos} } case WHILE: p.next() @@ -354,42 +372,50 @@ func (p *parser) stmt() ast.Stmt { cond := p.expr() p.expect(RPAREN) p.optionalNewlines() + bodyStart := p.pos body := p.loopStmts() - s = &ast.WhileStmt{cond, body} + s = &ast.WhileStmt{cond, bodyStart, body, startPos, p.pos} case DO: p.next() p.optionalNewlines() body := p.loopStmts() + p.optionalNewlines() p.expect(WHILE) p.expect(LPAREN) cond := p.expr() p.expect(RPAREN) - s = &ast.DoWhileStmt{body, cond} + s = &ast.DoWhileStmt{body, cond, startPos, p.pos} case BREAK: if p.loopDepth == 0 { panic(p.errorf("break must be inside a loop body")) } p.next() - s = &ast.BreakStmt{} + s = &ast.BreakStmt{startPos, p.pos} case CONTINUE: if p.loopDepth == 0 { panic(p.errorf("continue must be inside a loop body")) } p.next() - s = &ast.ContinueStmt{} + s = &ast.ContinueStmt{startPos, p.pos} case NEXT: if !p.inAction && p.funcName == "" { panic(p.errorf("next can't be inside BEGIN or END")) } p.next() - s = &ast.NextStmt{} + s = &ast.NextStmt{startPos, p.pos} + case NEXTFILE: + if !p.inAction && p.funcName == "" { + panic(p.errorf("nextfile can't be inside BEGIN or END")) + } + p.next() + s = &ast.NextfileStmt{startPos, p.pos} case EXIT: p.next() var status ast.Expr if !p.matches(NEWLINE, SEMICOLON, RBRACE) { status = p.expr() } - s = &ast.ExitStmt{status} + s = &ast.ExitStmt{status, startPos, p.pos} case RETURN: if p.funcName == "" { panic(p.errorf("return must be inside a function")) @@ -399,10 +425,10 @@ func (p *parser) stmt() ast.Stmt { if !p.matches(NEWLINE, SEMICOLON, RBRACE) { value = p.expr() } - s = &ast.ReturnStmt{value} + s = &ast.ReturnStmt{value, startPos, p.pos} case LBRACE: body := p.stmtsBrace() - s = &ast.BlockStmt{body} + s = &ast.BlockStmt{body, startPos, p.pos} default: s = p.simpleStmt() } @@ -429,22 +455,18 @@ func (p *parser) loopStmts() ast.Stmts { // Parse a function definition and body. As it goes, this resolves // the local variable indexes and tracks which parameters are array // parameters. -func (p *parser) function() ast.Function { +func (p *parser) function() *ast.Function { if p.funcName != "" { // Should never actually get here (FUNCTION token is only // handled at the top level), but just in case. panic(p.errorf("can't nest functions")) } p.next() - name := p.val - if _, ok := p.functions[name]; ok { - panic(p.errorf("function %q already defined", name)) - } - p.expect(NAME) + name, funcNamePos := p.expectName() p.expect(LPAREN) first := true params := make([]string, 0, 7) // pre-allocate some to reduce allocations - p.locals = make(map[string]bool, 7) + locals := make(map[string]bool, 7) for p.tok != RPAREN { if !first { p.commaNewlines() @@ -454,23 +476,24 @@ func (p *parser) function() ast.Function { if param == name { panic(p.errorf("can't use function name as parameter name")) } - if p.locals[param] { + if locals[param] { panic(p.errorf("duplicate parameter name %q", param)) } p.expect(NAME) params = append(params, param) - p.locals[param] = true + locals[param] = true } p.expect(RPAREN) p.optionalNewlines() // Parse the body - p.startFunction(name, params) - body := p.stmtsBrace() - p.stopFunction() - p.locals = nil + p.funcName = name - return ast.Function{name, params, nil, body} + body := p.stmtsBrace() + + p.funcName = "" + + return &ast.Function{name, params, body, funcNamePos} } // Parse expressions separated by commas: args to print[f] or user @@ -520,37 +543,57 @@ func (p *parser) getLine() ast.Expr { // An lvalue is a variable name, an array[expr] index expression, or // an $expr field expression. func (p *parser) _assign(higher func() ast.Expr) ast.Expr { + leftPos := p.pos expr := higher() - _, isNamedField := expr.(*ast.NamedFieldExpr) - if (isNamedField || ast.IsLValue(expr)) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, - MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) { + if p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) { + _, isNamedField := expr.(*ast.NamedFieldExpr) if isNamedField { panic(p.errorf("assigning @ expression not supported")) } op := p.tok p.next() right := p._assign(higher) - switch op { - case ASSIGN: - return &ast.AssignExpr{expr, right} - case ADD_ASSIGN: - op = ADD - case DIV_ASSIGN: - op = DIV - case MOD_ASSIGN: - op = MOD - case MUL_ASSIGN: - op = MUL - case POW_ASSIGN: - op = POW - case SUB_ASSIGN: - op = SUB + if !ast.IsLValue(expr) { + // Partial backtracking to allow expressions like "1 && x=1", + // which isn't really valid, as assignments are lower-precedence + // than binary operators, but onetrueawk, Gawk, and mawk all + // support this for logical, match and comparison operators. See + // issue #166. + binary, isBinary := expr.(*ast.BinaryExpr) + if isBinary && ast.IsLValue(binary.Right) { + switch binary.Op { + case AND, OR, MATCH, NOT_MATCH, EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER: + assign := makeAssign(binary.Right, op, right) + return &ast.BinaryExpr{binary.Left, binary.Op, assign} + } + } + panic(ast.PosErrorf(leftPos, "expected lvalue before %s", op)) } - return &ast.AugAssignExpr{expr, op, right} + return makeAssign(expr, op, right) } return expr } +func makeAssign(left ast.Expr, op Token, right ast.Expr) ast.Expr { + switch op { + case ASSIGN: + return &ast.AssignExpr{left, right} + case ADD_ASSIGN: + op = ADD + case DIV_ASSIGN: + op = DIV + case MOD_ASSIGN: + op = MOD + case MUL_ASSIGN: + op = MUL + case POW_ASSIGN: + op = POW + case SUB_ASSIGN: + op = SUB + } + return &ast.AugAssignExpr{left, op, right} +} + // Parse a ?: conditional expression: // // or [QUESTION NEWLINE* cond COLON NEWLINE* cond] @@ -593,9 +636,8 @@ func (p *parser) _in(higher func() ast.Expr) ast.Expr { expr := higher() for p.tok == IN { p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - expr = &ast.InExpr{[]ast.Expr{expr}, ref} + name, namePos := p.expectName() + expr = &ast.InExpr{[]ast.Expr{expr}, name, namePos} } return expr } @@ -654,7 +696,7 @@ func (p *parser) mul() ast.Expr { func (p *parser) pow() ast.Expr { // Note that pow (expr ^ expr) is right-associative - expr := p.preIncr() + expr := p.postIncr() if p.tok == POW { p.next() right := p.pow() @@ -663,20 +705,6 @@ func (p *parser) pow() ast.Expr { return expr } -func (p *parser) preIncr() ast.Expr { - if p.tok == INCR || p.tok == DECR { - op := p.tok - p.next() - exprPos := p.pos - expr := p.preIncr() - if !ast.IsLValue(expr) { - panic(p.posErrorf(exprPos, "expected lvalue after ++ or --")) - } - return &ast.IncrExpr{expr, op, true} - } - return p.postIncr() -} - func (p *parser) postIncr() ast.Expr { expr := p.primary() if (p.tok == INCR || p.tok == DECR) && ast.IsLValue(expr) { @@ -698,7 +726,7 @@ func (p *parser) primary() ast.Expr { case STRING: s := p.val p.next() - return &ast.StrExpr{s} + return &ast.StrExpr{Value: s} case DIV, DIV_ASSIGN: // If we get to DIV or DIV_ASSIGN as a primary expression, // it's actually a regex. @@ -706,7 +734,16 @@ func (p *parser) primary() ast.Expr { return &ast.RegExpr{regex} case DOLLAR: p.next() - return &ast.FieldExpr{p.primary()} + var expr ast.Expr = &ast.FieldExpr{p.primary()} + // Post-increment operators have lower precedence than primary + // expressions by default, except for field expressions with + // post-increments (e.g., $$1++ = $($1++), NOT $($1)++). + if p.tok == INCR || p.tok == DECR { + op := p.tok + p.next() + expr = &ast.IncrExpr{expr, op, false} + } + return expr case AT: p.next() return &ast.NamedFieldExpr{p.primary()} @@ -714,10 +751,17 @@ func (p *parser) primary() ast.Expr { op := p.tok p.next() return &ast.UnaryExpr{op, p.pow()} - case NAME: - name := p.val - namePos := p.pos + case INCR, DECR: + op := p.tok p.next() + exprPos := p.pos + expr := p.optionalLValue() + if expr == nil { + panic(ast.PosErrorf(exprPos, "expected lvalue after %s", op)) + } + return &ast.IncrExpr{expr, op, true} + case NAME: + name, namePos := p.expectName() if p.tok == LBRACKET { // a[x] or a[x, y] array index expression p.next() @@ -726,17 +770,14 @@ func (p *parser) primary() ast.Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{name, namePos, index} } else if p.tok == LPAREN && !p.lexer.HadSpace() { - if p.locals[name] { - panic(p.errorf("can't call local variable %q as function", name)) - } // Grammar requires no space between function name and // left paren for user function calls, hence the funky // lexer.HadSpace() method. return p.userCall(name, namePos) } - return p.varRef(name, namePos) + return &ast.VarExpr{name, namePos} case LPAREN: parenPos := p.pos p.next() @@ -746,15 +787,14 @@ func (p *parser) primary() ast.Expr { panic(p.errorf("expected expression, not %s", p.tok)) case 1: p.expect(RPAREN) - return exprs[0] + return &ast.GroupingExpr{exprs[0]} default: // Multi-dimensional array "in" requires parens around index p.expect(RPAREN) if p.tok == IN { p.next() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - return &ast.InExpr{exprs, ref} + name, namePos := p.expectName() + return &ast.InExpr{exprs, name, namePos} } // MultiExpr is used as a pseudo-expression for print[f] parsing. return p.multiExpr(exprs, parenPos) @@ -785,7 +825,7 @@ func (p *parser) primary() ast.Expr { inPos := p.pos in := p.expr() if !ast.IsLValue(in) { - panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) + panic(ast.PosErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) } args = append(args, in) } @@ -796,9 +836,8 @@ func (p *parser) primary() ast.Expr { p.expect(LPAREN) str := p.expr() p.commaNewlines() - ref := p.arrayRef(p.val, p.pos) - p.expect(NAME) - args := []ast.Expr{str, ref} + name, namePos := p.expectName() + args := []ast.Expr{str, &ast.VarExpr{name, namePos}} if p.tok == COMMA { p.commaNewlines() args = append(args, p.regexStr(p.expr)) @@ -902,9 +941,7 @@ func (p *parser) optionalLValue() ast.Expr { // User function call, e.g., foo() not lvalue. return nil } - name := p.val - namePos := p.pos - p.next() + name, namePos := p.expectName() if p.tok == LBRACKET { // a[x] or a[x, y] array index expression p.next() @@ -913,9 +950,9 @@ func (p *parser) optionalLValue() ast.Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &ast.IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{name, namePos, index} } - return p.varRef(name, namePos) + return &ast.VarExpr{name, namePos} case DOLLAR: p.next() return &ast.FieldExpr{p.primary()} @@ -930,7 +967,7 @@ func (p *parser) optionalLValue() ast.Expr { func (p *parser) regexStr(parse func() ast.Expr) ast.Expr { if p.matches(DIV, DIV_ASSIGN) { regex := p.nextRegex() - return &ast.StrExpr{regex} + return &ast.StrExpr{Value: regex, Regex: true} } return parse() } @@ -987,7 +1024,7 @@ func (p *parser) nextRegex() string { panic(p.errorf("%s", p.val)) } regex := p.val - _, err := regexp.Compile(regex) + _, err := regexp.Compile(compiler.AddRegexFlags(regex)) if err != nil { panic(p.errorf("%v", err)) } @@ -1003,6 +1040,13 @@ func (p *parser) expect(tok Token) { p.next() } +// Ensure current token is a name, parse it, and return name and position. +func (p *parser) expectName() (string, Position) { + name, pos := p.val, p.pos + p.expect(NAME) + return name, pos +} + // Return true iff current token matches one of the given operators, // but don't parse next token. func (p *parser) matches(operators ...Token) bool { @@ -1017,13 +1061,7 @@ func (p *parser) matches(operators ...Token) bool { // Format given string and args with Sprintf and return *ParseError // with that message and the current position. func (p *parser) errorf(format string, args ...interface{}) error { - return p.posErrorf(p.pos, format, args...) -} - -// Like errorf, but with an explicit position. -func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error { - message := fmt.Sprintf(format, args...) - return &ParseError{pos, message} + return ast.PosErrorf(p.pos, format, args...) } // Parse call to a user-defined function (and record call site for @@ -1037,12 +1075,37 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { p.commaNewlines() } arg := p.expr() - p.processUserCallArg(name, arg, i) args = append(args, arg) i++ } p.expect(RPAREN) - call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later - p.recordUserCall(call, pos) - return call + return &ast.UserCallExpr{name, args, pos} +} + +// Record a "multi expression" (comma-separated pseudo-expression +// used to allow commas around print/printf arguments). +func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { + expr := &ast.MultiExpr{exprs} + p.multiExprs[expr] = pos + return expr +} + +// Mark the multi expression as used (by a print/printf statement). +func (p *parser) useMultiExpr(expr *ast.MultiExpr) { + delete(p.multiExprs, expr) +} + +// Check that there are no unused multi expressions (syntax error). +func (p *parser) checkMultiExprs() { + if len(p.multiExprs) == 0 { + return + } + // Show error on first comma-separated expression + min := Position{1000000000, 1000000000} + for _, pos := range p.multiExprs { + if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { + min = pos + } + } + panic(ast.PosErrorf(min, "unexpected comma-separated expression")) } diff --git a/play/vendor/github.com/benhoyt/goawk/parser/resolve.go b/play/vendor/github.com/benhoyt/goawk/parser/resolve.go deleted file mode 100644 index a2ed08d..0000000 --- a/play/vendor/github.com/benhoyt/goawk/parser/resolve.go +++ /dev/null @@ -1,462 +0,0 @@ -// Resolve function calls and variable types - -package parser - -import ( - "fmt" - "reflect" - "sort" - - "github.com/benhoyt/goawk/internal/ast" - . "github.com/benhoyt/goawk/lexer" -) - -type varType int - -const ( - typeUnknown varType = iota - typeScalar - typeArray -) - -func (t varType) String() string { - switch t { - case typeScalar: - return "Scalar" - case typeArray: - return "Array" - default: - return "Unknown" - } -} - -// typeInfo records type information for a single variable -type typeInfo struct { - typ varType - ref *ast.VarExpr - scope ast.VarScope - index int - callName string - argIndex int -} - -// Used by printVarTypes when debugTypes is turned on -func (t typeInfo) String() string { - var scope string - switch t.scope { - case ast.ScopeGlobal: - scope = "Global" - case ast.ScopeLocal: - scope = "Local" - default: - scope = "Special" - } - return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d", - t.typ, t.ref, scope, t.index, t.callName, t.argIndex) -} - -// A single variable reference (normally scalar) -type varRef struct { - funcName string - ref *ast.VarExpr - isArg bool - pos Position -} - -// A single array reference -type arrayRef struct { - funcName string - ref *ast.ArrayExpr - pos Position -} - -// Initialize the resolver -func (p *parser) initResolve() { - p.varTypes = make(map[string]map[string]typeInfo) - p.varTypes[""] = make(map[string]typeInfo) // globals - p.functions = make(map[string]int) - p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present - p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays - p.arrayRef("FIELDS", Position{1, 1}) - p.multiExprs = make(map[*ast.MultiExpr]Position, 3) -} - -// Signal the start of a function -func (p *parser) startFunction(name string, params []string) { - p.funcName = name - p.varTypes[name] = make(map[string]typeInfo) -} - -// Signal the end of a function -func (p *parser) stopFunction() { - p.funcName = "" -} - -// Add function by name with given index -func (p *parser) addFunction(name string, index int) { - p.functions[name] = index -} - -// Records a call to a user function (for resolving indexes later) -type userCall struct { - call *ast.UserCallExpr - pos Position - inFunc string -} - -// Record a user call site -func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) { - p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName}) -} - -// After parsing, resolve all user calls to their indexes. Also -// ensures functions called have actually been defined, and that -// they're not being called with too many arguments. -func (p *parser) resolveUserCalls(prog *Program) { - // Number the native funcs (order by name to get consistent order) - nativeNames := make([]string, 0, len(p.nativeFuncs)) - for name := range p.nativeFuncs { - nativeNames = append(nativeNames, name) - } - sort.Strings(nativeNames) - nativeIndexes := make(map[string]int, len(nativeNames)) - for i, name := range nativeNames { - nativeIndexes[name] = i - } - - for _, c := range p.userCalls { - // AWK-defined functions take precedence over native Go funcs - index, ok := p.functions[c.call.Name] - if !ok { - f, haveNative := p.nativeFuncs[c.call.Name] - if !haveNative { - panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name)) - } - typ := reflect.TypeOf(f) - if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) - } - c.call.Native = true - c.call.Index = nativeIndexes[c.call.Name] - continue - } - function := prog.Functions[index] - if len(c.call.Args) > len(function.Params) { - panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) - } - c.call.Index = index - } -} - -// For arguments that are variable references, we don't know the -// type based on context, so mark the types for these as unknown. -func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) { - if varExpr, ok := arg.(*ast.VarExpr); ok { - scope, varFuncName := p.getScope(varExpr.Name) - ref := p.varTypes[varFuncName][varExpr.Name].ref - if ref == varExpr { - // Only applies if this is the first reference to this - // variable (otherwise we know the type already) - p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index} - } - // Mark the last related varRef (the most recent one) as a - // call argument for later error handling - p.varRefs[len(p.varRefs)-1].isArg = true - } -} - -// Determine scope of given variable reference (and funcName if it's -// a local, otherwise empty string) -func (p *parser) getScope(name string) (ast.VarScope, string) { - switch { - case p.locals[name]: - return ast.ScopeLocal, p.funcName - case ast.SpecialVarIndex(name) > 0: - return ast.ScopeSpecial, "" - default: - return ast.ScopeGlobal, "" - } -} - -// Record a variable (scalar) reference and return the *VarExpr (but -// VarExpr.Index won't be set till later) -func (p *parser) varRef(name string, pos Position) *ast.VarExpr { - scope, funcName := p.getScope(name) - expr := &ast.VarExpr{scope, 0, name} - p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos}) - info := p.varTypes[funcName][name] - if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} - } - return expr -} - -// Record an array reference and return the *ArrayExpr (but -// ArrayExpr.Index won't be set till later) -func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { - scope, funcName := p.getScope(name) - if scope == ast.ScopeSpecial { - panic(p.errorf("can't use scalar %q as array", name)) - } - expr := &ast.ArrayExpr{scope, 0, name} - p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos}) - info := p.varTypes[funcName][name] - if info.typ == typeUnknown { - p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} - } - return expr -} - -// Print variable type information (for debugging) on p.debugWriter -func (p *parser) printVarTypes(prog *Program) { - fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars) - fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays) - funcNames := []string{} - for funcName := range p.varTypes { - funcNames = append(funcNames, funcName) - } - sort.Strings(funcNames) - for _, funcName := range funcNames { - if funcName != "" { - fmt.Fprintf(p.debugWriter, "function %s\n", funcName) - } else { - fmt.Fprintf(p.debugWriter, "globals\n") - } - varNames := []string{} - for name := range p.varTypes[funcName] { - varNames = append(varNames, name) - } - sort.Strings(varNames) - for _, name := range varNames { - info := p.varTypes[funcName][name] - fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info) - } - } -} - -// Resolve unknown variables types and generate variable indexes and -// name-to-index mappings for interpreter -func (p *parser) resolveVars(prog *Program) { - // First go through all unknown types and try to determine the - // type from the parameter type in that function definition. - // Iterate through functions in topological order, for example - // if f() calls g(), process g first, then f. - callGraph := make(map[string]map[string]struct{}) - for _, call := range p.userCalls { - if _, ok := callGraph[call.inFunc]; !ok { - callGraph[call.inFunc] = make(map[string]struct{}) - } - callGraph[call.inFunc][call.call.Name] = struct{}{} - } - sortedFuncs := topoSort(callGraph) - for _, funcName := range sortedFuncs { - infos := p.varTypes[funcName] - for name, info := range infos { - if info.scope == ast.ScopeSpecial || info.typ != typeUnknown { - // It's a special var or type is already known - continue - } - funcIndex, ok := p.functions[info.callName] - if !ok { - // Function being called is a native function - continue - } - // Determine var type based on type of this parameter - // in the called function (if we know that) - paramName := prog.Functions[funcIndex].Params[info.argIndex] - typ := p.varTypes[info.callName][paramName].typ - if typ != typeUnknown { - if p.debugTypes { - fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", - funcName, name, typ) - } - info.typ = typ - p.varTypes[funcName][name] = info - } - } - } - - // Resolve global variables (iteration order is undefined, so - // assign indexes basically randomly) - prog.Scalars = make(map[string]int) - prog.Arrays = make(map[string]int) - for name, info := range p.varTypes[""] { - _, isFunc := p.functions[name] - if isFunc { - // Global var can't also be the name of a function - panic(p.errorf("global var %q can't also be a function", name)) - } - var index int - if info.scope == ast.ScopeSpecial { - index = ast.SpecialVarIndex(name) - } else if info.typ == typeArray { - index = len(prog.Arrays) - prog.Arrays[name] = index - } else { - index = len(prog.Scalars) - prog.Scalars[name] = index - } - info.index = index - p.varTypes[""][name] = info - } - - // Fill in unknown parameter types that are being called with arrays, - // for example, as in the following code: - // - // BEGIN { arr[0]; f(arr) } - // function f(a) { } - for _, c := range p.userCalls { - if c.call.Native { - continue - } - function := prog.Functions[c.call.Index] - for i, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - argType := p.varTypes[funcName][varExpr.Name] - paramType := p.varTypes[function.Name][function.Params[i]] - if argType.typ == typeArray && paramType.typ == typeUnknown { - paramType.typ = argType.typ - p.varTypes[function.Name][function.Params[i]] = paramType - } - } - } - - // Resolve local variables (assign indexes in order of params). - // Also patch up Function.Arrays (tells interpreter which args - // are arrays). - for funcName, infos := range p.varTypes { - if funcName == "" { - continue - } - scalarIndex := 0 - arrayIndex := 0 - functionIndex := p.functions[funcName] - function := prog.Functions[functionIndex] - arrays := make([]bool, len(function.Params)) - for i, name := range function.Params { - info := infos[name] - var index int - if info.typ == typeArray { - index = arrayIndex - arrayIndex++ - arrays[i] = true - } else { - // typeScalar or typeUnknown: variables may still be - // of unknown type if they've never been referenced -- - // default to scalar in that case - index = scalarIndex - scalarIndex++ - } - info.index = index - p.varTypes[funcName][name] = info - } - prog.Functions[functionIndex].Arrays = arrays - } - - // Check that variables passed to functions are the correct type - for _, c := range p.userCalls { - // Check native function calls - if c.call.Native { - for _, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - // Non-variable expression, must be scalar - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] - if info.typ == typeArray { - panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) - } - } - continue - } - - // Check AWK function calls - function := prog.Functions[c.call.Index] - for i, arg := range c.call.Args { - varExpr, ok := arg.(*ast.VarExpr) - if !ok { - if function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) - } - continue - } - funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) - info := p.varTypes[funcName][varExpr.Name] - if info.typ == typeArray && !function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) - } - if info.typ != typeArray && function.Arrays[i] { - panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) - } - } - } - - if p.debugTypes { - p.printVarTypes(prog) - } - - // Patch up variable indexes (interpreter uses an index instead - // of name for more efficient lookups) - for _, varRef := range p.varRefs { - info := p.varTypes[varRef.funcName][varRef.ref.Name] - if info.typ == typeArray && !varRef.isArg { - panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) - } - varRef.ref.Index = info.index - } - for _, arrayRef := range p.arrayRefs { - info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name] - if info.typ == typeScalar { - panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) - } - arrayRef.ref.Index = info.index - } -} - -// If name refers to a local (in function inFunc), return that -// function's name, otherwise return "" (meaning global). -func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { - if inFunc == "" { - return "" - } - for _, param := range prog.Functions[p.functions[inFunc]].Params { - if name == param { - return inFunc - } - } - return "" -} - -// Record a "multi expression" (comma-separated pseudo-expression -// used to allow commas around print/printf arguments). -func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { - expr := &ast.MultiExpr{exprs} - p.multiExprs[expr] = pos - return expr -} - -// Mark the multi expression as used (by a print/printf statement). -func (p *parser) useMultiExpr(expr *ast.MultiExpr) { - delete(p.multiExprs, expr) -} - -// Check that there are no unused multi expressions (syntax error). -func (p *parser) checkMultiExprs() { - if len(p.multiExprs) == 0 { - return - } - // Show error on first comma-separated expression - min := Position{1000000000, 1000000000} - for _, pos := range p.multiExprs { - if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { - min = pos - } - } - panic(p.posErrorf(min, "unexpected comma-separated expression")) -} diff --git a/play/vendor/modules.txt b/play/vendor/modules.txt index 774cc0d..7958a0e 100644 --- a/play/vendor/modules.txt +++ b/play/vendor/modules.txt @@ -1,7 +1,8 @@ -# github.com/benhoyt/goawk v1.20.0 -## explicit; go 1.14 +# github.com/benhoyt/goawk v1.26.0 +## explicit; go 1.16 github.com/benhoyt/goawk/internal/ast github.com/benhoyt/goawk/internal/compiler +github.com/benhoyt/goawk/internal/resolver github.com/benhoyt/goawk/interp github.com/benhoyt/goawk/lexer github.com/benhoyt/goawk/parser