Update dependencies, add Gitea workflow

This commit is contained in:
Alex Palaistras 2024-04-07 11:26:35 +01:00
parent 6da6d8c20f
commit e3f7931341
23 changed files with 2141 additions and 1138 deletions

View File

@ -0,0 +1,32 @@
# Workflow that builds the Grawkit Play container image and pushes it to a
# container registry.
name: Grawkit Play Container Build
# Runs on pushes that change the 'grawkit' path or anything under 'play/'.
on:
push:
paths:
- 'grawkit'
- 'play/**'
# Image name and tag; combined with the registry URL in the final 'tags' value.
env:
CONTAINER_NAME: ${{ github.repository_owner }}/grawkit-play
CONTAINER_TAG: latest
jobs:
build:
runs-on: ubuntu-latest
name: Container Build
steps:
- name: Check Out Repository
uses: actions/checkout@v4
# Registry URL comes from a repository variable; credentials come from secrets.
- name: Login to Container Registry
uses: docker/login-action@v2
with:
registry: ${{ vars.CONTAINER_REGISTRY_URL }}
username: ${{ secrets.CONTAINER_REGISTRY_USERNAME }}
password: ${{ secrets.CONTAINER_REGISTRY_PASSWORD }}
# Builds play/Containerfile with the repository root as context, for
# linux/amd64 and linux/arm64, and pushes the result.
# NOTE(review): no QEMU/Buildx setup step is visible in this workflow;
# confirm the runner already supports multi-platform builds.
- name: Build and Push Container
uses: docker/build-push-action@v4
with:
context: .
file: play/Containerfile
platforms: |
linux/amd64
linux/arm64
push: true
tags: ${{ vars.CONTAINER_REGISTRY_URL }}/${{ env.CONTAINER_NAME }}:${{ env.CONTAINER_TAG }}

View File

@ -1,4 +1,4 @@
FROM docker.io/golang:1.19 AS builder
FROM docker.io/golang:1.22 AS builder
WORKDIR /src
COPY play/ /src/

View File

@ -1,5 +1,5 @@
module go.deuill.org/grawkit/play
go 1.19
go 1.20
require github.com/benhoyt/goawk v1.20.0
require github.com/benhoyt/goawk v1.26.0

View File

@ -1,2 +1,2 @@
github.com/benhoyt/goawk v1.20.0 h1:oz81agTfP/8Z7afMvmOwX4Ms9qTtGhZxPEzHCycIFds=
github.com/benhoyt/goawk v1.20.0/go.mod h1:Dp3jBsApuiItYR9atsCm//q/70OnqjihLh5WkU6eW7U=
github.com/benhoyt/goawk v1.26.0 h1:TuZiBi/u7Ra7092CXs+1iGd5PVd0YXicyVcFv5zWVeE=
github.com/benhoyt/goawk v1.26.0/go.mod h1:FjIAicXvrv3wbqAhSTo5bn4mIM5y1iy3lcnIynlJvoI=

View File

@ -10,14 +10,12 @@ import (
. "github.com/benhoyt/goawk/lexer"
)
// Program is an entire AWK program.
// Program is a parsed AWK program.
type Program struct {
Begin []Stmts
Actions []Action
Actions []*Action
End []Stmts
Functions []Function
Scalars map[string]int
Arrays map[string]int
Functions []*Function
}
// String returns an indented, pretty-printed version of the parsed
@ -75,32 +73,137 @@ func (a *Action) String() string {
return strings.Join(patterns, ", ") + sep + stmtsStr
}
// Node is the interface satisfied by all AST elements: the program, actions,
// functions, expressions, and statements. It exists so the AST can be handled
// in a generic way, as ast.Walk() does.
//
// node is an unexported marker method; the implementations in this file are
// all empty.
type Node interface {
node()
}
// All these types implement the Node interface. The method bodies are
// intentionally empty: node() only marks a type as an AST node. The list
// covers the program-level types first, then expressions, then statements.
func (p *Program) node() {}
func (a *Action) node() {}
func (f *Function) node() {}
func (e *FieldExpr) node() {}
func (e *NamedFieldExpr) node() {}
func (e *UnaryExpr) node() {}
func (e *BinaryExpr) node() {}
func (e *InExpr) node() {}
func (e *CondExpr) node() {}
func (e *NumExpr) node() {}
func (e *StrExpr) node() {}
func (e *RegExpr) node() {}
func (e *VarExpr) node() {}
func (e *IndexExpr) node() {}
func (e *AssignExpr) node() {}
func (e *AugAssignExpr) node() {}
func (e *IncrExpr) node() {}
func (e *CallExpr) node() {}
func (e *UserCallExpr) node() {}
func (e *MultiExpr) node() {}
func (e *GetlineExpr) node() {}
func (e *GroupingExpr) node() {}
func (s *PrintStmt) node() {}
func (s *PrintfStmt) node() {}
func (s *ExprStmt) node() {}
func (s *IfStmt) node() {}
func (s *ForStmt) node() {}
func (s *ForInStmt) node() {}
func (s *WhileStmt) node() {}
func (s *DoWhileStmt) node() {}
func (s *BreakStmt) node() {}
func (s *ContinueStmt) node() {}
func (s *NextStmt) node() {}
func (s *NextfileStmt) node() {}
func (s *ExitStmt) node() {}
func (s *DeleteStmt) node() {}
func (s *ReturnStmt) node() {}
func (s *BlockStmt) node() {}
// Expr is the abstract syntax tree for any AWK expression.
type Expr interface {
expr()
Node
precedence() int
String() string
}
// Table of operator precedence, lowest to highest.
//
// The values are assigned by iota, so each constant is one greater than the
// constant above it. The order of the lines is therefore significant:
// parenthesize compares two of these values with "<" to decide whether an
// operand needs parentheses.
const (
precAssign = iota
precCond
precOr
precAnd
precIn
precMatch
precCompare
precConcat
precAdd
precMul
precUnary
precPower
precPreIncr
precPostIncr
precField
precPrimary
precGrouping
)
// All these types implement the Expr interface.
func (e *FieldExpr) expr() {}
func (e *NamedFieldExpr) expr() {}
func (e *UnaryExpr) expr() {}
func (e *BinaryExpr) expr() {}
func (e *ArrayExpr) expr() {}
func (e *InExpr) expr() {}
func (e *CondExpr) expr() {}
func (e *NumExpr) expr() {}
func (e *StrExpr) expr() {}
func (e *RegExpr) expr() {}
func (e *VarExpr) expr() {}
func (e *IndexExpr) expr() {}
func (e *AssignExpr) expr() {}
func (e *AugAssignExpr) expr() {}
func (e *IncrExpr) expr() {}
func (e *CallExpr) expr() {}
func (e *UserCallExpr) expr() {}
func (e *MultiExpr) expr() {}
func (e *GetlineExpr) expr() {}
// precedence implementations for the expression types whose precedence level
// does not depend on the node's contents. Each method returns one of the
// prec* constants; parenthesize compares these values between an operand and
// its enclosing expression.
func (e *FieldExpr) precedence() int { return precField }
func (e *NamedFieldExpr) precedence() int { return precField }
func (e *UnaryExpr) precedence() int { return precUnary }
func (e *InExpr) precedence() int { return precIn }
func (e *CondExpr) precedence() int { return precCond }
func (e *NumExpr) precedence() int { return precPrimary }
func (e *StrExpr) precedence() int { return precPrimary }
func (e *RegExpr) precedence() int { return precPrimary }
func (e *VarExpr) precedence() int { return precPrimary }
func (e *IndexExpr) precedence() int { return precPrimary }
func (e *AssignExpr) precedence() int { return precAssign }
func (e *AugAssignExpr) precedence() int { return precAssign }
func (e *CallExpr) precedence() int { return precPrimary }
func (e *UserCallExpr) precedence() int { return precPrimary }
func (e *MultiExpr) precedence() int { return precPrimary }
func (e *GetlineExpr) precedence() int { return precPrimary }
func (e *GroupingExpr) precedence() int { return precGrouping }
// precedence returns the precedence level of an increment or decrement
// expression: precPreIncr for the prefix form (e.Pre is true) and
// precPostIncr for the postfix form.
func (e *IncrExpr) precedence() int {
	prec := precPostIncr
	if e.Pre {
		prec = precPreIncr
	}
	return prec
}
// precedence returns the precedence level of a binary expression, chosen by
// its operator token. Operators that are not listed in the switch get
// precPrimary.
func (e *BinaryExpr) precedence() int {
	prec := precPrimary // fallback for operators not handled below
	switch e.Op {
	case OR:
		prec = precOr
	case AND:
		prec = precAnd
	case MATCH, NOT_MATCH:
		prec = precMatch
	case EQUALS, NOT_EQUALS, LESS, LTE, GREATER, GTE:
		prec = precCompare
	case CONCAT:
		prec = precConcat
	case ADD, SUB:
		prec = precAdd
	case MUL, DIV, MOD:
		prec = precMul
	case POW:
		prec = precPower
	}
	return prec
}
// parenthesize returns e's string form for use as an operand of other. The
// result is wrapped in parentheses only when e's precedence is strictly lower
// than other's; equal or higher precedence leaves it unwrapped.
func parenthesize(e, other Expr) string {
	text := e.String()
	if e.precedence() < other.precedence() {
		text = "(" + text + ")"
	}
	return text
}
// FieldExpr is an expression like $0.
type FieldExpr struct {
@ -108,7 +211,7 @@ type FieldExpr struct {
}
func (e *FieldExpr) String() string {
return "$" + e.Index.String()
return "$" + parenthesize(e.Index, e)
}
// NamedFieldExpr is an expression like @"name".
@ -117,7 +220,7 @@ type NamedFieldExpr struct {
}
func (e *NamedFieldExpr) String() string {
return "@" + e.Field.String()
return "@" + parenthesize(e.Field, e)
}
// UnaryExpr is an expression like -1234.
@ -127,7 +230,7 @@ type UnaryExpr struct {
}
func (e *UnaryExpr) String() string {
return e.Op.String() + e.Value.String()
return e.Op.String() + parenthesize(e.Value, e)
}
// BinaryExpr is an expression like 1 + 2.
@ -138,43 +241,31 @@ type BinaryExpr struct {
}
func (e *BinaryExpr) String() string {
var opStr string
var op string
if e.Op == CONCAT {
opStr = " "
op = " "
} else {
opStr = " " + e.Op.String() + " "
op = " " + e.Op.String() + " "
}
return "(" + e.Left.String() + opStr + e.Right.String() + ")"
}
// ArrayExpr is an array reference. Not really a stand-alone
// expression, except as an argument to split() or a user function
// call.
type ArrayExpr struct {
Scope VarScope
Index int
Name string
}
func (e *ArrayExpr) String() string {
return e.Name
return parenthesize(e.Left, e) + op + parenthesize(e.Right, e)
}
// InExpr is an expression like (index in array).
type InExpr struct {
Index []Expr
Array *ArrayExpr
Index []Expr
Array string
ArrayPos Position
}
func (e *InExpr) String() string {
if len(e.Index) == 1 {
return "(" + e.Index[0].String() + " in " + e.Array.String() + ")"
return parenthesize(e.Index[0], e) + " in " + e.Array
}
indices := make([]string, len(e.Index))
for i, index := range e.Index {
indices[i] = index.String()
}
return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")"
return "(" + strings.Join(indices, ", ") + ") in " + e.Array
}
// CondExpr is an expression like cond ? 1 : 0.
@ -185,7 +276,7 @@ type CondExpr struct {
}
func (e *CondExpr) String() string {
return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")"
return parenthesize(e.Cond, e) + " ? " + parenthesize(e.True, e) + " : " + parenthesize(e.False, e)
}
// NumExpr is a literal number like 1234.
@ -201,12 +292,16 @@ func (e *NumExpr) String() string {
}
}
// StrExpr is a literal string like "foo".
// StrExpr is a literal string like "foo" or a regex constant like /foo/.
type StrExpr struct {
Value string
Regex bool
}
func (e *StrExpr) String() string {
if e.Regex {
return formatRegex(e.Value)
}
return strconv.Quote(e.Value)
}
@ -217,25 +312,13 @@ type RegExpr struct {
}
func (e *RegExpr) String() string {
escaped := strings.Replace(e.Regex, "/", `\/`, -1)
return "/" + escaped + "/"
return formatRegex(e.Regex)
}
type VarScope int
const (
ScopeSpecial VarScope = iota
ScopeGlobal
ScopeLocal
)
// VarExpr is a variable reference (special var, global, or local).
// Index is the resolved variable index used by the interpreter; Name
// is the original name used by String().
type VarExpr struct {
Scope VarScope
Index int
Name string
Name string
Pos Position
}
func (e *VarExpr) String() string {
@ -244,8 +327,9 @@ func (e *VarExpr) String() string {
// IndexExpr is an expression like a[k] (rvalue or lvalue).
type IndexExpr struct {
Array *ArrayExpr
Index []Expr
Array string
ArrayPos Position
Index []Expr
}
func (e *IndexExpr) String() string {
@ -253,7 +337,7 @@ func (e *IndexExpr) String() string {
for i, index := range e.Index {
indices[i] = index.String()
}
return e.Array.String() + "[" + strings.Join(indices, ", ") + "]"
return e.Array + "[" + strings.Join(indices, ", ") + "]"
}
// AssignExpr is an expression like x = 1234.
@ -263,7 +347,7 @@ type AssignExpr struct {
}
func (e *AssignExpr) String() string {
return e.Left.String() + " = " + e.Right.String()
return parenthesize(e.Left, e) + " = " + parenthesize(e.Right, e)
}
// AugAssignExpr is an assignment expression like x += 5.
@ -274,7 +358,7 @@ type AugAssignExpr struct {
}
func (e *AugAssignExpr) String() string {
return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String()
return parenthesize(e.Left, e) + " " + e.Op.String() + "= " + parenthesize(e.Right, e)
}
// IncrExpr is an increment or decrement expression like x++ or --y.
@ -286,9 +370,9 @@ type IncrExpr struct {
func (e *IncrExpr) String() string {
if e.Pre {
return e.Op.String() + e.Expr.String()
return e.Op.String() + parenthesize(e.Expr, e)
} else {
return e.Expr.String() + e.Op.String()
return parenthesize(e.Expr, e) + e.Op.String()
}
}
@ -306,15 +390,12 @@ func (e *CallExpr) String() string {
return e.Func.String() + "(" + strings.Join(args, ", ") + ")"
}
// UserCallExpr is a user-defined function call like my_func(1, 2, 3)
//
// Index is the resolved function index used by the interpreter; Name
// is the original name used by String().
// UserCallExpr is a user-defined function call like my_func(1, 2, 3),
// where my_func is either AWK-defined or a native Go function.
type UserCallExpr struct {
Native bool // false = AWK-defined function, true = native Go func
Index int
Name string
Args []Expr
Name string
Args []Expr
Pos Position
}
func (e *UserCallExpr) String() string {
@ -349,18 +430,27 @@ type GetlineExpr struct {
func (e *GetlineExpr) String() string {
s := ""
if e.Command != nil {
s += e.Command.String() + " |"
s += parenthesize(e.Command, e) + " |"
}
s += "getline"
if e.Target != nil {
s += " " + e.Target.String()
}
if e.File != nil {
s += " <" + e.File.String()
s += " <" + parenthesize(e.File, e)
}
return s
}
// GroupingExpr is a parenthesized grouping expression.
type GroupingExpr struct {
	Expr Expr // the expression inside the parentheses
}

// String returns the inner expression's string form wrapped in parentheses.
func (e *GroupingExpr) String() string {
	inner := e.Expr.String()
	return "(" + inner + ")"
}
// IsLValue returns true if the given expression can be used as an
// lvalue (on the left-hand side of an assignment, in a ++ or --
// operation, or as the third argument to sub or gsub).
@ -373,8 +463,17 @@ func IsLValue(expr Expr) bool {
}
}
// formatRegex returns the regex string r written as a /.../ literal, with
// every "/" inside r rendered as "\/".
func formatRegex(r string) string {
	return "/" + strings.ReplaceAll(r, "/", `\/`) + "/"
}
// Stmt is the abstract syntax tree for any AWK statement. In addition to
// being a Node, every statement reports the source range it covers through
// StartPos and EndPos and can render itself with String. stmt is an
// unexported marker method.
type Stmt interface {
Node
StartPos() Position // position of first character belonging to the node
EndPos() Position // position of first character immediately after the node
stmt()
String() string
}
@ -391,16 +490,53 @@ func (s *DoWhileStmt) stmt() {}
func (s *BreakStmt) stmt() {}
func (s *ContinueStmt) stmt() {}
func (s *NextStmt) stmt() {}
func (s *NextfileStmt) stmt() {}
func (s *ExitStmt) stmt() {}
func (s *DeleteStmt) stmt() {}
func (s *ReturnStmt) stmt() {}
func (s *BlockStmt) stmt() {}
// StartPos implementations: each statement type returns its Start field.
func (s *PrintStmt) StartPos() Position { return s.Start }
func (s *PrintfStmt) StartPos() Position { return s.Start }
func (s *ExprStmt) StartPos() Position { return s.Start }
func (s *IfStmt) StartPos() Position { return s.Start }
func (s *ForStmt) StartPos() Position { return s.Start }
func (s *ForInStmt) StartPos() Position { return s.Start }
func (s *WhileStmt) StartPos() Position { return s.Start }
func (s *DoWhileStmt) StartPos() Position { return s.Start }
func (s *BreakStmt) StartPos() Position { return s.Start }
func (s *ContinueStmt) StartPos() Position { return s.Start }
func (s *NextStmt) StartPos() Position { return s.Start }
func (s *NextfileStmt) StartPos() Position { return s.Start }
func (s *ExitStmt) StartPos() Position { return s.Start }
func (s *DeleteStmt) StartPos() Position { return s.Start }
func (s *ReturnStmt) StartPos() Position { return s.Start }
func (s *BlockStmt) StartPos() Position { return s.Start }
// EndPos implementations: each statement type returns its End field.
func (s *PrintStmt) EndPos() Position { return s.End }
func (s *PrintfStmt) EndPos() Position { return s.End }
func (s *ExprStmt) EndPos() Position { return s.End }
func (s *IfStmt) EndPos() Position { return s.End }
func (s *ForStmt) EndPos() Position { return s.End }
func (s *ForInStmt) EndPos() Position { return s.End }
func (s *WhileStmt) EndPos() Position { return s.End }
func (s *DoWhileStmt) EndPos() Position { return s.End }
func (s *BreakStmt) EndPos() Position { return s.End }
func (s *ContinueStmt) EndPos() Position { return s.End }
func (s *NextStmt) EndPos() Position { return s.End }
func (s *NextfileStmt) EndPos() Position { return s.End }
func (s *ExitStmt) EndPos() Position { return s.End }
func (s *DeleteStmt) EndPos() Position { return s.End }
func (s *ReturnStmt) EndPos() Position { return s.End }
func (s *BlockStmt) EndPos() Position { return s.End }
// PrintStmt is a statement like print $1, $3.
type PrintStmt struct {
Args []Expr
Redirect Token
Dest Expr
Start Position
End Position
}
func (s *PrintStmt) String() string {
@ -424,6 +560,8 @@ type PrintfStmt struct {
Args []Expr
Redirect Token
Dest Expr
Start Position
End Position
}
func (s *PrintfStmt) String() string {
@ -432,7 +570,9 @@ func (s *PrintfStmt) String() string {
// ExprStmt is a statement like a bare function call: my_func(x).
type ExprStmt struct {
Expr Expr
Expr Expr
Start Position
End Position
}
func (s *ExprStmt) String() string {
@ -441,13 +581,16 @@ func (s *ExprStmt) String() string {
// IfStmt is an if or if-else statement.
type IfStmt struct {
Cond Expr
Body Stmts
Else Stmts
Cond Expr
BodyStart Position
Body Stmts
Else Stmts
Start Position
End Position
}
func (s *IfStmt) String() string {
str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
str := "if (" + s.Cond.String() + ") {\n" + s.Body.String() + "}"
if len(s.Else) > 0 {
str += " else {\n" + s.Else.String() + "}"
}
@ -456,10 +599,13 @@ func (s *IfStmt) String() string {
// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i.
type ForStmt struct {
Pre Stmt
Cond Expr
Post Stmt
Body Stmts
Pre Stmt
Cond Expr
Post Stmt
BodyStart Position
Body Stmts
Start Position
End Position
}
func (s *ForStmt) String() string {
@ -469,7 +615,7 @@ func (s *ForStmt) String() string {
}
condStr := ""
if s.Cond != nil {
condStr = " " + trimParens(s.Cond.String())
condStr = " " + s.Cond.String()
}
postStr := ""
if s.Post != nil {
@ -480,59 +626,90 @@ func (s *ForStmt) String() string {
// ForInStmt is a for loop like for (k in a) print k, a[k].
type ForInStmt struct {
Var *VarExpr
Array *ArrayExpr
Body Stmts
Var string
VarPos Position
Array string
ArrayPos Position
BodyStart Position
Body Stmts
Start Position
End Position
}
func (s *ForInStmt) String() string {
return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}"
return "for (" + s.Var + " in " + s.Array + ") {\n" + s.Body.String() + "}"
}
// WhileStmt is a while loop.
type WhileStmt struct {
Cond Expr
Body Stmts
Cond Expr
BodyStart Position
Body Stmts
Start Position
End Position
}
func (s *WhileStmt) String() string {
return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}"
return "while (" + s.Cond.String() + ") {\n" + s.Body.String() + "}"
}
// DoWhileStmt is a do-while loop.
type DoWhileStmt struct {
Body Stmts
Cond Expr
Body Stmts
Cond Expr
Start Position
End Position
}
func (s *DoWhileStmt) String() string {
return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")"
return "do {\n" + s.Body.String() + "} while (" + s.Cond.String() + ")"
}
// BreakStmt is a break statement.
type BreakStmt struct{}
type BreakStmt struct {
Start Position
End Position
}
func (s *BreakStmt) String() string {
return "break"
}
// ContinueStmt is a continue statement.
type ContinueStmt struct{}
type ContinueStmt struct {
Start Position
End Position
}
func (s *ContinueStmt) String() string {
return "continue"
}
// NextStmt is a next statement.
type NextStmt struct{}
type NextStmt struct {
Start Position
End Position
}
func (s *NextStmt) String() string {
return "next"
}
// NextfileStmt is a nextfile statement.
type NextfileStmt struct {
Start Position // returned by StartPos
End Position // returned by EndPos
}
// String returns the statement's source form, the keyword "nextfile".
func (s *NextfileStmt) String() string {
return "nextfile"
}
// ExitStmt is an exit statement.
type ExitStmt struct {
Status Expr
Start Position
End Position
}
func (s *ExitStmt) String() string {
@ -545,21 +722,29 @@ func (s *ExitStmt) String() string {
// DeleteStmt is a statement like delete a[k].
type DeleteStmt struct {
Array *ArrayExpr
Index []Expr
Array string
ArrayPos Position
Index []Expr
Start Position
End Position
}
func (s *DeleteStmt) String() string {
if len(s.Index) == 0 {
return "delete " + s.Array
}
indices := make([]string, len(s.Index))
for i, index := range s.Index {
indices[i] = index.String()
}
return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]"
return "delete " + s.Array + "[" + strings.Join(indices, ", ") + "]"
}
// ReturnStmt is a return statement.
type ReturnStmt struct {
Value Expr
Start Position
End Position
}
func (s *ReturnStmt) String() string {
@ -572,7 +757,9 @@ func (s *ReturnStmt) String() string {
// BlockStmt is a stand-alone block like { print "x" }.
type BlockStmt struct {
Body Stmts
Body Stmts
Start Position
End Position
}
func (s *BlockStmt) String() string {
@ -583,8 +770,8 @@ func (s *BlockStmt) String() string {
type Function struct {
Name string
Params []string
Arrays []bool
Body Stmts
Pos Position
}
func (f *Function) String() string {
@ -592,9 +779,22 @@ func (f *Function) String() string {
f.Body.String() + "}"
}
func trimParens(s string) string {
if strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") {
s = s[1 : len(s)-1]
}
return s
// PositionError represents an error bound to a specific position in the
// source.
type PositionError struct {
	Position Position // source line/column position where the error occurred
	Message  string   // error message
}

// PosErrorf is like fmt.Errorf, but with an explicit position. The returned
// error is a *PositionError whose Message is the formatted text.
func PosErrorf(pos Position, format string, args ...interface{}) error {
	return &PositionError{
		Position: pos,
		Message:  fmt.Sprintf(format, args...),
	}
}

// Error returns a formatted version of the error, including the line
// and column numbers.
func (e *PositionError) Error() string {
	pos := e.Position
	return fmt.Sprintf("parse error at %d:%d: %s", pos.Line, pos.Column, e.Message)
}

View File

@ -0,0 +1,177 @@
package ast
import "fmt"
// Visitor has a Visit method which is invoked for each node encountered by Walk.
// If the result visitor w is not nil, Walk visits each of the children
// of node with the visitor w, followed by a call of w.Visit(nil).
// If w is nil, Walk does not descend into the node's children.
type Visitor interface {
Visit(node Node) (w Visitor)
}
// WalkExprList calls Walk with visitor v on each expression in exprs, in
// slice order.
func WalkExprList(v Visitor, exprs []Expr) {
	for i := 0; i < len(exprs); i++ {
		Walk(v, exprs[i])
	}
}
// WalkStmtList calls Walk with visitor v on each statement in stmts, in
// slice order.
func WalkStmtList(v Visitor, stmts []Stmt) {
	for i := 0; i < len(stmts); i++ {
		Walk(v, stmts[i])
	}
}
// Walk traverses an AST in depth-first order. If node is nil, Walk returns
// immediately without calling the visitor. Otherwise it calls v.Visit(node);
// if the visitor w returned by v.Visit(node) is nil, the node's children are
// skipped. If w is not nil, Walk is invoked recursively with visitor w for
// each of the children of node (nil children are ignored by the recursive
// call), followed by a call of w.Visit(nil).
//
// Walk panics if node's dynamic type is not one of the AST node types
// handled in the switch below.
func Walk(v Visitor, node Node) {
if node == nil {
return
}
// From here on v is the visitor returned for this node, not the one passed in.
if v = v.Visit(node); v == nil {
return
}
// walk children
// (the order of the cases matches the order
// of the corresponding node types in ast.go)
switch n := node.(type) {
// expressions
case *FieldExpr:
Walk(v, n.Index)
case *NamedFieldExpr:
Walk(v, n.Field)
case *UnaryExpr:
Walk(v, n.Value)
case *BinaryExpr:
Walk(v, n.Left)
Walk(v, n.Right)
case *InExpr:
// Only the index expressions are child nodes; the array is stored as a name.
WalkExprList(v, n.Index)
case *CondExpr:
Walk(v, n.Cond)
Walk(v, n.True)
Walk(v, n.False)
case *NumExpr: // leaf
case *StrExpr: // leaf
case *RegExpr: // leaf
case *VarExpr: // leaf
case *IndexExpr:
WalkExprList(v, n.Index)
case *AssignExpr:
Walk(v, n.Left)
Walk(v, n.Right)
case *AugAssignExpr:
Walk(v, n.Left)
Walk(v, n.Right)
case *IncrExpr:
Walk(v, n.Expr)
case *CallExpr:
WalkExprList(v, n.Args)
case *UserCallExpr:
WalkExprList(v, n.Args)
case *MultiExpr:
WalkExprList(v, n.Exprs)
case *GetlineExpr:
Walk(v, n.Command)
Walk(v, n.Target)
Walk(v, n.File)
case *GroupingExpr:
Walk(v, n.Expr)
// statements
case *PrintStmt:
WalkExprList(v, n.Args)
Walk(v, n.Dest)
case *PrintfStmt:
WalkExprList(v, n.Args)
Walk(v, n.Dest)
case *ExprStmt:
Walk(v, n.Expr)
case *IfStmt:
Walk(v, n.Cond)
WalkStmtList(v, n.Body)
WalkStmtList(v, n.Else)
case *ForStmt:
Walk(v, n.Pre)
Walk(v, n.Cond)
Walk(v, n.Post)
WalkStmtList(v, n.Body)
case *ForInStmt:
// The loop variable and array are stored as names, so only the body is walked.
WalkStmtList(v, n.Body)
case *WhileStmt:
Walk(v, n.Cond)
WalkStmtList(v, n.Body)
case *DoWhileStmt:
WalkStmtList(v, n.Body)
Walk(v, n.Cond)
case *BreakStmt: // leaf
case *ContinueStmt: // leaf
case *NextStmt: // leaf
case *NextfileStmt: // leaf
case *ExitStmt:
Walk(v, n.Status)
case *DeleteStmt:
WalkExprList(v, n.Index)
case *ReturnStmt:
Walk(v, n.Value)
case *BlockStmt:
WalkStmtList(v, n.Body)
case *Program:
// Visit order: BEGIN blocks, pattern-action blocks, functions, END blocks.
for _, stmts := range n.Begin {
WalkStmtList(v, stmts)
}
for _, action := range n.Actions {
Walk(v, action)
}
for _, function := range n.Functions {
Walk(v, function)
}
for _, stmts := range n.End {
WalkStmtList(v, stmts)
}
case *Action:
WalkExprList(v, n.Pattern)
WalkStmtList(v, n.Stmts)
case *Function:
WalkStmtList(v, n.Body)
default:
panic(fmt.Sprintf("ast.Walk: unexpected node type %T", n))
}
// Signal the end of this node's children to the visitor.
v.Visit(nil)
}

View File

@ -5,8 +5,10 @@ import (
"fmt"
"math"
"regexp"
"strconv"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/resolver"
"github.com/benhoyt/goawk/lexer"
)
@ -54,7 +56,7 @@ func (e *compileError) Error() string {
}
// Compile compiles an AST (parsed program) into virtual machine instructions.
func Compile(prog *ast.Program) (compiledProg *Program, err error) {
func Compile(resolved *resolver.ResolvedProgram) (compiledProg *Program, err error) {
defer func() {
// The compiler uses panic with a *compileError to signal compile
// errors internally, and they're caught here. This avoids the
@ -77,57 +79,60 @@ func Compile(prog *ast.Program) (compiledProg *Program, err error) {
// Compile functions. For functions called before they're defined or
// recursive functions, we have to set most p.Functions data first, then
// compile Body afterward.
p.Functions = make([]Function, len(prog.Functions))
for i, astFunc := range prog.Functions {
p.Functions = make([]Function, len(resolved.Functions))
for i, astFunc := range resolved.Functions {
arrays := make([]bool, len(astFunc.Params))
numArrays := 0
for _, a := range astFunc.Arrays {
if a {
for j, param := range astFunc.Params {
_, info, _ := resolved.LookupVar(astFunc.Name, param)
if info.Type == resolver.Array {
arrays[j] = true
numArrays++
}
}
compiledFunc := Function{
Name: astFunc.Name,
Params: astFunc.Params,
Arrays: astFunc.Arrays,
NumScalars: len(astFunc.Arrays) - numArrays,
Arrays: arrays,
NumScalars: len(astFunc.Params) - numArrays,
NumArrays: numArrays,
}
p.Functions[i] = compiledFunc
}
for i, astFunc := range prog.Functions {
c := &compiler{program: p, indexes: indexes}
for i, astFunc := range resolved.Functions {
c := compiler{resolved: resolved, program: p, indexes: indexes, funcName: astFunc.Name}
c.stmts(astFunc.Body)
p.Functions[i].Body = c.finish()
}
// Compile BEGIN blocks.
for _, stmts := range prog.Begin {
c := &compiler{program: p, indexes: indexes}
for _, stmts := range resolved.Begin {
c := compiler{resolved: resolved, program: p, indexes: indexes}
c.stmts(stmts)
p.Begin = append(p.Begin, c.finish()...)
}
// Compile pattern-action blocks.
for _, action := range prog.Actions {
for _, action := range resolved.Actions {
var pattern [][]Opcode
switch len(action.Pattern) {
case 0:
// Always considered a match
case 1:
c := &compiler{program: p, indexes: indexes}
c := compiler{resolved: resolved, program: p, indexes: indexes}
c.expr(action.Pattern[0])
pattern = [][]Opcode{c.finish()}
case 2:
c := &compiler{program: p, indexes: indexes}
c := compiler{resolved: resolved, program: p, indexes: indexes}
c.expr(action.Pattern[0])
pattern = append(pattern, c.finish())
c = &compiler{program: p, indexes: indexes}
c = compiler{resolved: resolved, program: p, indexes: indexes}
c.expr(action.Pattern[1])
pattern = append(pattern, c.finish())
}
var body []Opcode
if len(action.Stmts) > 0 {
c := &compiler{program: p, indexes: indexes}
c := compiler{resolved: resolved, program: p, indexes: indexes}
c.stmts(action.Stmts)
body = c.finish()
}
@ -138,21 +143,33 @@ func Compile(prog *ast.Program) (compiledProg *Program, err error) {
}
// Compile END blocks.
for _, stmts := range prog.End {
c := &compiler{program: p, indexes: indexes}
for _, stmts := range resolved.End {
c := compiler{resolved: resolved, program: p, indexes: indexes}
c.stmts(stmts)
p.End = append(p.End, c.finish()...)
}
// Build slices that map indexes to names (for variables and functions).
// These are only used for disassembly, but set them up here.
p.scalarNames = make([]string, len(prog.Scalars))
for name, index := range prog.Scalars {
p.scalarNames[index] = name
}
p.arrayNames = make([]string, len(prog.Arrays))
for name, index := range prog.Arrays {
p.arrayNames[index] = name
}
resolved.IterVars("", func(name string, info resolver.VarInfo) {
if info.Type == resolver.Array {
for len(p.arrayNames) <= info.Index {
p.arrayNames = append(p.arrayNames, "")
}
p.arrayNames[info.Index] = name
} else {
for len(p.scalarNames) <= info.Index {
p.scalarNames = append(p.scalarNames, "")
}
p.scalarNames[info.Index] = name
}
})
resolved.IterFuncs(func(name string, info resolver.FuncInfo) {
for len(p.nativeFuncNames) <= info.Index {
p.nativeFuncNames = append(p.nativeFuncNames, "")
}
p.nativeFuncNames[info.Index] = name
})
return p, nil
}
@ -166,13 +183,31 @@ type constantIndexes struct {
// Holds the compilation state. Compile creates a separate compiler value for
// each unit it compiles: a function body, a BEGIN or END block, a pattern
// expression, or an action body.
type compiler struct {
resolved *resolver.ResolvedProgram // resolver output; queried with LookupVar for a variable's scope, type, and index
program *Program // compiled program being built
indexes constantIndexes
funcName string // name of the function being compiled; left empty outside function bodies
code []Opcode // opcodes emitted so far (appended to by add)
breaks [][]int // one entry pushed per enclosing loop; presumably jump offsets to patch, TODO confirm
continues [][]int // one entry pushed per enclosing loop; presumably jump offsets to patch, TODO confirm
}
// scalarInfo looks up the variable name in the context of the function being
// compiled (c.funcName) and returns its scope and index. The third result of
// LookupVar is discarded. It panics with an internal-error message if the
// resolved variable is not a scalar.
func (c *compiler) scalarInfo(name string) (scope resolver.Scope, index int) {
	varScope, varInfo, _ := c.resolved.LookupVar(c.funcName, name)
	if varInfo.Type == resolver.Scalar {
		return varScope, varInfo.Index
	}
	panic(fmt.Sprintf("internal error: found %s when expecting scalar %q", varInfo.Type, name))
}
// arrayInfo looks up the variable name in the context of the function being
// compiled (c.funcName) and returns its scope and index. The third result of
// LookupVar is discarded. It panics with an internal-error message if the
// resolved variable is not an array.
func (c *compiler) arrayInfo(name string) (scope resolver.Scope, index int) {
	varScope, varInfo, _ := c.resolved.LookupVar(c.funcName, name)
	if varInfo.Type == resolver.Array {
		return varScope, varInfo.Index
	}
	panic(fmt.Sprintf("internal error: found %s when expecting array %q", varInfo.Type, name))
}
func (c *compiler) add(ops ...Opcode) {
c.code = append(c.code, ops...)
}
@ -201,24 +236,26 @@ func (c *compiler) stmt(stmt ast.Stmt) {
// Pre or post doesn't matter for an assignment expression
switch target := expr.Expr.(type) {
case *ast.VarExpr:
switch target.Scope {
case ast.ScopeGlobal:
c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(target.Index))
case ast.ScopeLocal:
c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(target.Index))
scope, index := c.scalarInfo(target.Name)
switch scope {
case resolver.Global:
c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(index))
case resolver.Local:
c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(index))
default: // ScopeSpecial
c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(target.Index))
c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(index))
}
case *ast.FieldExpr:
c.expr(target.Index)
c.add(IncrField, incrAmount(expr.Op))
case *ast.IndexExpr:
c.index(target.Index)
switch target.Array.Scope {
case ast.ScopeGlobal:
c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(target.Array.Index))
scope, index := c.arrayInfo(target.Array)
switch scope {
case resolver.Global:
c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(index))
default: // ScopeLocal
c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(target.Array.Index))
c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(index))
}
}
return
@ -244,24 +281,26 @@ func (c *compiler) stmt(stmt ast.Stmt) {
switch target := expr.Left.(type) {
case *ast.VarExpr:
switch target.Scope {
case ast.ScopeGlobal:
c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(target.Index))
case ast.ScopeLocal:
c.add(AugAssignLocal, Opcode(augOp), opcodeInt(target.Index))
scope, index := c.scalarInfo(target.Name)
switch scope {
case resolver.Global:
c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(index))
case resolver.Local:
c.add(AugAssignLocal, Opcode(augOp), opcodeInt(index))
default: // ScopeSpecial
c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(target.Index))
c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(index))
}
case *ast.FieldExpr:
c.expr(target.Index)
c.add(AugAssignField, Opcode(augOp))
case *ast.IndexExpr:
c.index(target.Index)
switch target.Array.Scope {
case ast.ScopeGlobal:
c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(target.Array.Index))
scope, index := c.arrayInfo(target.Array)
switch scope {
case resolver.Global:
c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(index))
default: // ScopeLocal
c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(target.Array.Index))
c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(index))
}
}
return
@ -345,8 +384,10 @@ func (c *compiler) stmt(stmt ast.Stmt) {
// Otherwise we'd need to build a slice of all keys rather than
// iterating, or write our own hash table that has a more flexible
// iterator.
mark := c.jumpForward(ForIn, opcodeInt(int(s.Var.Scope)), opcodeInt(s.Var.Index),
Opcode(s.Array.Scope), opcodeInt(s.Array.Index))
varScope, varIndex := c.scalarInfo(s.Var)
arrayScope, arrayIndex := c.arrayInfo(s.Array)
mark := c.jumpForward(ForIn, opcodeInt(int(varScope)), opcodeInt(varIndex),
Opcode(arrayScope), opcodeInt(arrayIndex))
c.breaks = append(c.breaks, nil) // nil tells BreakStmt it's a for-in loop
c.continues = append(c.continues, []int{})
@ -415,20 +456,24 @@ func (c *compiler) stmt(stmt ast.Stmt) {
case *ast.NextStmt:
c.add(Next)
case *ast.NextfileStmt:
c.add(Nextfile)
case *ast.ExitStmt:
if s.Status != nil {
c.expr(s.Status)
c.add(ExitStatus)
} else {
c.expr(&ast.NumExpr{0})
c.add(Exit)
}
c.add(Exit)
case *ast.DeleteStmt:
scope, index := c.arrayInfo(s.Array)
if len(s.Index) > 0 {
c.index(s.Index)
c.add(Delete, Opcode(s.Array.Scope), opcodeInt(s.Array.Index))
c.add(Delete, Opcode(scope), opcodeInt(index))
} else {
c.add(DeleteAll, Opcode(s.Array.Scope), opcodeInt(s.Array.Index))
c.add(DeleteAll, Opcode(scope), opcodeInt(index))
}
case *ast.BlockStmt:
@ -451,27 +496,48 @@ func incrAmount(op lexer.Token) Opcode {
// Generate opcodes for an assignment.
func (c *compiler) assign(target ast.Expr) {
switch target := target.(type) {
switch t := target.(type) {
case *ast.VarExpr:
switch target.Scope {
case ast.ScopeGlobal:
c.add(AssignGlobal, opcodeInt(target.Index))
case ast.ScopeLocal:
c.add(AssignLocal, opcodeInt(target.Index))
case ast.ScopeSpecial:
c.add(AssignSpecial, opcodeInt(target.Index))
scope, index := c.scalarInfo(t.Name)
switch scope {
case resolver.Global:
c.add(AssignGlobal, opcodeInt(index))
case resolver.Local:
c.add(AssignLocal, opcodeInt(index))
case resolver.Special:
c.add(AssignSpecial, opcodeInt(index))
}
case *ast.FieldExpr:
c.expr(target.Index)
c.expr(t.Index)
c.add(AssignField)
case *ast.IndexExpr:
c.index(target.Index)
switch target.Array.Scope {
case ast.ScopeGlobal:
c.add(AssignArrayGlobal, opcodeInt(target.Array.Index))
case ast.ScopeLocal:
c.add(AssignArrayLocal, opcodeInt(target.Array.Index))
}
c.index(t.Index)
c.assignIndexExpr(t)
}
}
// assignIndexExpr emits the array-store opcode for target, choosing the
// global or local variant from the scope reported by c.arrayInfo. It does not
// compile the index expression itself; callers handle that before calling
// (assign compiles it with c.index, assignRoteIndex emits Rote instead).
func (c *compiler) assignIndexExpr(target *ast.IndexExpr) {
	arrayScope, arrayIndex := c.arrayInfo(target.Array)
	if arrayScope == resolver.Global {
		c.add(AssignArrayGlobal, opcodeInt(arrayIndex))
	} else if arrayScope == resolver.Local {
		c.add(AssignArrayLocal, opcodeInt(arrayIndex))
	}
}
// assignRoteIndex emits an assignment to target without compiling the
// target's index expression again: for field and array-element targets it
// emits Rote followed by the store opcode. Plain variables have no index, so
// they are delegated to assign unchanged.
func (c *compiler) assignRoteIndex(target ast.Expr) {
	switch lvalue := target.(type) {
	case *ast.IndexExpr:
		c.add(Rote)
		c.assignIndexExpr(lvalue)
	case *ast.FieldExpr:
		c.add(Rote)
		c.add(AssignField)
	case *ast.VarExpr:
		// Nothing to rotate for a variable; a normal assignment suffices.
		c.assign(target)
	}
}
@ -611,13 +677,14 @@ func (c *compiler) expr(expr ast.Expr) {
c.add(FieldByName)
case *ast.VarExpr:
switch e.Scope {
case ast.ScopeGlobal:
c.add(Global, opcodeInt(e.Index))
case ast.ScopeLocal:
c.add(Local, opcodeInt(e.Index))
case ast.ScopeSpecial:
c.add(Special, opcodeInt(e.Index))
scope, index := c.scalarInfo(e.Name)
switch scope {
case resolver.Global:
c.add(Global, opcodeInt(index))
case resolver.Local:
c.add(Local, opcodeInt(index))
case resolver.Special:
c.add(Special, opcodeInt(index))
}
case *ast.RegExpr:
@ -658,19 +725,20 @@ func (c *compiler) expr(expr ast.Expr) {
op = Subtract
}
if e.Pre {
c.expr(e.Expr)
c.dupeIndexLValue(e.Expr)
c.expr(&ast.NumExpr{1})
c.add(op)
c.add(Dupe)
c.assignRoteIndex(e.Expr)
} else {
c.expr(e.Expr)
c.expr(&ast.NumExpr{0})
c.dupeIndexLValue(e.Expr)
c.expr(&ast.NumExpr{0}) // add 0 to coerce result to number
c.add(Add)
c.add(Dupe)
c.expr(&ast.NumExpr{1})
c.add(op)
c.assignRoteIndex(e.Expr)
}
c.assign(e.Expr)
case *ast.AssignExpr:
// Most AssignExpr (standalone) will be handled by the ExprStmt special case
@ -680,12 +748,22 @@ func (c *compiler) expr(expr ast.Expr) {
case *ast.AugAssignExpr:
// Most AugAssignExpr (standalone) will be handled by the ExprStmt special case
c.expr(e.Right)
c.expr(e.Left)
c.add(Swap)
c.binaryOp(e.Op)
c.add(Dupe)
c.assign(e.Left)
switch e.Left.(type) {
case *ast.FieldExpr, *ast.IndexExpr:
c.expr(e.Right)
c.dupeIndexLValue(e.Left)
c.add(Rote)
c.binaryOp(e.Op)
c.add(Dupe)
c.assignRoteIndex(e.Left)
case *ast.VarExpr:
c.expr(e.Right)
c.expr(e.Left)
c.add(Swap)
c.binaryOp(e.Op)
c.add(Dupe)
c.assign(e.Left)
}
case *ast.CondExpr:
jump := c.condition(e.Cond, true)
@ -698,24 +776,20 @@ func (c *compiler) expr(expr ast.Expr) {
case *ast.IndexExpr:
c.index(e.Index)
switch e.Array.Scope {
case ast.ScopeGlobal:
c.add(ArrayGlobal, opcodeInt(e.Array.Index))
case ast.ScopeLocal:
c.add(ArrayLocal, opcodeInt(e.Array.Index))
}
c.indexExpr(e)
case *ast.CallExpr:
// split and sub/gsub require special cases as they have lvalue arguments
switch e.Func {
case lexer.F_SPLIT:
c.expr(e.Args[0])
arrayExpr := e.Args[1].(*ast.ArrayExpr)
varExpr := e.Args[1].(*ast.VarExpr) // split()'s 2nd arg is always an array
scope, index := c.arrayInfo(varExpr.Name)
if len(e.Args) > 2 {
c.expr(e.Args[2])
c.add(CallSplitSep, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index))
c.add(CallSplitSep, Opcode(scope), opcodeInt(index))
} else {
c.add(CallSplit, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index))
c.add(CallSplit, Opcode(scope), opcodeInt(index))
}
return
case lexer.F_SUB, lexer.F_GSUB:
@ -727,11 +801,38 @@ func (c *compiler) expr(expr ast.Expr) {
if len(e.Args) == 3 {
target = e.Args[2]
}
c.expr(e.Args[0])
c.expr(e.Args[1])
c.expr(target)
c.add(CallBuiltin, Opcode(op))
c.assign(target)
switch target.(type) {
case *ast.FieldExpr, *ast.IndexExpr:
c.dupeIndexLValue(target)
c.expr(e.Args[0])
c.expr(e.Args[1])
c.add(Rote)
c.add(CallBuiltin, Opcode(op))
c.assignRoteIndex(target)
case *ast.VarExpr:
c.expr(e.Args[0])
c.expr(e.Args[1])
c.expr(target)
c.add(CallBuiltin, Opcode(op))
c.assign(target)
}
return
case lexer.F_LENGTH:
if len(e.Args) > 0 {
// Determine if the call is length(arrayVar) or length(stringExpr).
if varExpr, ok := e.Args[0].(*ast.VarExpr); ok {
scope, info, _ := c.resolved.LookupVar(c.funcName, varExpr.Name)
if info.Type == resolver.Array {
c.add(CallLengthArray, Opcode(scope), opcodeInt(info.Index))
return
}
}
c.expr(e.Args[0])
c.add(CallBuiltin, Opcode(BuiltinLengthArg))
} else {
c.add(CallBuiltin, Opcode(BuiltinLength))
}
return
}
@ -757,12 +858,6 @@ func (c *compiler) expr(expr ast.Expr) {
c.add(CallBuiltin, Opcode(BuiltinIndex))
case lexer.F_INT:
c.add(CallBuiltin, Opcode(BuiltinInt))
case lexer.F_LENGTH:
if len(e.Args) > 0 {
c.add(CallBuiltin, Opcode(BuiltinLengthArg))
} else {
c.add(CallBuiltin, Opcode(BuiltinLength))
}
case lexer.F_LOG:
c.add(CallBuiltin, Opcode(BuiltinLog))
case lexer.F_MATCH:
@ -810,31 +905,30 @@ func (c *compiler) expr(expr ast.Expr) {
case *ast.InExpr:
c.index(e.Index)
switch e.Array.Scope {
case ast.ScopeGlobal:
c.add(InGlobal, opcodeInt(e.Array.Index))
scope, index := c.arrayInfo(e.Array)
switch scope {
case resolver.Global:
c.add(InGlobal, opcodeInt(index))
default: // ScopeLocal
c.add(InLocal, opcodeInt(e.Array.Index))
c.add(InLocal, opcodeInt(index))
}
case *ast.UserCallExpr:
if e.Native {
funcInfo, _ := c.resolved.LookupFunc(e.Name)
if funcInfo.Native {
for _, arg := range e.Args {
c.expr(arg)
}
c.add(CallNative, opcodeInt(e.Index), opcodeInt(len(e.Args)))
for len(c.program.nativeFuncNames) <= e.Index {
c.program.nativeFuncNames = append(c.program.nativeFuncNames, "")
}
c.program.nativeFuncNames[e.Index] = e.Name
c.add(CallNative, opcodeInt(funcInfo.Index), opcodeInt(len(e.Args)))
} else {
f := c.program.Functions[e.Index]
f := c.program.Functions[funcInfo.Index]
var arrayOpcodes []Opcode
numScalarArgs := 0
for i, arg := range e.Args {
if f.Arrays[i] {
a := arg.(*ast.VarExpr)
arrayOpcodes = append(arrayOpcodes, Opcode(a.Scope), opcodeInt(a.Index))
scope, index := c.arrayInfo(a.Name)
arrayOpcodes = append(arrayOpcodes, Opcode(scope), opcodeInt(index))
} else {
c.expr(arg)
numScalarArgs++
@ -843,7 +937,7 @@ func (c *compiler) expr(expr ast.Expr) {
if numScalarArgs < f.NumScalars {
c.add(Nulls, opcodeInt(f.NumScalars-numScalarArgs))
}
c.add(CallUser, opcodeInt(e.Index), opcodeInt(len(arrayOpcodes)/2))
c.add(CallUser, opcodeInt(funcInfo.Index), opcodeInt(len(arrayOpcodes)/2))
c.add(arrayOpcodes...)
}
@ -862,30 +956,62 @@ func (c *compiler) expr(expr ast.Expr) {
}
switch target := e.Target.(type) {
case *ast.VarExpr:
switch target.Scope {
case ast.ScopeGlobal:
c.add(GetlineGlobal, redirect(), opcodeInt(target.Index))
case ast.ScopeLocal:
c.add(GetlineLocal, redirect(), opcodeInt(target.Index))
case ast.ScopeSpecial:
c.add(GetlineSpecial, redirect(), opcodeInt(target.Index))
scope, index := c.scalarInfo(target.Name)
switch scope {
case resolver.Global:
c.add(GetlineGlobal, redirect(), opcodeInt(index))
case resolver.Local:
c.add(GetlineLocal, redirect(), opcodeInt(index))
case resolver.Special:
c.add(GetlineSpecial, redirect(), opcodeInt(index))
}
case *ast.FieldExpr:
c.expr(target.Index)
c.add(GetlineField, redirect())
case *ast.IndexExpr:
c.index(target.Index)
c.add(GetlineArray, redirect(), Opcode(target.Array.Scope), opcodeInt(target.Array.Index))
scope, index := c.arrayInfo(target.Array)
c.add(GetlineArray, redirect(), Opcode(scope), opcodeInt(index))
default:
c.add(Getline, redirect())
}
case *ast.GroupingExpr:
c.expr(e.Expr)
default:
// Should never happen
panic(fmt.Sprintf("unexpected expr type: %T", expr))
}
}
// indexExpr emits the array-fetch opcode for e (ArrayGlobal or ArrayLocal,
// according to the scope reported by c.arrayInfo). The index expression is
// not compiled here; callers do that first via c.index.
func (c *compiler) indexExpr(e *ast.IndexExpr) {
	arrayScope, arrayIndex := c.arrayInfo(e.Array)
	if arrayScope == resolver.Global {
		c.add(ArrayGlobal, opcodeInt(arrayIndex))
		return
	}
	if arrayScope == resolver.Local {
		c.add(ArrayLocal, opcodeInt(arrayIndex))
	}
}
// dupeIndexLValue compiles a read of the lvalue expr. For field and
// array-element lvalues it emits a Dupe right after compiling the index and
// before the fetch, so a later assignRoteIndex can store to the same target
// without compiling the index a second time. Variables have no index and are
// compiled as an ordinary expression.
func (c *compiler) dupeIndexLValue(expr ast.Expr) {
	switch lvalue := expr.(type) {
	case *ast.IndexExpr:
		c.index(lvalue.Index)
		c.add(Dupe)
		c.indexExpr(lvalue)
	case *ast.FieldExpr:
		c.expr(lvalue.Index)
		c.add(Dupe)
		c.add(Field)
	case *ast.VarExpr:
		// No index to duplicate.
		c.expr(expr)
	}
}
// Generate a Concat opcode or, if possible, compact multiple Concats into one
// ConcatMulti opcode.
func (c *compiler) concatOp(expr *ast.BinaryExpr) {
@ -997,6 +1123,13 @@ func (c *compiler) binaryOp(op lexer.Token) {
// Generate an array index, handling multi-indexes properly.
func (c *compiler) index(index []ast.Expr) {
for _, expr := range index {
if e, ok := expr.(*ast.NumExpr); ok && e.Value == float64(int(e.Value)) {
// If index expression is integer constant, optimize to string "n"
// to avoid toString() at runtime.
s := strconv.Itoa(int(e.Value))
c.expr(&ast.StrExpr{Value: s})
continue
}
c.expr(expr)
}
if len(index) > 1 {

View File

@ -8,6 +8,7 @@ import (
"strings"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/resolver"
"github.com/benhoyt/goawk/lexer"
)
@ -200,12 +201,12 @@ func (d *disassembler) disassemble(prefix string) error {
d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))
case Delete:
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))
case DeleteAll:
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))
@ -316,9 +317,9 @@ func (d *disassembler) disassemble(prefix string) error {
d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))
case ForIn:
varScope := ast.VarScope(d.fetch())
varScope := resolver.Scope(d.fetch())
varIndex := int(d.fetch())
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
offset := d.fetch()
d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))
@ -327,13 +328,18 @@ func (d *disassembler) disassemble(prefix string) error {
builtinOp := BuiltinOp(d.fetch())
d.writeOpf("CallBuiltin %s", builtinOp)
case CallLengthArray:
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("CallLengthArray %s", d.arrayName(arrayScope, arrayIndex))
case CallSplit:
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))
case CallSplitSep:
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))
@ -346,7 +352,7 @@ func (d *disassembler) disassemble(prefix string) error {
numArrayArgs := int(d.fetch())
var arrayArgs []string
for i := 0; i < numArrayArgs; i++ {
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
}
@ -404,7 +410,7 @@ func (d *disassembler) disassemble(prefix string) error {
case GetlineArray:
redirect := lexer.Token(d.fetch())
arrayScope := ast.VarScope(d.fetch())
arrayScope := resolver.Scope(d.fetch())
arrayIndex := int(d.fetch())
d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))
@ -443,13 +449,13 @@ func (d *disassembler) writeOpf(format string, args ...interface{}) {
}
// Return the scalar variable name described by scope and index.
func (d *disassembler) varName(scope ast.VarScope, index int) string {
func (d *disassembler) varName(scope resolver.Scope, index int) string {
switch scope {
case ast.ScopeGlobal:
case resolver.Global:
return d.program.scalarNames[index]
case ast.ScopeLocal:
case resolver.Local:
return d.localName(index)
default: // ScopeSpecial
default: // resolver.Special
return ast.SpecialVarName(index)
}
}
@ -471,8 +477,8 @@ func (d *disassembler) localName(index int) string {
}
// Return the array variable name described by scope and index.
func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
if scope == ast.ScopeLocal {
func (d *disassembler) arrayName(scope resolver.Scope, index int) string {
if scope == resolver.Local {
return d.localArrayName(index)
}
return d.program.arrayNames[index]

View File

@ -14,95 +14,99 @@ func _() {
_ = x[Dupe-3]
_ = x[Drop-4]
_ = x[Swap-5]
_ = x[Field-6]
_ = x[FieldInt-7]
_ = x[FieldByName-8]
_ = x[FieldByNameStr-9]
_ = x[Global-10]
_ = x[Local-11]
_ = x[Special-12]
_ = x[ArrayGlobal-13]
_ = x[ArrayLocal-14]
_ = x[InGlobal-15]
_ = x[InLocal-16]
_ = x[AssignField-17]
_ = x[AssignGlobal-18]
_ = x[AssignLocal-19]
_ = x[AssignSpecial-20]
_ = x[AssignArrayGlobal-21]
_ = x[AssignArrayLocal-22]
_ = x[Delete-23]
_ = x[DeleteAll-24]
_ = x[IncrField-25]
_ = x[IncrGlobal-26]
_ = x[IncrLocal-27]
_ = x[IncrSpecial-28]
_ = x[IncrArrayGlobal-29]
_ = x[IncrArrayLocal-30]
_ = x[AugAssignField-31]
_ = x[AugAssignGlobal-32]
_ = x[AugAssignLocal-33]
_ = x[AugAssignSpecial-34]
_ = x[AugAssignArrayGlobal-35]
_ = x[AugAssignArrayLocal-36]
_ = x[Regex-37]
_ = x[IndexMulti-38]
_ = x[ConcatMulti-39]
_ = x[Add-40]
_ = x[Subtract-41]
_ = x[Multiply-42]
_ = x[Divide-43]
_ = x[Power-44]
_ = x[Modulo-45]
_ = x[Equals-46]
_ = x[NotEquals-47]
_ = x[Less-48]
_ = x[Greater-49]
_ = x[LessOrEqual-50]
_ = x[GreaterOrEqual-51]
_ = x[Concat-52]
_ = x[Match-53]
_ = x[NotMatch-54]
_ = x[Not-55]
_ = x[UnaryMinus-56]
_ = x[UnaryPlus-57]
_ = x[Boolean-58]
_ = x[Jump-59]
_ = x[JumpFalse-60]
_ = x[JumpTrue-61]
_ = x[JumpEquals-62]
_ = x[JumpNotEquals-63]
_ = x[JumpLess-64]
_ = x[JumpGreater-65]
_ = x[JumpLessOrEqual-66]
_ = x[JumpGreaterOrEqual-67]
_ = x[Next-68]
_ = x[Exit-69]
_ = x[ForIn-70]
_ = x[BreakForIn-71]
_ = x[CallBuiltin-72]
_ = x[CallSplit-73]
_ = x[CallSplitSep-74]
_ = x[CallSprintf-75]
_ = x[CallUser-76]
_ = x[CallNative-77]
_ = x[Return-78]
_ = x[ReturnNull-79]
_ = x[Nulls-80]
_ = x[Print-81]
_ = x[Printf-82]
_ = x[Getline-83]
_ = x[GetlineField-84]
_ = x[GetlineGlobal-85]
_ = x[GetlineLocal-86]
_ = x[GetlineSpecial-87]
_ = x[GetlineArray-88]
_ = x[EndOpcode-89]
_ = x[Rote-6]
_ = x[Field-7]
_ = x[FieldInt-8]
_ = x[FieldByName-9]
_ = x[FieldByNameStr-10]
_ = x[Global-11]
_ = x[Local-12]
_ = x[Special-13]
_ = x[ArrayGlobal-14]
_ = x[ArrayLocal-15]
_ = x[InGlobal-16]
_ = x[InLocal-17]
_ = x[AssignField-18]
_ = x[AssignGlobal-19]
_ = x[AssignLocal-20]
_ = x[AssignSpecial-21]
_ = x[AssignArrayGlobal-22]
_ = x[AssignArrayLocal-23]
_ = x[Delete-24]
_ = x[DeleteAll-25]
_ = x[IncrField-26]
_ = x[IncrGlobal-27]
_ = x[IncrLocal-28]
_ = x[IncrSpecial-29]
_ = x[IncrArrayGlobal-30]
_ = x[IncrArrayLocal-31]
_ = x[AugAssignField-32]
_ = x[AugAssignGlobal-33]
_ = x[AugAssignLocal-34]
_ = x[AugAssignSpecial-35]
_ = x[AugAssignArrayGlobal-36]
_ = x[AugAssignArrayLocal-37]
_ = x[Regex-38]
_ = x[IndexMulti-39]
_ = x[ConcatMulti-40]
_ = x[Add-41]
_ = x[Subtract-42]
_ = x[Multiply-43]
_ = x[Divide-44]
_ = x[Power-45]
_ = x[Modulo-46]
_ = x[Equals-47]
_ = x[NotEquals-48]
_ = x[Less-49]
_ = x[Greater-50]
_ = x[LessOrEqual-51]
_ = x[GreaterOrEqual-52]
_ = x[Concat-53]
_ = x[Match-54]
_ = x[NotMatch-55]
_ = x[Not-56]
_ = x[UnaryMinus-57]
_ = x[UnaryPlus-58]
_ = x[Boolean-59]
_ = x[Jump-60]
_ = x[JumpFalse-61]
_ = x[JumpTrue-62]
_ = x[JumpEquals-63]
_ = x[JumpNotEquals-64]
_ = x[JumpLess-65]
_ = x[JumpGreater-66]
_ = x[JumpLessOrEqual-67]
_ = x[JumpGreaterOrEqual-68]
_ = x[Next-69]
_ = x[Nextfile-70]
_ = x[Exit-71]
_ = x[ExitStatus-72]
_ = x[ForIn-73]
_ = x[BreakForIn-74]
_ = x[CallBuiltin-75]
_ = x[CallLengthArray-76]
_ = x[CallSplit-77]
_ = x[CallSplitSep-78]
_ = x[CallSprintf-79]
_ = x[CallUser-80]
_ = x[CallNative-81]
_ = x[Return-82]
_ = x[ReturnNull-83]
_ = x[Nulls-84]
_ = x[Print-85]
_ = x[Printf-86]
_ = x[Getline-87]
_ = x[GetlineField-88]
_ = x[GetlineGlobal-89]
_ = x[GetlineLocal-90]
_ = x[GetlineSpecial-91]
_ = x[GetlineArray-92]
_ = x[EndOpcode-93]
}
const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
const _Opcode_name = "NopNumStrDupeDropSwapRoteFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextNextfileExitExitStatusForInBreakForInCallBuiltinCallLengthArrayCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826}
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 25, 30, 38, 49, 63, 69, 74, 81, 92, 102, 110, 117, 128, 140, 151, 164, 181, 197, 203, 212, 221, 231, 240, 251, 266, 280, 294, 309, 323, 339, 359, 378, 383, 393, 404, 407, 415, 423, 429, 434, 440, 446, 455, 459, 466, 477, 491, 497, 502, 510, 513, 523, 532, 539, 543, 552, 560, 570, 583, 591, 602, 617, 635, 639, 647, 651, 661, 666, 676, 687, 702, 711, 723, 734, 742, 752, 758, 768, 773, 778, 784, 791, 803, 816, 828, 842, 854, 863}
func (i Opcode) String() string {
if i < 0 || i >= Opcode(len(_Opcode_index)-1) {

View File

@ -1,6 +1,6 @@
package compiler
//go:generate go run golang.org/x/tools/cmd/[email protected].8 -type=Opcode,AugOp,BuiltinOp
//go:generate go run golang.org/x/tools/cmd/[email protected]0.0 -type=Opcode,AugOp,BuiltinOp
// Opcode represents a single virtual machine instruction (or argument). The
// comments beside each opcode show any arguments that instruction consumes.
@ -22,6 +22,7 @@ const (
Dupe
Drop
Swap
Rote
// Fetch a field, variable, or array item
Field
@ -107,15 +108,18 @@ const (
JumpLessOrEqual // offset
JumpGreaterOrEqual // offset
Next
Nextfile
Exit
ExitStatus
ForIn // varScope varIndex arrayScope arrayIndex offset
BreakForIn
// Builtin functions
CallBuiltin // builtinOp
CallSplit // arrayScope arrayIndex
CallSplitSep // arrayScope arrayIndex
CallSprintf // numArgs
CallBuiltin // builtinOp
CallLengthArray // arrayScope arrayIndex
CallSplit // arrayScope arrayIndex
CallSplitSep // arrayScope arrayIndex
CallSprintf // numArgs
// User and native functions
CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...]

View File

@ -0,0 +1,532 @@
// Package resolver assigns integer indexes to functions and variables, as
// well as determining and checking their types (scalar or array).
package resolver
import (
"fmt"
"io"
"reflect"
"sort"
"strings"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/lexer"
)
// ResolvedProgram is a parsed AWK program plus variable scope and type data
// prepared by the resolver that is needed for subsequent interpretation.
//
// Values are produced by Resolve; the resolver state is unexported and is
// reached through the LookupVar, IterVars, LookupFunc and IterFuncs methods.
type ResolvedProgram struct {
	// Embedded copy of the parsed program (Resolve stores *prog by value).
	ast.Program

	// Variable and function tables built by Resolve.
	resolver *resolver
}
// LookupVar resolves the variable name as seen from inside function funcName
// (pass "" to look only at special and global variables). It returns the
// variable's scope and resolved info, and a flag reporting whether the
// variable is defined at all.
func (r *ResolvedProgram) LookupVar(funcName, name string) (Scope, VarInfo, bool) {
	varScope, varInfo, _, found := r.resolver.lookupVar(funcName, name)
	return varScope, varInfo, found
}
// IterVars calls f once for every variable recorded for function funcName;
// pass "" to visit the globals. The variables are stored in a map, so the
// order of the calls is unspecified.
func (r *ResolvedProgram) IterVars(funcName string, f func(name string, info VarInfo)) {
	vars := r.resolver.varInfo[funcName]
	for varName := range vars {
		f(varName, vars[varName])
	}
}
// LookupFunc returns the resolved info for the function called name. The
// boolean is false, and the FuncInfo is its zero value, when no such function
// is known.
func (r *ResolvedProgram) LookupFunc(name string) (FuncInfo, bool) {
	if found, ok := r.resolver.funcInfo[name]; ok {
		return found, true
	}
	return FuncInfo{}, false
}
// IterFuncs calls f once for every known function, native (Go-defined) ones
// included. The functions are stored in a map, so the order of the calls is
// unspecified.
func (r *ResolvedProgram) IterFuncs(f func(name string, info FuncInfo)) {
	funcs := r.resolver.funcInfo
	for funcName := range funcs {
		f(funcName, funcs[funcName])
	}
}
// VarInfo holds resolved information about a variable.
type VarInfo struct {
	// Scalar or Array once Resolve has finished (Resolve turns any variable
	// still of unknown type into a Scalar).
	Type Type

	// Index assigned by Resolve. Within one function (or the globals),
	// scalars and arrays are numbered separately, each starting at 0. For
	// special variables it is the value of ast.SpecialVarIndex.
	Index int
}
// FuncInfo holds resolved information about a function.
type FuncInfo struct {
	Native bool // true if function is a native (Go-defined) function

	// For native functions, the position of the name in the sorted list of
	// Config.Funcs names. For AWK-defined functions, the number of functions
	// already registered when this one was encountered by the call-graph pass.
	Index int

	Params []string // list of parameter names (left unset for native functions)
}
// Scope says where a variable lives: among a function's locals, among the
// special variables, or among the globals.
type Scope int

// Numbering starts at 1, so the zero Scope matches none of these.
const (
	Local   Scope = iota + 1 // function parameters
	Special                  // special variables such as NF
	Global                   // global variables
)

// String returns a lowercase name for s, or "unknown scope" when s is not
// one of the declared constants.
func (s Scope) String() string {
	names := [...]string{Local: "local", Special: "special", Global: "global"}
	if s < Local || s > Global {
		return "unknown scope"
	}
	return names[s]
}
// Type classifies a variable as a scalar or an array.
type Type int

// The zero value, unknown, marks a variable whose type is not yet decided.
const (
	unknown Type = iota
	Scalar
	Array
)

// String returns "scalar" or "array" for the two exported types and
// "unknown type" for anything else, including the unexported unknown.
func (t Type) String() string {
	if t == Scalar {
		return "scalar"
	}
	if t == Array {
		return "array"
	}
	return "unknown type"
}
// Config holds resolver configuration. Resolve treats a nil *Config the same
// as a pointer to the zero Config.
type Config struct {
	// Enable printing of type information
	DebugTypes bool

	// io.Writer to print type information on (for example, os.Stderr).
	// Resolve only writes to it when DebugTypes is true.
	DebugWriter io.Writer

	// Map of named Go functions to allow calling from AWK. See docs
	// on interp.Config.Funcs for details. Resolve uses the sorted names to
	// number the native functions.
	Funcs map[string]interface{}
}
// Resolve assigns integer indexes to functions and variables, as well as
// determining and checking their types (scalar or array).
//
// A nil config is replaced by an empty Config. The returned ResolvedProgram
// embeds a copy of *prog together with the resolver tables built here.
//
// Resolve panics with the value of ast.PosErrorf if the type passes have not
// converged within the iteration limit below; recordVar and the call-graph
// visitor it drives report their own errors with the same kind of panic.
func Resolve(prog *ast.Program, config *Config) *ResolvedProgram {
	if config == nil {
		config = &Config{}
	}
	// Assign indexes to native (Go-defined) functions, in order of name.
	// Do this before our first pass, so that AWK-defined functions override
	// Go-defined ones and take precedence.
	funcInfo := make(map[string]FuncInfo)
	var nativeNames []string
	for name := range config.Funcs {
		nativeNames = append(nativeNames, name)
	}
	sort.Strings(nativeNames)
	for i, name := range nativeNames {
		funcInfo[name] = FuncInfo{Native: true, Index: i}
	}
	// First pass determines call graph so we can process functions in
	// topological order: e.g., if f() calls g(), process g first, then f.
	// The visitor shares the funcInfo map, so AWK-defined functions are
	// registered into it during this walk.
	callGraph := callGraphVisitor{
		calls:    make(map[string]map[string]struct{}),
		funcs:    make(map[string]*ast.Function),
		funcInfo: funcInfo,
	}
	ast.Walk(&callGraph, prog)
	orderedFuncs := topoSort(callGraph.calls)
	// Ensure functions that weren't called are added to the orderedFuncs list
	// (order of those doesn't matter, so add them at the end).
	called := make(map[string]struct{}, len(orderedFuncs))
	for _, name := range orderedFuncs {
		called[name] = struct{}{}
	}
	for name := range callGraph.funcs {
		if _, ok := called[name]; !ok {
			orderedFuncs = append(orderedFuncs, name)
		}
	}
	// Define the local variable names (we don't know their types yet).
	// Native functions carry no parameter list, so they get no locals table.
	varInfo := make(map[string]map[string]VarInfo)
	for funcName, info := range funcInfo {
		if info.Native {
			continue
		}
		varInfo[funcName] = make(map[string]VarInfo)
		for _, param := range info.Params {
			varInfo[funcName][param] = VarInfo{}
		}
	}
	// Create our type resolver.
	r := resolver{varInfo: varInfo, funcInfo: funcInfo, funcs: callGraph.funcs}
	r.varInfo[""] = make(map[string]VarInfo) // func of "" stores global vars
	// Interpreter relies on ARGV and other built-in arrays being present.
	r.recordVar("", "ARGV", Array, lexer.Position{1, 1})
	r.recordVar("", "ENVIRON", Array, lexer.Position{1, 1})
	r.recordVar("", "FIELDS", Array, lexer.Position{1, 1})
	// Main resolver pass: determine types of variables and find function
	// information. Can't call ast.Walk on prog directly, as it will not
	// iterate through functions in topological (call graph) order.
	main := mainVisitor{r: &r, nativeFuncs: config.Funcs}
	// Snapshot the update counter before the first pass so the loop below can
	// tell whether that pass changed anything.
	updates := r.updates
	main.walkOrdered(prog, orderedFuncs)
	// Do additional passes while we're still making type updates. Topological
	// sorting takes care of ordinary call graphs, but additional passes are
	// needed for at least these two cases:
	//
	// 1. Functions which don't use their parameters, such as f1's A parameter
	// in this example:
	//
	// function f1(A) {} function f2(x, A) { x[0]; f1(a); f2(a) }
	//
	// 2. For complex mutually-recursive functions, such as this example:
	//
	// function f1(a) { if (0) f5(z1); f2(a) }
	// function f2(b) { if (0) f4(z2); f3(b) }
	// function f3(c) { if (0) f3(z3); f4(c) }
	// function f4(d) { if (0) f2(z4); f5(d) }
	// function f5(i) { if (0) f1(z5); i[1]=42 }
	// BEGIN { x[1]=3; f5(x); print x[1] }
	//
	// Limit it to a sensible maximum number of iterations that almost
	// certainly won't happen in the real world.
	for i := 0; r.updates != updates; i++ {
		updates = r.updates
		main.walkOrdered(prog, orderedFuncs)
		if i >= 100 {
			panic(ast.PosErrorf(lexer.Position{1, 1},
				"too many iterations trying to resolve variable types"))
		}
	}
	// For any variables that are still unknown, set their type to scalar.
	// This can happen for unused variables, such as in the following:
	// { f(z) } function f(x) { print NR }
	for _, infos := range r.varInfo {
		for varName, info := range infos {
			if info.Type == unknown {
				infos[varName] = VarInfo{Type: Scalar, Index: info.Index}
			}
		}
	}
	// Assign indexes to globals and locals (separate for scalars and arrays).
	for funcName, infos := range r.varInfo {
		var names []string
		if funcName == "" {
			// For global vars, order indexes by name.
			for name := range infos {
				names = append(names, name)
			}
			sort.Strings(names)
		} else {
			// For local vars, order indexes by parameter order.
			names = r.funcInfo[funcName].Params
		}
		// Two independent counters: scalars and arrays each start at 0.
		scalar := 0
		array := 0
		for _, name := range names {
			info := infos[name]
			if info.Type == Array {
				infos[name] = VarInfo{Type: info.Type, Index: array}
				array++
			} else {
				infos[name] = VarInfo{Type: info.Type, Index: scalar}
				scalar++
			}
		}
	}
	if config.DebugTypes {
		printVarTypes(config.DebugWriter, r.varInfo, r.funcInfo)
	}
	return &ResolvedProgram{
		Program:  *prog,
		resolver: &r,
	}
}
// printVarTypes writes a debugging dump of the resolved variables to w. For
// each function in name order it prints a header line (the globals, stored
// under "", come first as "globals"), followed by that function's variables in
// name order with their type and index.
func printVarTypes(w io.Writer, varInfo map[string]map[string]VarInfo, funcInfo map[string]FuncInfo) {
	funcNames := make([]string, 0, len(varInfo))
	for funcName := range varInfo {
		funcNames = append(funcNames, funcName)
	}
	sort.Strings(funcNames)

	for _, funcName := range funcNames {
		if funcName == "" {
			fmt.Fprintln(w, "globals")
		} else {
			fn := funcInfo[funcName]
			params := strings.Join(fn.Params, ", ")
			fmt.Fprintf(w, "function %s(%s) # index %d\n", funcName, params, fn.Index)
		}

		vars := varInfo[funcName]
		varNames := make([]string, 0, len(vars))
		for varName := range vars {
			varNames = append(varNames, varName)
		}
		sort.Strings(varNames)

		for _, varName := range varNames {
			v := vars[varName]
			fmt.Fprintf(w, " %s: %s %d\n", varName, v.Type, v.Index)
		}
	}
}
// resolver tracks variable scopes and types as well as function information.
type resolver struct {
	// Variables keyed by function name, then by variable name. The entry for
	// function "" holds the globals.
	varInfo map[string]map[string]VarInfo

	// Functions by name, native (Go-defined) ones included.
	funcInfo map[string]FuncInfo

	// AWK-defined functions by name.
	funcs map[string]*ast.Function

	// Incremented by recordVar whenever it adds a variable or fills in a
	// previously unknown type; Resolve compares it across passes to decide
	// whether another pass is needed.
	updates int
}
// lookupVar finds varName as seen from function funcName ("" for outside any
// function). It returns the variable's scope and info, the name of the
// function whose table holds it ("" for specials and globals), and whether it
// exists. Locals are tried first, then special variables, then globals.
func (r *resolver) lookupVar(funcName, varName string) (scope Scope, info VarInfo, varFunc string, exists bool) {
	// Locals only apply when we are inside a function.
	if funcName != "" {
		if local, ok := r.varInfo[funcName][varName]; ok {
			return Local, local, funcName, true
		}
	}

	// Special variables (such as NR) are all scalar; ARGV and similar are
	// handled as regular arrays.
	if special := ast.SpecialVarIndex(varName); special > 0 {
		return Special, VarInfo{Type: Scalar, Index: special}, "", true
	}

	if global, ok := r.varInfo[""][varName]; ok {
		return Global, global, "", true
	}

	// Not defined anywhere.
	return 0, VarInfo{}, "", false
}
// recordVar notes that varName, as seen from function funcName, is used as
// typ. A name not seen before becomes a new global. A variable whose type is
// still unknown takes on typ. Using a variable of known type as a different
// known type panics with a positioned error, as does a new global that shares
// its name with a function. pos is only used for those error messages.
func (r *resolver) recordVar(funcName, varName string, typ Type, pos lexer.Position) {
	_, known, owner, found := r.lookupVar(funcName, varName)
	if !found {
		// Neither a local nor a global yet: add it as a new global.
		r.varInfo[""][varName] = VarInfo{Type: typ}
		r.updates++
		if _, clash := r.funcs[varName]; clash {
			panic(ast.PosErrorf(pos, "global var %q can't also be a function", varName))
		}
		return
	}

	if typ == unknown {
		// This use tells us nothing new about the variable's type.
		return
	}
	switch known.Type {
	case unknown:
		// First definite use: fill in the type, keeping the index.
		r.varInfo[owner][varName] = VarInfo{Type: typ, Index: known.Index}
		r.updates++
	case typ:
		// Consistent with what we already know.
	default:
		panic(ast.PosErrorf(pos, "can't use %s %q as %s", known.Type, varName, typ))
	}
}
// callGraphVisitor records what functions are called by the current function
// to build our call graph.
type callGraphVisitor struct {
	calls map[string]map[string]struct{} // map of current function to called function

	// AWK-defined functions seen so far, by name; also used to detect
	// duplicate definitions.
	funcs map[string]*ast.Function

	// Function info by name. Resolve passes in a map that already holds the
	// native functions; Visit adds (or overwrites with) AWK-defined ones.
	funcInfo map[string]FuncInfo

	// Name of the function whose body is being walked, "" when outside any
	// function.
	curFunc string
}
// Visit handles one AST node for the call-graph pass. A function definition
// is registered (a duplicate name panics) and its body is walked with curFunc
// set to the function's name. A user-function call is recorded as an edge
// from curFunc to the callee and its arguments are walked. In both of those
// cases Visit does its own traversal and returns nil; for every other node it
// returns v.
func (v *callGraphVisitor) Visit(node ast.Node) ast.Visitor {
	if fn, isFunc := node.(*ast.Function); isFunc {
		if _, dup := v.funcs[fn.Name]; dup {
			panic(ast.PosErrorf(fn.Pos, "function %q already defined", fn.Name))
		}
		// The index is the count of functions registered before this one, so
		// it must be taken before fn is added to v.funcs.
		v.funcInfo[fn.Name] = FuncInfo{Index: len(v.funcs), Params: fn.Params}
		v.funcs[fn.Name] = fn

		v.curFunc = fn.Name
		ast.WalkStmtList(v, fn.Body)
		v.curFunc = ""
		return nil
	}

	if call, isCall := node.(*ast.UserCallExpr); isCall {
		callees, ok := v.calls[v.curFunc]
		if !ok {
			callees = make(map[string]struct{})
			v.calls[v.curFunc] = callees
		}
		callees[call.Name] = struct{}{}
		ast.WalkExprList(v, call.Args)
		return nil
	}

	return v
}
// mainVisitor records types of variables and performs various checks.
type mainVisitor struct {
	// Resolver whose tables this visitor updates.
	r *resolver

	// Native (Go-defined) functions; Resolve sets this from Config.Funcs.
	nativeFuncs map[string]interface{}

	// Name of the function whose body is being walked, "" when walking
	// BEGIN, END, or pattern-action code.
	curFunc string
}
// walkOrdered walks prog's AST with v: first the bodies of the functions
// named in orderedFuncs, in that order, then the BEGIN blocks, the
// pattern-action rules, and finally the END blocks.
func (v *mainVisitor) walkOrdered(prog *ast.Program, orderedFuncs []string) {
	for _, name := range orderedFuncs {
		// The "" entry stands for BEGIN, END, and actions, which are handled
		// after this loop. A name with no definition happens when someone
		// tries to call a local variable as a function, as in
		// function f(x) { x() }; the visitor reports that error itself.
		fn, defined := v.r.funcs[name]
		if name == "" || !defined {
			continue
		}
		v.curFunc = name
		ast.WalkStmtList(v, fn.Body)
		v.curFunc = ""
	}

	for _, begin := range prog.Begin {
		ast.WalkStmtList(v, begin)
	}
	for _, rule := range prog.Actions {
		ast.Walk(v, rule)
	}
	for _, end := range prog.End {
		ast.WalkStmtList(v, end)
	}
}
// Visit implements ast.Visitor for the main resolution pass. For the node
// kinds listed below it records how each variable is used (Scalar, Array, or
// unknown) through recordVar, walks the node's children itself, and returns
// nil. Any other node returns v (presumably so that the walker continues into
// its children -- confirm against ast.Walk).
//
// It panics with a position error when a local variable is called as a
// function, when an undefined function is called, when a call passes more
// arguments than the function accepts, or when an argument's type conflicts
// with the corresponding parameter's type.
func (v *mainVisitor) Visit(node ast.Node) ast.Visitor {
switch n := node.(type) {
case *ast.VarExpr:
// A plain variable reference is recorded as a scalar use.
v.r.recordVar(v.curFunc, n.Name, Scalar, n.Pos)
case *ast.ForInStmt:
// The loop variable is recorded as a scalar and the iterated name as an
// array, then the loop body is walked.
v.r.recordVar(v.curFunc, n.Var, Scalar, n.VarPos)
v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos)
ast.WalkStmtList(v, n.Body)
case *ast.IndexExpr:
// The subscript expressions are walked, then the indexed name is recorded
// as an array.
ast.WalkExprList(v, n.Index)
v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos)
case *ast.InExpr:
// Same handling as IndexExpr: subscripts first, then the array itself.
ast.WalkExprList(v, n.Index)
v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos)
case *ast.DeleteStmt:
// The deleted name is recorded as an array, then any subscripts are walked.
v.r.recordVar(v.curFunc, n.Array, Array, n.ArrayPos)
ast.WalkExprList(v, n.Index)
case *ast.CallExpr:
// Builtin calls: split() and length() need special handling of their
// variable argument; every other builtin just has its arguments walked.
switch n.Func {
case lexer.F_SPLIT:
// Walk the first argument, record the second as an array, then walk any
// remaining arguments.
ast.Walk(v, n.Args[0])
varExpr := n.Args[1].(*ast.VarExpr) // split()'s 2nd arg is always an array
v.r.recordVar(v.curFunc, varExpr.Name, Array, varExpr.Pos)
ast.WalkExprList(v, n.Args[2:])
case lexer.F_LENGTH:
if len(n.Args) > 0 {
if varExpr, ok := n.Args[0].(*ast.VarExpr); ok {
// In a call to length(x), x may be a scalar or an array,
// so set it to unknown for now.
v.r.recordVar(v.curFunc, varExpr.Name, unknown, varExpr.Pos)
return nil
}
}
ast.WalkExprList(v, n.Args)
default:
ast.WalkExprList(v, n.Args)
}
case *ast.UserCallExpr:
// A non-empty varFunc means the name resolved to a variable owned by a
// function rather than a global, and such a variable can't be called.
_, _, varFunc, exists := v.r.lookupVar(v.curFunc, n.Name)
if varFunc != "" && exists {
panic(ast.PosErrorf(n.Pos, "can't call local variable %q as function", n.Name))
}
funcInfo, exists := v.r.funcInfo[n.Name]
if !exists {
panic(ast.PosErrorf(n.Pos, "undefined function %q", n.Name))
}
// The argument limit is the declared parameter count; for a native function
// it comes from the reflected Go function type instead.
numParams := len(funcInfo.Params)
if funcInfo.Native {
typ := reflect.TypeOf(v.nativeFuncs[n.Name])
numParams = typ.NumIn()
if typ.IsVariadic() {
numParams = 1000000000 // bigger than any reasonable len(n.Args) value!
}
}
// Only an excess of arguments is rejected here; passing fewer is allowed.
if len(n.Args) > numParams {
panic(ast.PosErrorf(n.Pos, "%q called with more arguments than declared", n.Name))
}
for i, arg := range n.Args {
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
// Argument is not a variable, process normally.
if !funcInfo.Native {
paramInfo := v.r.varInfo[n.Name][funcInfo.Params[i]] // type info of corresponding parameter
if paramInfo.Type == Array {
panic(ast.PosErrorf(n.Pos, "can't pass scalar %s as array param", arg))
}
}
ast.Walk(v, arg)
continue
}
if funcInfo.Native {
// Arguments to native function can only be scalar.
v.r.recordVar(v.curFunc, varExpr.Name, Scalar, varExpr.Pos)
continue
}
// Variable passed to AWK-defined function may be scalar or array,
// determine from how it was used elsewhere.
paramName := funcInfo.Params[i] // name of corresponding parameter
paramInfo := v.r.varInfo[n.Name][paramName] // type info of parameter
_, varInfo, _, _ := v.r.lookupVar(v.curFunc, varExpr.Name)
// Type information flows in whichever direction is known: from parameter
// to argument, or from argument to parameter.
switch {
case varInfo.Type == unknown && paramInfo.Type != unknown:
// Variable's type is not known but param type is, set variable type.
v.r.recordVar(v.curFunc, varExpr.Name, paramInfo.Type, varExpr.Pos)
case varInfo.Type != unknown && paramInfo.Type == unknown:
// Variable's type is known but param type is not, set param type.
funcPos := v.r.funcs[n.Name].Pos // best position we have at this point
v.r.recordVar(n.Name, paramName, varInfo.Type, funcPos)
case varInfo.Type != paramInfo.Type && varInfo.Type != unknown && paramInfo.Type != unknown:
// Both types are known but don't match -- type error!
panic(ast.PosErrorf(varExpr.Pos, "can't pass %s %q as %s param",
varInfo.Type, varExpr.Name, paramInfo.Type))
default:
// Ensure variable references are recorded, even if the type
// is not yet known.
v.r.recordVar(v.curFunc, varExpr.Name, unknown, varExpr.Pos)
}
}
default:
return v
}
return nil
}

View File

@ -1,6 +1,6 @@
// Topological sorting
package parser
package resolver
/*
This algorithm is taken from:

View File

@ -3,6 +3,7 @@
package interp
import (
"bufio"
"bytes"
"errors"
"fmt"
@ -12,7 +13,7 @@ import (
"strings"
"unicode/utf8"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/resolver"
. "github.com/benhoyt/goawk/lexer"
)
@ -242,15 +243,34 @@ func validNativeType(typ reflect.Type) bool {
}
// Guts of the split() function
func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) {
func (p *interp) split(s string, scope resolver.Scope, index int, fs string, mode IOMode) (int, error) {
var parts []string
if fs == " " {
switch {
case mode == CSVMode || mode == TSVMode:
// Set up for parsing a CSV/TSV record
splitter := csvSplitter{
separator: p.csvInputConfig.Separator,
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
comment: p.csvInputConfig.Comment,
fields: &parts,
}
scanner := bufio.NewScanner(strings.NewReader(s))
scanner.Split(splitter.scan)
if p.splitBuffer == nil {
p.splitBuffer = make([]byte, inputBufSize)
}
scanner.Buffer(p.splitBuffer, maxRecordLength)
// Parse one record. Errors shouldn't happen, but if there is one,
// len(parts) will be 0.
scanner.Scan()
case fs == " ":
parts = strings.Fields(s)
} else if s == "" {
case s == "":
// Leave parts 0 length on empty string
} else if utf8.RuneCountInString(fs) <= 1 {
case utf8.RuneCountInString(fs) <= 1:
parts = strings.Split(s, fs)
} else {
default:
re, err := p.compileRegex(fs)
if err != nil {
return 0, err

View File

@ -17,11 +17,9 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
@ -30,13 +28,15 @@ import (
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
"github.com/benhoyt/goawk/internal/resolver"
"github.com/benhoyt/goawk/parser"
)
var (
errExit = errors.New("exit")
errBreak = errors.New("break")
errNext = errors.New("next")
errExit = errors.New("exit")
errBreak = errors.New("break")
errNext = errors.New("next")
errNextfile = errors.New("nextfile")
errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter")
@ -79,25 +79,27 @@ type interp struct {
hadFiles bool
input io.Reader
inputBuffer []byte
inputStreams map[string]io.ReadCloser
outputStreams map[string]io.WriteCloser
commands map[string]*exec.Cmd
inputStreams map[string]inputStream
outputStreams map[string]outputStream
noExec bool
noFileWrites bool
noFileReads bool
shellCommand []string
csvOutput *bufio.Writer
noArgVars bool
splitBuffer []byte
// Scalars, arrays, and function state
globals []value
stack []value
sp int
frame []value
arrays []map[string]value
localArrays [][]int
callDepth int
nativeFuncs []nativeFunc
globals []value
stack []value
sp int
frame []value
arrays []map[string]value
localArrays [][]int
callDepth int
nativeFuncs []nativeFunc
scalarIndexes map[string]int
arrayIndexes map[string]int
// File, line, and field handling
filename value
@ -256,8 +258,8 @@ type Config struct {
// You can also enable CSV or TSV input mode by setting INPUTMODE to "csv"
// or "tsv" in Vars or in the BEGIN block (those override this setting).
//
// For further documentation about GoAWK's CSV support, see the full docs:
// https://github.com/benhoyt/goawk/blob/master/csv.md
// For further documentation about GoAWK's CSV support, see the full docs
// in "../docs/csv.md".
InputMode IOMode
// Additional options if InputMode is CSVMode or TSVMode. The zero value
@ -358,10 +360,19 @@ func newInterp(program *parser.Program) *interp {
}
// Allocate memory for variables and virtual machine stack
p.globals = make([]value, len(program.Scalars))
p.scalarIndexes = make(map[string]int)
p.arrayIndexes = make(map[string]int)
program.IterVars("", func(name string, info resolver.VarInfo) {
if info.Type == resolver.Array {
p.arrayIndexes[name] = info.Index
} else {
p.scalarIndexes[name] = info.Index
}
})
p.globals = make([]value, len(p.scalarIndexes))
p.stack = make([]value, initialStackSize)
p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize)
for i := 0; i < len(program.Arrays); i++ {
p.arrays = make([]map[string]value, len(p.arrayIndexes), len(p.arrayIndexes)+initialStackSize)
for i := 0; i < len(p.arrayIndexes); i++ {
p.arrays[i] = make(map[string]value)
}
@ -379,9 +390,8 @@ func newInterp(program *parser.Program) *interp {
p.outputRecordSep = "\n"
p.subscriptSep = "\x1c"
p.inputStreams = make(map[string]io.ReadCloser)
p.outputStreams = make(map[string]io.WriteCloser)
p.commands = make(map[string]*exec.Cmd)
p.inputStreams = make(map[string]inputStream)
p.outputStreams = make(map[string]outputStream)
p.scanners = make(map[string]*bufio.Scanner)
return p
@ -433,11 +443,11 @@ func (p *interp) setExecuteConfig(config *Config) error {
}
// Set up ARGV and other variables from config
argvIndex := p.program.Arrays["ARGV"]
p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0))
argvIndex := p.arrayIndexes["ARGV"]
p.setArrayValue(resolver.Global, argvIndex, "0", str(config.Argv0))
p.argc = len(config.Args) + 1
for i, arg := range config.Args {
p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg))
p.setArrayValue(resolver.Global, argvIndex, strconv.Itoa(i+1), numStr(arg))
}
p.noArgVars = config.NoArgVars
p.filenameIndex = 1
@ -460,16 +470,16 @@ func (p *interp) setExecuteConfig(config *Config) error {
}
// Set up ENVIRON from config or environment variables
environIndex := p.program.Arrays["ENVIRON"]
environIndex := p.arrayIndexes["ENVIRON"]
if config.Environ != nil {
for i := 0; i < len(config.Environ); i += 2 {
p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
p.setArrayValue(resolver.Global, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
}
} else {
for _, kv := range os.Environ() {
eq := strings.IndexByte(kv, '=')
if eq >= 0 {
p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:]))
p.setArrayValue(resolver.Global, environIndex, kv[:eq], numStr(kv[eq+1:]))
}
}
}
@ -548,7 +558,7 @@ func (p *interp) executeAll() (int, error) {
}
return 0, err
}
if p.program.Actions == nil && p.program.End == nil {
if len(p.program.Compiled.Actions) == 0 && len(p.program.Compiled.End) == 0 {
return p.exitStatus, nil // only BEGIN specified, don't process input
}
if err != errExit {
@ -588,7 +598,7 @@ func Exec(source, fieldSep string, input io.Reader, output io.Writer) error {
config := &Config{
Stdin: input,
Output: output,
Error: ioutil.Discard,
Error: io.Discard,
Vars: []string{"FS", fieldSep},
}
_, err = ExecProgram(prog, config)
@ -662,11 +672,15 @@ lineLoop:
// Execute the body statements
err := p.execute(action.Body)
if err == errNext {
switch {
case err == errNext:
// "next" statement skips straight to next line
continue lineLoop
}
if err != nil {
case err == errNextfile:
// Tell nextLine to move on to next file
p.scanner = nil
continue lineLoop
case err != nil:
return err
}
}
@ -723,7 +737,7 @@ func (p *interp) setVarByName(name, value string) error {
if index > 0 {
return p.setSpecial(index, numStr(value))
}
index, ok := p.program.Scalars[name]
index, ok := p.scalarIndexes[name]
if ok {
p.globals[index] = numStr(value)
return nil
@ -764,7 +778,11 @@ func (p *interp) setSpecial(index int, v value) error {
case ast.V_FNR:
p.fileLineNum = int(v.num())
case ast.V_ARGC:
p.argc = int(v.num())
argc := int(v.num())
if argc > maxFieldIndex {
return newError("ARGC set too large: %d", argc)
}
p.argc = argc
case ast.V_CONVFMT:
p.convertFormat = p.toString(v)
case ast.V_FILENAME:
@ -833,8 +851,8 @@ func (p *interp) setSpecial(index int, v value) error {
// Determine the index of given array into the p.arrays slice. Global
// arrays are just at p.arrays[index], local arrays have to be looked
// up indirectly.
func (p *interp) arrayIndex(scope ast.VarScope, index int) int {
if scope == ast.ScopeGlobal {
func (p *interp) arrayIndex(scope resolver.Scope, index int) int {
if scope == resolver.Global {
return index
} else {
return p.localArrays[len(p.localArrays)-1][index]
@ -842,7 +860,7 @@ func (p *interp) arrayIndex(scope ast.VarScope, index int) int {
}
// Return array with given scope and index.
func (p *interp) array(scope ast.VarScope, index int) map[string]value {
func (p *interp) array(scope resolver.Scope, index int) map[string]value {
return p.arrays[p.arrayIndex(scope, index)]
}
@ -852,7 +870,7 @@ func (p *interp) localArray(index int) map[string]value {
}
// Set a value in given array by key (index)
func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) {
func (p *interp) setArrayValue(scope resolver.Scope, arrayIndex int, index string, v value) {
array := p.array(scope, arrayIndex)
array[index] = v
}

View File

@ -8,7 +8,6 @@ import (
"encoding/csv"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"regexp"
@ -17,7 +16,7 @@ import (
"strings"
"unicode/utf8"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/resolver"
. "github.com/benhoyt/goawk/lexer"
)
@ -97,26 +96,6 @@ func (p *interp) writeCSV(output io.Writer, fields []string) error {
return nil
}
// Implement a buffered version of WriteCloser so output is buffered
// when redirecting to a file (eg: print >"out")
type bufferedWriteCloser struct {
*bufio.Writer
io.Closer
}
func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser {
writer := bufio.NewWriterSize(w, outputBufSize)
return &bufferedWriteCloser{writer, w}
}
func (wc *bufferedWriteCloser) Close() error {
err := wc.Writer.Flush()
if err != nil {
return err
}
return wc.Closer.Close()
}
// Determine the output stream for given redirect token and
// destination (file or pipe name)
func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) {
@ -145,13 +124,13 @@ func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, er
} else {
flags |= os.O_APPEND
}
w, err := os.OpenFile(name, flags, 0644)
f, err := os.OpenFile(name, flags, 0644)
if err != nil {
return nil, newError("output redirection error: %s", err)
}
buffered := newBufferedWriteCloser(w)
p.outputStreams[name] = buffered
return buffered, nil
out := newOutFileStream(f, outputBufSize)
p.outputStreams[name] = out
return out, nil
case PIPE:
// Pipe to command
@ -159,22 +138,16 @@ func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, er
return nil, newError("can't write to pipe due to NoExec")
}
cmd := p.execShell(name)
w, err := cmd.StdinPipe()
if err != nil {
return nil, newError("error connecting to stdin pipe: %v", err)
}
cmd.Stdout = p.output
cmd.Stderr = p.errorOutput
p.flushOutputAndError() // ensure synchronization
err = cmd.Start()
out, err := newOutCmdStream(cmd)
if err != nil {
p.printErrorf("%s\n", err)
return ioutil.Discard, nil
out = newOutNullStream()
}
p.commands[name] = cmd
buffered := newBufferedWriteCloser(w)
p.outputStreams[name] = buffered
return buffered, nil
p.outputStreams[name] = out
return out, nil
default:
// Should never happen
@ -214,13 +187,14 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
if p.noFileReads {
return nil, newError("can't read from file due to NoFileReads")
}
r, err := os.Open(name)
f, err := os.Open(name)
if err != nil {
return nil, err // *os.PathError is handled by caller (getline returns -1)
}
scanner := p.newScanner(r, make([]byte, inputBufSize))
in := newInFileStream(f)
scanner := p.newScanner(in, make([]byte, inputBufSize))
p.scanners[name] = scanner
p.inputStreams[name] = r
p.inputStreams[name] = in
return scanner, nil
}
@ -238,19 +212,15 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
cmd := p.execShell(name)
cmd.Stdin = p.stdin
cmd.Stderr = p.errorOutput
r, err := cmd.StdoutPipe()
if err != nil {
return nil, newError("error connecting to stdout pipe: %v", err)
}
p.flushOutputAndError() // ensure synchronization
err = cmd.Start()
in, err := newInCmdStream(cmd)
if err != nil {
p.printErrorf("%s\n", err)
return bufio.NewScanner(strings.NewReader("")), nil
}
scanner := p.newScanner(r, make([]byte, inputBufSize))
p.commands[name] = cmd
p.inputStreams[name] = r
scanner := p.newScanner(in, make([]byte, inputBufSize))
p.inputStreams[name] = in
p.scanners[name] = scanner
return scanner, nil
}
@ -294,7 +264,7 @@ func (p *interp) setFieldNames(names []string) {
p.fieldIndexes = nil // clear name-to-index cache
// Populate FIELDS array (mapping of field indexes to field names).
fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"])
fieldsArray := p.array(resolver.Global, p.arrayIndexes["FIELDS"])
for k := range fieldsArray {
delete(fieldsArray, k)
}
@ -648,6 +618,24 @@ func (p *interp) setLine(line string, isTrueStr bool) {
p.reparseCSV = true
}
// splitOnFieldSepRegex splits line wherever p.fieldSepRegex matches, appends
// each resulting field to fields, and returns the extended slice so callers
// can reuse its backing storage.
func (p *interp) splitOnFieldSepRegex(fields []string, line string) []string {
	fieldStart := 0
	for _, loc := range p.fieldSepRegex.FindAllStringIndex(line, -1) {
		// Zero-width matches are not separators
		// (https://www.austingroupbugs.net/view.php?id=1468).
		if loc[0] == loc[1] {
			continue
		}
		fields = append(fields, line[fieldStart:loc[0]])
		fieldStart = loc[1]
	}
	// Whatever follows the last separator (possibly "") is the final field.
	return append(fields, line[fieldStart:])
}
// Ensure that the current line is parsed into fields, splitting it
// into fields if it hasn't been already
func (p *interp) ensureFields() {
@ -684,7 +672,7 @@ func (p *interp) ensureFields() {
p.fields = strings.Split(p.line, p.fieldSep)
default:
// Split on FS as a regex
p.fields = p.fieldSepRegex.Split(p.line, -1)
p.fields = p.splitOnFieldSepRegex(p.fields[:0], p.line)
}
// Special case for when RS=="" and FS is single character,
@ -732,8 +720,8 @@ func (p *interp) nextLine() (string, error) {
// getArrayValue() here as it would set the value if
// not present
index := strconv.Itoa(p.filenameIndex)
argvIndex := p.program.Arrays["ARGV"]
argvArray := p.array(ast.ScopeGlobal, argvIndex)
argvIndex := p.arrayIndexes["ARGV"]
argvArray := p.array(resolver.Global, argvIndex)
filename := p.toString(argvArray[index])
p.filenameIndex++
@ -814,7 +802,7 @@ func writeOutput(w io.Writer, s string) error {
return err
}
// Close all streams, commands, and so on (after program execution).
// Close all streams and so on (after program execution).
func (p *interp) closeAll() {
if prevInput, ok := p.input.(io.Closer); ok {
_ = prevInput.Close()
@ -825,9 +813,6 @@ func (p *interp) closeAll() {
for _, w := range p.outputStreams {
_ = w.Close()
}
for _, cmd := range p.commands {
_ = cmd.Wait()
}
if f, ok := p.output.(flusher); ok {
_ = f.Flush()
}
@ -841,11 +826,12 @@ func (p *interp) closeAll() {
func (p *interp) flushAll() bool {
allGood := true
for name, writer := range p.outputStreams {
allGood = allGood && p.flushWriter(name, writer)
if !p.flushWriter(name, writer) {
allGood = false
}
}
if _, ok := p.output.(flusher); ok {
// User-provided output may or may not be flushable
allGood = allGood && p.flushWriter("stdout", p.output)
if !p.flushWriter("stdout", p.output) {
allGood = false
}
return allGood
}

224
play/vendor/github.com/benhoyt/goawk/interp/iostream.go generated vendored Normal file
View File

@ -0,0 +1,224 @@
package interp
// I/O streams are interfaces which allow file redirects and command pipelines to be treated
// equivalently.
import (
"bufio"
"errors"
"io"
"os/exec"
"syscall"
)
const (
// notClosedExitCode is the exit code every stream constructor in this file
// stores initially; Close replaces it with the real result.
notClosedExitCode = -127
)
var (
// doubleCloseError is returned by a stream's Close method when the stream
// has already been closed.
doubleCloseError = errors.New("close: stream already closed")
)
// firstError scans errs in order and returns the first one that is non-nil.
// It returns nil when errs is empty or contains only nil errors.
func firstError(errs ...error) error {
	for i := 0; i < len(errs); i++ {
		if errs[i] != nil {
			return errs[i]
		}
	}
	return nil
}
// waitExitCode waits for cmd to finish and translates the outcome into the
// numeric result returned by goawk builtin functions.
//
// The result mimics gawk for expected child process outcomes, all of which
// are reported with a nil error:
//  1. Normal exit: the exit status of the child process (0 on success).
//  2. Exit on an unhandled signal: 256 + signal.
//  3. Exit on an unhandled signal that caused a core dump: 512 + signal.
//
// Any other failure (Wait returning something other than *exec.ExitError, or
// a platform whose process state is not a syscall.WaitStatus) yields -1 and
// the error from Wait.
func waitExitCode(cmd *exec.Cmd) (int, error) {
	waitErr := cmd.Wait()
	if waitErr == nil {
		return 0, nil
	}

	exitErr, isExitErr := waitErr.(*exec.ExitError)
	if !isExitErr {
		// Wait() returned an io error.
		return -1, waitErr
	}

	ws, isWaitStatus := exitErr.ProcessState.Sys().(syscall.WaitStatus)
	if !isWaitStatus {
		// Maybe not all platforms support WaitStatus?
		return -1, waitErr
	}

	// The core dump check comes before the plain signal check, as in the
	// documented 512/256 encoding.
	if ws.CoreDump() {
		return 512 + int(ws.Signal()), nil
	}
	if ws.Signaled() {
		return 256 + int(ws.Signal()), nil
	}
	if ws.Exited() {
		return ws.ExitStatus(), nil
	}
	return -1, waitErr
}
// inputStream is a readable, closable input source that also reports an exit
// code. The implementations in this file return notClosedExitCode until Close
// has been called.
type inputStream interface {
io.ReadCloser
// ExitCode returns the code recorded by Close.
ExitCode() int
}
// outputStream is a writable, closable output destination that can be flushed
// on demand and reports an exit code.
type outputStream interface {
io.WriteCloser
// Flush writes out any buffered data.
Flush() error
// ExitCode returns the code recorded by Close.
ExitCode() int
}
// outFileStream is an outputStream that buffers writes in front of an
// underlying io.WriteCloser.
type outFileStream struct {
	*bufio.Writer
	closer   io.Closer
	exitCode int
	closed   bool
}

// newOutFileStream wraps wc in a buffered writer of the given size. The
// stream's exit code is notClosedExitCode until Close is called.
func newOutFileStream(wc io.WriteCloser, size int) outputStream {
	return &outFileStream{
		Writer:   bufio.NewWriterSize(wc, size),
		closer:   wc,
		exitCode: notClosedExitCode,
		closed:   false,
	}
}

// Close flushes buffered data and closes the underlying writer. Both steps
// are always attempted; the first error (flush before close) is returned and
// sets the exit code to -1, otherwise the exit code becomes 0. Closing a
// second time returns doubleCloseError.
func (s *outFileStream) Close() error {
	if s.closed {
		return doubleCloseError
	}
	s.closed = true

	flushErr := s.Writer.Flush()
	closeErr := s.closer.Close()

	err := firstError(flushErr, closeErr)
	if err == nil {
		s.exitCode = 0
	} else {
		s.exitCode = -1
	}
	return err
}

// ExitCode returns the code recorded by Close, or notClosedExitCode if the
// stream has not been closed.
func (s *outFileStream) ExitCode() int {
	return s.exitCode
}
// outCmdStream is an outputStream that buffers writes to the stdin pipe of a
// started command.
type outCmdStream struct {
	*bufio.Writer
	closer   io.Closer
	cmd      *exec.Cmd
	exitCode int
	closed   bool
}

// newOutCmdStream connects to cmd's stdin pipe and starts cmd. If the pipe
// cannot be created, or the command fails to start (in which case the pipe is
// closed again), it returns a nil stream and the error.
func newOutCmdStream(cmd *exec.Cmd) (outputStream, error) {
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, newError("error connecting to stdin pipe: %v", err)
	}
	if err := cmd.Start(); err != nil {
		stdin.Close()
		return nil, err
	}
	return &outCmdStream{
		Writer:   bufio.NewWriterSize(stdin, outputBufSize),
		closer:   stdin,
		cmd:      cmd,
		exitCode: notClosedExitCode,
		closed:   false,
	}, nil
}

// Close flushes buffered data, closes the command's stdin pipe, and waits for
// the command to finish, storing the result of waitExitCode as the exit code.
// All three steps are always attempted; the first error in the order wait,
// flush, close is returned. Closing a second time returns doubleCloseError.
func (s *outCmdStream) Close() error {
	if s.closed {
		return doubleCloseError
	}
	s.closed = true

	flushErr := s.Writer.Flush()
	closeErr := s.closer.Close()
	code, waitErr := waitExitCode(s.cmd)
	s.exitCode = code

	return firstError(waitErr, flushErr, closeErr)
}

// ExitCode returns the code recorded by Close, or notClosedExitCode if the
// stream has not been closed.
func (s *outCmdStream) ExitCode() int {
	return s.exitCode
}
// An outNullStream satisfies the outputStream interface while discarding
// everything written to it.
//
// All methods use pointer receivers: Close mutates the closed flag, and the
// constructor only ever hands out a pointer, so mixing in value receivers
// would just copy the struct on each call.
type outNullStream struct {
	io.Writer
	closed bool
}

// newOutNullStream returns an outputStream whose writes go to io.Discard.
func newOutNullStream() outputStream { return &outNullStream{io.Discard, false} }

// Flush does nothing, as there is no buffered data.
func (s *outNullStream) Flush() error { return nil }

// Close marks the stream closed. Closing a second time returns
// doubleCloseError.
func (s *outNullStream) Close() error {
	if s.closed {
		return doubleCloseError
	}
	s.closed = true
	return nil
}

// ExitCode always returns -1, whether or not the stream has been closed.
func (s *outNullStream) ExitCode() int { return -1 }
// inFileStream is an inputStream backed by a plain io.ReadCloser.
type inFileStream struct {
	io.ReadCloser
	exitCode int
	closed   bool
}

// newInFileStream wraps rc as an inputStream. The stream's exit code is
// notClosedExitCode until Close is called.
func newInFileStream(rc io.ReadCloser) inputStream {
	return &inFileStream{
		ReadCloser: rc,
		exitCode:   notClosedExitCode,
		closed:     false,
	}
}

// Close closes the underlying reader, setting the exit code to 0 on success
// or -1 on failure. Closing a second time returns doubleCloseError.
func (s *inFileStream) Close() error {
	if s.closed {
		return doubleCloseError
	}
	s.closed = true

	err := s.ReadCloser.Close()
	if err == nil {
		s.exitCode = 0
		return nil
	}
	s.exitCode = -1
	return err
}

// ExitCode returns the code recorded by Close, or notClosedExitCode if the
// stream has not been closed.
func (s *inFileStream) ExitCode() int {
	return s.exitCode
}
// inCmdStream is an inputStream that reads from the stdout pipe of a started
// command.
type inCmdStream struct {
	io.ReadCloser
	cmd      *exec.Cmd
	exitCode int
	closed   bool
}

// newInCmdStream connects to cmd's stdout pipe and starts cmd. If the pipe
// cannot be created, or the command fails to start (in which case the pipe is
// closed again), it returns a nil stream and the error.
func newInCmdStream(cmd *exec.Cmd) (inputStream, error) {
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, newError("error connecting to stdout pipe: %v", err)
	}
	if err := cmd.Start(); err != nil {
		stdout.Close()
		return nil, err
	}
	return &inCmdStream{
		ReadCloser: stdout,
		cmd:        cmd,
		exitCode:   notClosedExitCode,
		closed:     false,
	}, nil
}

// Close closes the stdout pipe and waits for the command to finish, storing
// the result of waitExitCode as the exit code. Both steps are always
// attempted; the wait error takes precedence over the close error. Closing a
// second time returns doubleCloseError.
func (s *inCmdStream) Close() error {
	if s.closed {
		return doubleCloseError
	}
	s.closed = true

	closeErr := s.ReadCloser.Close()
	code, waitErr := waitExitCode(s.cmd)
	s.exitCode = code

	return firstError(waitErr, closeErr)
}

// ExitCode returns the code recorded by Close, or notClosedExitCode if the
// stream has not been closed.
func (s *inCmdStream) ExitCode() int {
	return s.exitCode
}

View File

@ -6,6 +6,7 @@ import (
"context"
"math"
"github.com/benhoyt/goawk/internal/resolver"
"github.com/benhoyt/goawk/parser"
)
@ -60,6 +61,29 @@ func (p *Interpreter) Execute(config *Config) (int, error) {
return p.interp.executeAll()
}
// Array returns a copy of the named global AWK array as a Go map, or nil if
// no array of that name exists. AWK numbers are included as type float64;
// strings (including "numeric strings") are included as type string. A value
// of any other internal type is included as the empty string.
func (p *Interpreter) Array(name string) map[string]interface{} {
	index, exists := p.interp.arrayIndexes[name]
	if !exists {
		return nil
	}

	src := p.interp.array(resolver.Global, index)
	items := make(map[string]interface{}, len(src))
	for key, val := range src {
		var item interface{}
		switch val.typ {
		case typeNum:
			item = val.n
		case typeStr, typeNumStr:
			item = val.s
		default:
			item = ""
		}
		items[key] = item
	}
	return items
}
func (p *interp) resetCore() {
p.scanner = nil
for k := range p.scanners {
@ -72,9 +96,6 @@ func (p *interp) resetCore() {
for k := range p.outputStreams {
delete(p.outputStreams, k)
}
for k := range p.commands {
delete(p.commands, k)
}
p.sp = 0
p.localArrays = p.localArrays[:0]

View File

@ -6,12 +6,11 @@ import (
"io"
"math"
"os"
"os/exec"
"strings"
"time"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
"github.com/benhoyt/goawk/internal/resolver"
"github.com/benhoyt/goawk/lexer"
)
@ -61,6 +60,11 @@ func (p *interp) execute(code []compiler.Opcode) error {
l, r := p.peekTwo()
p.replaceTwo(r, l)
case compiler.Rote:
s := p.peekSlice(3)
v0, v1, v2 := s[0], s[1], s[2]
s[0], s[1], s[2] = v1, v2, v0
case compiler.Field:
index := p.peekTop()
v := p.getField(int(index.num()))
@ -180,7 +184,7 @@ func (p *interp) execute(code []compiler.Opcode) error {
arrayScope := code[ip]
arrayIndex := code[ip+1]
ip += 2
array := p.array(ast.VarScope(arrayScope), int(arrayIndex))
array := p.array(resolver.Scope(arrayScope), int(arrayIndex))
index := p.toString(p.pop())
delete(array, index)
@ -188,7 +192,7 @@ func (p *interp) execute(code []compiler.Opcode) error {
arrayScope := code[ip]
arrayIndex := code[ip+1]
ip += 2
array := p.array(ast.VarScope(arrayScope), int(arrayIndex))
array := p.array(resolver.Scope(arrayScope), int(arrayIndex))
for k := range array {
delete(array, k)
}
@ -587,11 +591,17 @@ func (p *interp) execute(code []compiler.Opcode) error {
case compiler.Next:
return errNext
case compiler.Nextfile:
return errNextfile
case compiler.Exit:
p.exitStatus = int(p.pop().num())
// Return special errExit value "caught" by top-level executor
return errExit
case compiler.ExitStatus:
p.exitStatus = int(p.pop().num())
return errExit
case compiler.ForIn:
varScope := code[ip]
varIndex := code[ip+1]
@ -599,15 +609,15 @@ func (p *interp) execute(code []compiler.Opcode) error {
arrayIndex := code[ip+3]
offset := code[ip+4]
ip += 5
array := p.array(ast.VarScope(arrayScope), int(arrayIndex))
array := p.array(resolver.Scope(arrayScope), int(arrayIndex))
loopCode := code[ip : ip+int(offset)]
for index := range array {
switch ast.VarScope(varScope) {
case ast.ScopeGlobal:
switch resolver.Scope(varScope) {
case resolver.Global:
p.globals[varIndex] = str(index)
case ast.ScopeLocal:
case resolver.Local:
p.frame[varIndex] = str(index)
default: // ScopeSpecial
default: // resolver.Special
err := p.setSpecial(int(varIndex), str(index))
if err != nil {
return err
@ -634,12 +644,19 @@ func (p *interp) execute(code []compiler.Opcode) error {
return err
}
case compiler.CallLengthArray:
arrayScope := code[ip]
arrayIndex := code[ip+1]
ip += 2
array := p.array(resolver.Scope(arrayScope), int(arrayIndex))
p.push(num(float64(len(array))))
case compiler.CallSplit:
arrayScope := code[ip]
arrayIndex := code[ip+1]
ip += 2
s := p.toString(p.peekTop())
n, err := p.split(s, ast.VarScope(arrayScope), int(arrayIndex), p.fieldSep)
n, err := p.split(s, resolver.Scope(arrayScope), int(arrayIndex), p.fieldSep, p.inputMode)
if err != nil {
return err
}
@ -650,7 +667,8 @@ func (p *interp) execute(code []compiler.Opcode) error {
arrayIndex := code[ip+1]
ip += 2
s, fieldSep := p.peekPop()
n, err := p.split(p.toString(s), ast.VarScope(arrayScope), int(arrayIndex), p.toString(fieldSep))
// 3-argument form of split() ignores input mode
n, err := p.split(p.toString(s), resolver.Scope(arrayScope), int(arrayIndex), p.toString(fieldSep), DefaultMode)
if err != nil {
return err
}
@ -683,7 +701,7 @@ func (p *interp) execute(code []compiler.Opcode) error {
// Handle array arguments
var arrays []int
for j := 0; j < numArrayArgs; j++ {
arrayScope := ast.VarScope(code[ip])
arrayScope := resolver.Scope(code[ip])
arrayIndex := int(code[ip+1])
ip += 2
arrays = append(arrays, p.arrayIndex(arrayScope, arrayIndex))
@ -880,7 +898,7 @@ func (p *interp) execute(code []compiler.Opcode) error {
}
index := p.toString(p.peekTop())
if ret == 1 {
array := p.array(ast.VarScope(arrayScope), int(arrayIndex))
array := p.array(resolver.Scope(arrayScope), int(arrayIndex))
array[index] = numStr(line)
}
p.replaceTop(num(ret))
@ -897,33 +915,25 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error {
p.replaceTop(num(math.Atan2(y.num(), x.num())))
case compiler.BuiltinClose:
var err error
code := -1
name := p.toString(p.peekTop())
var c io.Closer = p.inputStreams[name]
if c != nil {
if stream := p.inputStreams[name]; stream != nil {
// Close input stream
delete(p.inputStreams, name)
err := c.Close()
if err != nil {
p.replaceTop(num(-1))
} else {
p.replaceTop(num(0))
}
} else {
c = p.outputStreams[name]
if c != nil {
// Close output stream
delete(p.outputStreams, name)
err := c.Close()
if err != nil {
p.replaceTop(num(-1))
} else {
p.replaceTop(num(0))
}
} else {
// Nothing to close
p.replaceTop(num(-1))
}
delete(p.scanners, name)
err = stream.Close()
code = stream.ExitCode()
} else if stream := p.outputStreams[name]; stream != nil {
// Close output stream
delete(p.outputStreams, name)
err = stream.Close()
code = stream.ExitCode()
}
if err != nil {
p.printErrorf("error closing %q: %v\n", name, err)
}
p.replaceTop(num(float64(code)))
case compiler.BuiltinCos:
p.replaceTop(num(math.Cos(p.peekTop().num())))
@ -1071,20 +1081,21 @@ func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error {
cmd.Stdout = p.output
cmd.Stderr = p.errorOutput
_ = p.flushAll() // ensure synchronization
err := cmd.Run()
ret := 0.0
err := cmd.Start()
if err != nil {
// Could not start the shell so skip waiting on it.
p.printErrorf("%v\n", err)
p.replaceTop(num(-1.0))
return nil
}
exitCode, err := waitExitCode(cmd)
if err != nil {
if p.checkCtx && p.ctx.Err() != nil {
return p.ctx.Err()
}
if exitErr, ok := err.(*exec.ExitError); ok {
ret = float64(exitErr.ProcessState.ExitCode())
} else {
p.printErrorf("%v\n", err)
ret = -1
}
p.printErrorf("%v\n", err)
}
p.replaceTop(num(ret))
p.replaceTop(num(float64(exitCode)))
case compiler.BuiltinTolower:
p.replaceTop(str(strings.ToLower(p.toString(p.peekTop()))))

View File

@ -9,6 +9,8 @@ package lexer
import (
"errors"
"fmt"
"unicode/utf8"
)
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
@ -32,6 +34,11 @@ type Position struct {
Column int
}
// String formats the position as "line:col", for example "3:14".
func (p Position) String() string {
	line, column := p.Line, p.Column
	return fmt.Sprintf("%d:%d", line, column)
}
// NewLexer creates a new lexer that will tokenize the given source
// code. See the module-level example for a working example.
func NewLexer(src []byte) *Lexer {
@ -461,7 +468,7 @@ func parseString(quote byte, ch func() byte, next func()) (string, error) {
c = '\v'
next()
case 'x':
// Hex byte of one of two hex digits
// Hex byte of one or two hex digits
next()
digit := hexDigit(ch())
if digit < 0 {
@ -474,6 +481,29 @@ func parseString(quote byte, ch func() byte, next func()) (string, error) {
c = c*16 + byte(digit)
next()
}
case 'u':
// Hex Unicode character of 1-8 digits
next()
r := hexDigit(ch())
if r < 0 {
return "", errors.New("1-8 hex digits expected")
}
next()
for i := 0; i < 7; i++ {
digit := hexDigit(ch())
if digit < 0 {
break
}
next()
r = r*16 + digit
}
if !utf8.ValidRune(rune(r)) {
return "", errors.New("invalid Unicode character")
}
runeBytes := make([]byte, utf8.UTFMax)
n := utf8.EncodeRune(runeBytes, rune(r))
chars = append(chars, runeBytes[:n]...)
continue
case '0', '1', '2', '3', '4', '5', '6', '7':
// Octal byte of 1-3 octal digits
c = ch() - '0'

View File

@ -70,6 +70,7 @@ const (
IF
IN
NEXT
NEXTFILE
PRINT
PRINTF
RETURN
@ -127,6 +128,7 @@ var keywordTokens = map[string]Token{
"if": IF,
"in": IN,
"next": NEXT,
"nextfile": NEXTFILE,
"print": PRINT,
"printf": PRINTF,
"return": RETURN,
@ -223,6 +225,7 @@ var tokenNames = map[Token]string{
IF: "if",
IN: "in",
NEXT: "next",
NEXTFILE: "nextfile",
PRINT: "print",
PRINTF: "printf",
RETURN: "return",

View File

@ -13,6 +13,7 @@ import (
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
"github.com/benhoyt/goawk/internal/resolver"
. "github.com/benhoyt/goawk/lexer"
)
@ -45,35 +46,49 @@ type ParserConfig struct {
Funcs map[string]interface{}
}
// toResolverConfig copies the DebugTypes, DebugWriter and Funcs settings
// of the parser configuration into a new *resolver.Config. A nil receiver
// yields a nil result.
func (c *ParserConfig) toResolverConfig() *resolver.Config {
	var resolved *resolver.Config
	if c != nil {
		resolved = &resolver.Config{
			DebugTypes:  c.DebugTypes,
			DebugWriter: c.DebugWriter,
			Funcs:       c.Funcs,
		}
	}
	return resolved
}
// ParseProgram parses an entire AWK program, returning the *Program
// abstract syntax tree or a *ParseError on error. "config" describes
// the parser configuration (and is allowed to be nil).
func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) {
defer func() {
// The parser uses panic with a *ParseError to signal parsing
// errors internally, and they're caught here. This
// significantly simplifies the recursive descent calls as
// we don't have to check errors everywhere.
// The parser and resolver use panic with an *ast.PositionError to signal parsing
// errors internally, and they're caught here. This significantly simplifies
// the recursive descent calls as we don't have to check errors everywhere.
if r := recover(); r != nil {
// Convert to ParseError or re-panic
err = r.(*ParseError)
// Convert to PositionError or re-panic
posError := *r.(*ast.PositionError)
err = &ParseError{
Position: posError.Position,
Message: posError.Message,
}
}
}()
lexer := NewLexer(src)
p := parser{lexer: lexer}
if config != nil {
p.debugTypes = config.DebugTypes
p.debugWriter = config.DebugWriter
p.nativeFuncs = config.Funcs
}
p.initResolve()
p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
p.next() // initialize p.tok
// Parse into abstract syntax tree
prog = p.program()
astProg := p.program()
// Resolve variable scopes and types
prog = &Program{}
prog.ResolvedProgram = *resolver.Resolve(astProg, config.toResolverConfig())
// Compile to virtual machine code
prog.Compiled, err = compiler.Compile(prog.toAST())
prog.Compiled, err = compiler.Compile(&prog.ResolvedProgram)
return prog, err
}
@ -83,19 +98,14 @@ type Program struct {
// but are exported for the interpreter (Program itself needs to
// be exported in package "parser", otherwise these could live in
// "internal/ast".)
Begin []ast.Stmts
Actions []ast.Action
End []ast.Stmts
Functions []ast.Function
Scalars map[string]int
Arrays map[string]int
Compiled *compiler.Program
resolver.ResolvedProgram
Compiled *compiler.Program
}
// String returns an indented, pretty-printed version of the parsed
// program.
func (p *Program) String() string {
return p.toAST().String()
return p.ResolvedProgram.Program.String()
}
// Disassemble writes a human-readable form of the program's virtual machine
@ -104,18 +114,6 @@ func (p *Program) Disassemble(writer io.Writer) error {
return p.Compiled.Disassemble(writer)
}
// toAST returns a newly allocated *ast.Program whose Begin, Actions, End,
// Functions, Scalars and Arrays fields are taken from this *Program.
func (p *Program) toAST() *ast.Program {
	tree := new(ast.Program)
	tree.Begin = p.Begin
	tree.Actions = p.Actions
	tree.End = p.End
	tree.Functions = p.Functions
	tree.Scalars = p.Scalars
	tree.Arrays = p.Arrays
	return tree
}
// Parser state
type parser struct {
// Lexer instance and current token values
@ -131,28 +129,36 @@ type parser struct {
loopDepth int // current loop depth (0 if not in any loops)
// Variable tracking and resolving
locals map[string]bool // current function's locals (for determining scope)
varTypes map[string]map[string]typeInfo // map of func name to var name to type
varRefs []varRef // all variable references (usually scalars)
arrayRefs []arrayRef // all array references
multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions
// Function tracking
functions map[string]int // map of function name to index
userCalls []userCall // record calls so we can resolve them later
nativeFuncs map[string]interface{}
// Configuration and debugging
debugTypes bool // show variable types for debugging
debugWriter io.Writer // where the debug output goes
multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions
}
// Parse an entire AWK program.
func (p *parser) program() *Program {
prog := &Program{}
p.optionalNewlines()
func (p *parser) program() *ast.Program {
prog := &ast.Program{}
// Terminator "(SEMICOLON|NEWLINE) NEWLINE*" is required after each item
// with two exceptions where it is optional:
//
// 1. after the last item, or
// 2. when the previous item ended with a closing brace.
//
// NOTE: The second exception does not seem to be correct according to
// the POSIX grammar definition, but it is the common behaviour for the
// major AWK implementations.
needsTerminator := false
for p.tok != EOF {
if needsTerminator {
if !p.matches(NEWLINE, SEMICOLON) {
panic(p.errorf("expected ; or newline between items"))
}
p.next()
needsTerminator = false
}
p.optionalNewlines()
switch p.tok {
case EOF:
break
case BEGIN:
p.next()
prog.Begin = append(prog.Begin, p.stmtsBrace())
@ -161,7 +167,6 @@ func (p *parser) program() *Program {
prog.End = append(prog.End, p.stmtsBrace())
case FUNCTION:
function := p.function()
p.addFunction(function.Name, len(prog.Functions))
prog.Functions = append(prog.Functions, function)
default:
p.inAction = true
@ -170,23 +175,22 @@ func (p *parser) program() *Program {
if !p.matches(LBRACE, EOF) {
pattern = append(pattern, p.expr())
}
if !p.matches(LBRACE, EOF, NEWLINE) {
if !p.matches(LBRACE, EOF, NEWLINE, SEMICOLON) {
p.commaNewlines()
pattern = append(pattern, p.expr())
}
// Or an empty action (equivalent to { print $0 })
action := ast.Action{pattern, nil}
action := &ast.Action{pattern, nil}
if p.tok == LBRACE {
action.Stmts = p.stmtsBrace()
} else {
needsTerminator = true
}
prog.Actions = append(prog.Actions, action)
p.inAction = false
}
p.optionalNewlines()
}
p.resolveUserCalls(prog)
p.resolveVars(prog)
p.checkMultiExprs()
return prog
@ -213,6 +217,10 @@ func (p *parser) stmtsBrace() ast.Stmts {
p.optionalNewlines()
ss := []ast.Stmt{}
for p.tok != RBRACE && p.tok != EOF {
if p.matches(SEMICOLON, NEWLINE) {
p.next()
continue
}
ss = append(ss, p.stmt())
}
p.expect(RBRACE)
@ -224,6 +232,7 @@ func (p *parser) stmtsBrace() ast.Stmts {
// Parse a "simple" statement (eg: allowed in a for loop init clause).
func (p *parser) simpleStmt() ast.Stmt {
startPos := p.pos
switch p.tok {
case PRINT, PRINTF:
op := p.tok
@ -244,17 +253,16 @@ func (p *parser) simpleStmt() ast.Stmt {
dest = p.expr()
}
if op == PRINT {
return &ast.PrintStmt{args, redirect, dest}
return &ast.PrintStmt{args, redirect, dest, startPos, p.pos}
} else {
if len(args) == 0 {
panic(p.errorf("expected printf args, got none"))
}
return &ast.PrintfStmt{args, redirect, dest}
return &ast.PrintfStmt{args, redirect, dest, startPos, p.pos}
}
case DELETE:
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
name, namePos := p.expectName()
var index []ast.Expr
if p.tok == LBRACKET {
p.next()
@ -264,20 +272,18 @@ func (p *parser) simpleStmt() ast.Stmt {
}
p.expect(RBRACKET)
}
return &ast.DeleteStmt{ref, index}
case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN:
return &ast.DeleteStmt{name, namePos, index, startPos, p.pos}
case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, NEXTFILE, EXIT, RETURN:
panic(p.errorf("expected print/printf, delete, or expression"))
default:
return &ast.ExprStmt{p.expr()}
return &ast.ExprStmt{p.expr(), startPos, p.pos}
}
}
// Parse any top-level statement.
func (p *parser) stmt() ast.Stmt {
for p.matches(SEMICOLON, NEWLINE) {
p.next()
}
var s ast.Stmt
startPos := p.pos
switch p.tok {
case IF:
p.next()
@ -285,6 +291,7 @@ func (p *parser) stmt() ast.Stmt {
cond := p.expr()
p.expect(RPAREN)
p.optionalNewlines()
bodyStart := p.pos
body := p.stmts()
p.optionalNewlines()
var elseBody ast.Stmts
@ -293,7 +300,7 @@ func (p *parser) stmt() ast.Stmt {
p.optionalNewlines()
elseBody = p.stmts()
}
s = &ast.IfStmt{cond, body, elseBody}
s = &ast.IfStmt{cond, bodyStart, body, elseBody, startPos, p.pos}
case FOR:
// Parse for statement, either "for in" or C-like for loop.
//
@ -327,8 +334,18 @@ func (p *parser) stmt() ast.Stmt {
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
bodyStart := p.pos
body := p.loopStmts()
s = &ast.ForInStmt{varExpr, inExpr.Array, body}
s = &ast.ForInStmt{
Var: varExpr.Name,
VarPos: varExpr.Pos,
Array: inExpr.Array,
ArrayPos: inExpr.ArrayPos,
BodyStart: bodyStart,
Body: body,
Start: startPos,
End: p.pos,
}
} else {
// Match: for ([pre]; [cond]; [post]) body
p.expect(SEMICOLON)
@ -345,8 +362,9 @@ func (p *parser) stmt() ast.Stmt {
}
p.expect(RPAREN)
p.optionalNewlines()
bodyStart := p.pos
body := p.loopStmts()
s = &ast.ForStmt{pre, cond, post, body}
s = &ast.ForStmt{pre, cond, post, bodyStart, body, startPos, p.pos}
}
case WHILE:
p.next()
@ -354,42 +372,50 @@ func (p *parser) stmt() ast.Stmt {
cond := p.expr()
p.expect(RPAREN)
p.optionalNewlines()
bodyStart := p.pos
body := p.loopStmts()
s = &ast.WhileStmt{cond, body}
s = &ast.WhileStmt{cond, bodyStart, body, startPos, p.pos}
case DO:
p.next()
p.optionalNewlines()
body := p.loopStmts()
p.optionalNewlines()
p.expect(WHILE)
p.expect(LPAREN)
cond := p.expr()
p.expect(RPAREN)
s = &ast.DoWhileStmt{body, cond}
s = &ast.DoWhileStmt{body, cond, startPos, p.pos}
case BREAK:
if p.loopDepth == 0 {
panic(p.errorf("break must be inside a loop body"))
}
p.next()
s = &ast.BreakStmt{}
s = &ast.BreakStmt{startPos, p.pos}
case CONTINUE:
if p.loopDepth == 0 {
panic(p.errorf("continue must be inside a loop body"))
}
p.next()
s = &ast.ContinueStmt{}
s = &ast.ContinueStmt{startPos, p.pos}
case NEXT:
if !p.inAction && p.funcName == "" {
panic(p.errorf("next can't be inside BEGIN or END"))
}
p.next()
s = &ast.NextStmt{}
s = &ast.NextStmt{startPos, p.pos}
case NEXTFILE:
if !p.inAction && p.funcName == "" {
panic(p.errorf("nextfile can't be inside BEGIN or END"))
}
p.next()
s = &ast.NextfileStmt{startPos, p.pos}
case EXIT:
p.next()
var status ast.Expr
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
status = p.expr()
}
s = &ast.ExitStmt{status}
s = &ast.ExitStmt{status, startPos, p.pos}
case RETURN:
if p.funcName == "" {
panic(p.errorf("return must be inside a function"))
@ -399,10 +425,10 @@ func (p *parser) stmt() ast.Stmt {
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
value = p.expr()
}
s = &ast.ReturnStmt{value}
s = &ast.ReturnStmt{value, startPos, p.pos}
case LBRACE:
body := p.stmtsBrace()
s = &ast.BlockStmt{body}
s = &ast.BlockStmt{body, startPos, p.pos}
default:
s = p.simpleStmt()
}
@ -429,22 +455,18 @@ func (p *parser) loopStmts() ast.Stmts {
// Parse a function definition and body. As it goes, this resolves
// the local variable indexes and tracks which parameters are array
// parameters.
func (p *parser) function() ast.Function {
func (p *parser) function() *ast.Function {
if p.funcName != "" {
// Should never actually get here (FUNCTION token is only
// handled at the top level), but just in case.
panic(p.errorf("can't nest functions"))
}
p.next()
name := p.val
if _, ok := p.functions[name]; ok {
panic(p.errorf("function %q already defined", name))
}
p.expect(NAME)
name, funcNamePos := p.expectName()
p.expect(LPAREN)
first := true
params := make([]string, 0, 7) // pre-allocate some to reduce allocations
p.locals = make(map[string]bool, 7)
locals := make(map[string]bool, 7)
for p.tok != RPAREN {
if !first {
p.commaNewlines()
@ -454,23 +476,24 @@ func (p *parser) function() ast.Function {
if param == name {
panic(p.errorf("can't use function name as parameter name"))
}
if p.locals[param] {
if locals[param] {
panic(p.errorf("duplicate parameter name %q", param))
}
p.expect(NAME)
params = append(params, param)
p.locals[param] = true
locals[param] = true
}
p.expect(RPAREN)
p.optionalNewlines()
// Parse the body
p.startFunction(name, params)
body := p.stmtsBrace()
p.stopFunction()
p.locals = nil
p.funcName = name
return ast.Function{name, params, nil, body}
body := p.stmtsBrace()
p.funcName = ""
return &ast.Function{name, params, body, funcNamePos}
}
// Parse expressions separated by commas: args to print[f] or user
@ -520,37 +543,57 @@ func (p *parser) getLine() ast.Expr {
// An lvalue is a variable name, an array[expr] index expression, or
// an $expr field expression.
func (p *parser) _assign(higher func() ast.Expr) ast.Expr {
leftPos := p.pos
expr := higher()
_, isNamedField := expr.(*ast.NamedFieldExpr)
if (isNamedField || ast.IsLValue(expr)) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN,
MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) {
if p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) {
_, isNamedField := expr.(*ast.NamedFieldExpr)
if isNamedField {
panic(p.errorf("assigning @ expression not supported"))
}
op := p.tok
p.next()
right := p._assign(higher)
switch op {
case ASSIGN:
return &ast.AssignExpr{expr, right}
case ADD_ASSIGN:
op = ADD
case DIV_ASSIGN:
op = DIV
case MOD_ASSIGN:
op = MOD
case MUL_ASSIGN:
op = MUL
case POW_ASSIGN:
op = POW
case SUB_ASSIGN:
op = SUB
if !ast.IsLValue(expr) {
// Partial backtracking to allow expressions like "1 && x=1",
// which isn't really valid, as assignments are lower-precedence
// than binary operators, but onetrueawk, Gawk, and mawk all
// support this for logical, match and comparison operators. See
// issue #166.
binary, isBinary := expr.(*ast.BinaryExpr)
if isBinary && ast.IsLValue(binary.Right) {
switch binary.Op {
case AND, OR, MATCH, NOT_MATCH, EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER:
assign := makeAssign(binary.Right, op, right)
return &ast.BinaryExpr{binary.Left, binary.Op, assign}
}
}
panic(ast.PosErrorf(leftPos, "expected lvalue before %s", op))
}
return &ast.AugAssignExpr{expr, op, right}
return makeAssign(expr, op, right)
}
return expr
}
// makeAssign builds the expression node for "left op right", where op is
// an assignment token. Plain ASSIGN produces an *ast.AssignExpr; the
// augmented forms (+=, /=, %=, *=, ^=, -=) produce an *ast.AugAssignExpr
// carrying the matching binary operator token. Any other token is passed
// through unchanged as the AugAssignExpr operator.
func makeAssign(left ast.Expr, op Token, right ast.Expr) ast.Expr {
	if op == ASSIGN {
		return &ast.AssignExpr{left, right}
	}

	// Translate the augmented-assignment token to its binary operator.
	binaryOp := op
	switch op {
	case ADD_ASSIGN:
		binaryOp = ADD
	case SUB_ASSIGN:
		binaryOp = SUB
	case MUL_ASSIGN:
		binaryOp = MUL
	case DIV_ASSIGN:
		binaryOp = DIV
	case MOD_ASSIGN:
		binaryOp = MOD
	case POW_ASSIGN:
		binaryOp = POW
	}
	return &ast.AugAssignExpr{left, binaryOp, right}
}
// Parse a ?: conditional expression:
//
// or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
@ -593,9 +636,8 @@ func (p *parser) _in(higher func() ast.Expr) ast.Expr {
expr := higher()
for p.tok == IN {
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
expr = &ast.InExpr{[]ast.Expr{expr}, ref}
name, namePos := p.expectName()
expr = &ast.InExpr{[]ast.Expr{expr}, name, namePos}
}
return expr
}
@ -654,7 +696,7 @@ func (p *parser) mul() ast.Expr {
func (p *parser) pow() ast.Expr {
// Note that pow (expr ^ expr) is right-associative
expr := p.preIncr()
expr := p.postIncr()
if p.tok == POW {
p.next()
right := p.pow()
@ -663,20 +705,6 @@ func (p *parser) pow() ast.Expr {
return expr
}
// preIncr parses a prefix "++" or "--" expression. When the current token
// is neither INCR nor DECR it defers to postIncr. Otherwise it consumes
// the operator, parses the operand (recursively, so prefixes may be
// chained), and panics with a positioned error if the operand is not an
// lvalue.
func (p *parser) preIncr() ast.Expr {
	if p.tok != INCR && p.tok != DECR {
		return p.postIncr()
	}

	op := p.tok
	p.next()
	// Remember where the operand starts so the error points at it.
	operandPos := p.pos
	operand := p.preIncr()
	if !ast.IsLValue(operand) {
		panic(p.posErrorf(operandPos, "expected lvalue after ++ or --"))
	}
	return &ast.IncrExpr{operand, op, true}
}
func (p *parser) postIncr() ast.Expr {
expr := p.primary()
if (p.tok == INCR || p.tok == DECR) && ast.IsLValue(expr) {
@ -698,7 +726,7 @@ func (p *parser) primary() ast.Expr {
case STRING:
s := p.val
p.next()
return &ast.StrExpr{s}
return &ast.StrExpr{Value: s}
case DIV, DIV_ASSIGN:
// If we get to DIV or DIV_ASSIGN as a primary expression,
// it's actually a regex.
@ -706,7 +734,16 @@ func (p *parser) primary() ast.Expr {
return &ast.RegExpr{regex}
case DOLLAR:
p.next()
return &ast.FieldExpr{p.primary()}
var expr ast.Expr = &ast.FieldExpr{p.primary()}
// Post-increment operators have lower precedence than primary
// expressions by default, except for field expressions with
// post-increments (e.g., $$1++ = $($1++), NOT $($1)++).
if p.tok == INCR || p.tok == DECR {
op := p.tok
p.next()
expr = &ast.IncrExpr{expr, op, false}
}
return expr
case AT:
p.next()
return &ast.NamedFieldExpr{p.primary()}
@ -714,10 +751,17 @@ func (p *parser) primary() ast.Expr {
op := p.tok
p.next()
return &ast.UnaryExpr{op, p.pow()}
case NAME:
name := p.val
namePos := p.pos
case INCR, DECR:
op := p.tok
p.next()
exprPos := p.pos
expr := p.optionalLValue()
if expr == nil {
panic(ast.PosErrorf(exprPos, "expected lvalue after %s", op))
}
return &ast.IncrExpr{expr, op, true}
case NAME:
name, namePos := p.expectName()
if p.tok == LBRACKET {
// a[x] or a[x, y] array index expression
p.next()
@ -726,17 +770,14 @@ func (p *parser) primary() ast.Expr {
panic(p.errorf("expected expression instead of ]"))
}
p.expect(RBRACKET)
return &ast.IndexExpr{p.arrayRef(name, namePos), index}
return &ast.IndexExpr{name, namePos, index}
} else if p.tok == LPAREN && !p.lexer.HadSpace() {
if p.locals[name] {
panic(p.errorf("can't call local variable %q as function", name))
}
// Grammar requires no space between function name and
// left paren for user function calls, hence the funky
// lexer.HadSpace() method.
return p.userCall(name, namePos)
}
return p.varRef(name, namePos)
return &ast.VarExpr{name, namePos}
case LPAREN:
parenPos := p.pos
p.next()
@ -746,15 +787,14 @@ func (p *parser) primary() ast.Expr {
panic(p.errorf("expected expression, not %s", p.tok))
case 1:
p.expect(RPAREN)
return exprs[0]
return &ast.GroupingExpr{exprs[0]}
default:
// Multi-dimensional array "in" requires parens around index
p.expect(RPAREN)
if p.tok == IN {
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
return &ast.InExpr{exprs, ref}
name, namePos := p.expectName()
return &ast.InExpr{exprs, name, namePos}
}
// MultiExpr is used as a pseudo-expression for print[f] parsing.
return p.multiExpr(exprs, parenPos)
@ -785,7 +825,7 @@ func (p *parser) primary() ast.Expr {
inPos := p.pos
in := p.expr()
if !ast.IsLValue(in) {
panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue"))
panic(ast.PosErrorf(inPos, "3rd arg to sub/gsub must be lvalue"))
}
args = append(args, in)
}
@ -796,9 +836,8 @@ func (p *parser) primary() ast.Expr {
p.expect(LPAREN)
str := p.expr()
p.commaNewlines()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
args := []ast.Expr{str, ref}
name, namePos := p.expectName()
args := []ast.Expr{str, &ast.VarExpr{name, namePos}}
if p.tok == COMMA {
p.commaNewlines()
args = append(args, p.regexStr(p.expr))
@ -902,9 +941,7 @@ func (p *parser) optionalLValue() ast.Expr {
// User function call, e.g., foo() not lvalue.
return nil
}
name := p.val
namePos := p.pos
p.next()
name, namePos := p.expectName()
if p.tok == LBRACKET {
// a[x] or a[x, y] array index expression
p.next()
@ -913,9 +950,9 @@ func (p *parser) optionalLValue() ast.Expr {
panic(p.errorf("expected expression instead of ]"))
}
p.expect(RBRACKET)
return &ast.IndexExpr{p.arrayRef(name, namePos), index}
return &ast.IndexExpr{name, namePos, index}
}
return p.varRef(name, namePos)
return &ast.VarExpr{name, namePos}
case DOLLAR:
p.next()
return &ast.FieldExpr{p.primary()}
@ -930,7 +967,7 @@ func (p *parser) optionalLValue() ast.Expr {
func (p *parser) regexStr(parse func() ast.Expr) ast.Expr {
if p.matches(DIV, DIV_ASSIGN) {
regex := p.nextRegex()
return &ast.StrExpr{regex}
return &ast.StrExpr{Value: regex, Regex: true}
}
return parse()
}
@ -987,7 +1024,7 @@ func (p *parser) nextRegex() string {
panic(p.errorf("%s", p.val))
}
regex := p.val
_, err := regexp.Compile(regex)
_, err := regexp.Compile(compiler.AddRegexFlags(regex))
if err != nil {
panic(p.errorf("%v", err))
}
@ -1003,6 +1040,13 @@ func (p *parser) expect(tok Token) {
p.next()
}
// expectName requires the current token to be a NAME, consumes it, and
// returns the name's text together with the position it was found at.
func (p *parser) expectName() (string, Position) {
	// Capture value and position before expect() advances the parser.
	pos := p.pos
	name := p.val
	p.expect(NAME)
	return name, pos
}
// Return true iff current token matches one of the given operators,
// but don't parse next token.
func (p *parser) matches(operators ...Token) bool {
@ -1017,13 +1061,7 @@ func (p *parser) matches(operators ...Token) bool {
// Format given string and args with Sprintf and return *ParseError
// with that message and the current position.
func (p *parser) errorf(format string, args ...interface{}) error {
return p.posErrorf(p.pos, format, args...)
}
// Like errorf, but with an explicit position.
func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error {
message := fmt.Sprintf(format, args...)
return &ParseError{pos, message}
return ast.PosErrorf(p.pos, format, args...)
}
// Parse call to a user-defined function (and record call site for
@ -1037,12 +1075,37 @@ func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr {
p.commaNewlines()
}
arg := p.expr()
p.processUserCallArg(name, arg, i)
args = append(args, arg)
i++
}
p.expect(RPAREN)
call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later
p.recordUserCall(call, pos)
return call
return &ast.UserCallExpr{name, args, pos}
}
// multiExpr wraps exprs in an *ast.MultiExpr (the comma-separated
// pseudo-expression used to allow commas around print/printf arguments)
// and records its position in p.multiExprs, keyed by the new node.
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
	multi := &ast.MultiExpr{exprs}
	p.multiExprs[multi] = pos
	return multi
}
// useMultiExpr marks the multi expression as used (by a print/printf
// statement) by removing it from p.multiExprs, so that checkMultiExprs
// no longer reports it. Removing an entry that is not present is a no-op.
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
delete(p.multiExprs, expr)
}
// checkMultiExprs panics with a positioned syntax error if any multi
// expression is still recorded in p.multiExprs (that is, was never marked
// as used). The error is reported at the earliest recorded position,
// comparing by line first and then by column.
func (p *parser) checkMultiExprs() {
	if len(p.multiExprs) > 0 {
		// Start from a position later than any real one.
		earliest := Position{1000000000, 1000000000}
		for _, pos := range p.multiExprs {
			switch {
			case pos.Line < earliest.Line:
				earliest = pos
			case pos.Line == earliest.Line && pos.Column < earliest.Column:
				earliest = pos
			}
		}
		panic(ast.PosErrorf(earliest, "unexpected comma-separated expression"))
	}
}

View File

@ -1,462 +0,0 @@
// Resolve function calls and variable types
package parser
import (
"fmt"
"reflect"
"sort"
"github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
// varType classifies a variable as the resolver sees it.
type varType int

// typeUnknown is the zero value, so a variable with no recorded type
// reads as unknown.
const (
	typeUnknown varType = iota
	typeScalar
	typeArray
)

// String returns "Scalar" or "Array" for the two known types and
// "Unknown" for every other value, including typeUnknown.
func (t varType) String() string {
	if t == typeScalar {
		return "Scalar"
	}
	if t == typeArray {
		return "Array"
	}
	return "Unknown"
}
// typeInfo records type information for a single variable. Entries are
// stored in parser.varTypes, keyed by function name ("" for globals) and
// then by variable name.
type typeInfo struct {
// typ is the variable's type; it stays typeUnknown until a scalar or
// array reference, or a call site, settles it.
typ varType
// ref is the *ast.VarExpr stored when the entry was created from a
// scalar reference; entries created from an array reference store nil.
ref *ast.VarExpr
// scope is the scope reported by getScope for the variable.
scope ast.VarScope
// index is the variable's index, filled in by resolveVars.
index int
// callName and argIndex are set when the variable is first seen as an
// argument to a user function call: the name of the called function and
// the argument's position in that call.
callName string
argIndex int
}
// String renders every field of the typeInfo on one line. It is used by
// printVarTypes when debugTypes is turned on. The scope is shown as
// "Global", "Local", or "Special" for any other scope value.
func (t typeInfo) String() string {
	scopeName := "Special"
	if t.scope == ast.ScopeGlobal {
		scopeName = "Global"
	} else if t.scope == ast.ScopeLocal {
		scopeName = "Local"
	}
	return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d",
		t.typ, t.ref, scopeName, t.index, t.callName, t.argIndex)
}
// varRef is a single variable reference (normally scalar), recorded by
// parser.varRef so the reference can be checked and patched up after
// parsing.
type varRef struct {
// funcName is the enclosing function's name when the variable is a
// local, otherwise the empty string (see getScope).
funcName string
// ref is the expression node created for this reference.
ref *ast.VarExpr
// isArg is set by processUserCallArg when the reference is passed
// directly as an argument to a user function call.
isArg bool
// pos is the source position of the reference, used for error messages.
pos Position
}
// arrayRef is a single array reference, recorded by parser.arrayRef.
type arrayRef struct {
// funcName is the enclosing function's name when the array is a local,
// otherwise the empty string (see getScope).
funcName string
// ref is the expression node created for this reference.
ref *ast.ArrayExpr
// pos is the source position of the reference.
pos Position
}
// initResolve prepares the resolver state before parsing begins: it
// allocates the variable-type table (with an entry for globals under the
// empty function name), the function index map and the multi-expression
// map, and registers the built-in arrays.
func (p *parser) initResolve() {
	p.varTypes = map[string]map[string]typeInfo{
		"": make(map[string]typeInfo), // globals
	}
	p.functions = make(map[string]int)

	// The interpreter relies on ARGV being present; ENVIRON and FIELDS
	// are the other built-in arrays.
	for _, builtin := range []string{"ARGV", "ENVIRON", "FIELDS"} {
		p.arrayRef(builtin, Position{1, 1})
	}

	p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
}
// startFunction signals the start of a function body: it makes name the
// current function and gives it an empty variable-type table. The params
// argument is accepted but not used here.
func (p *parser) startFunction(name string, params []string) {
	localTypes := make(map[string]typeInfo)
	p.funcName = name
	p.varTypes[name] = localTypes
}
// stopFunction signals the end of a function body by resetting
// p.funcName to the empty string, the value used for code outside any
// function.
func (p *parser) stopFunction() {
p.funcName = ""
}
// addFunction records in p.functions the index under which the function
// called name is stored. An existing entry for the same name is
// overwritten.
func (p *parser) addFunction(name string, index int) {
p.functions[name] = index
}
// userCall records a call to a user function so its index can be
// resolved after parsing.
type userCall struct {
// call is the call expression node to patch up later.
call *ast.UserCallExpr
// pos is the source position of the call, used for error messages.
pos Position
// inFunc is the name of the function being parsed when the call was
// recorded (empty when outside any function).
inFunc string
}
// recordUserCall appends a call site to p.userCalls, remembering the call
// node, its position, and the function currently being parsed.
func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) {
	site := userCall{call: call, pos: pos, inFunc: p.funcName}
	p.userCalls = append(p.userCalls, site)
}
// resolveUserCalls runs after parsing and fills in the Index (and, for
// native Go functions, the Native flag) of every recorded user call.
//
// It panics with a positioned error when a called function is neither
// AWK-defined nor native, or when a call passes more arguments than the
// function declares (variadic native functions are exempt).
func (p *parser) resolveUserCalls(prog *Program) {
	// Number the native funcs; sorting the names gives a consistent order.
	sortedNative := make([]string, 0, len(p.nativeFuncs))
	for nativeName := range p.nativeFuncs {
		sortedNative = append(sortedNative, nativeName)
	}
	sort.Strings(sortedNative)
	nativeIndex := make(map[string]int, len(sortedNative))
	for position, nativeName := range sortedNative {
		nativeIndex[nativeName] = position
	}

	for _, site := range p.userCalls {
		call := site.call

		// AWK-defined functions take precedence over native Go funcs.
		if awkIndex, defined := p.functions[call.Name]; defined {
			declared := prog.Functions[awkIndex]
			if len(call.Args) > len(declared.Params) {
				panic(p.posErrorf(site.pos, "%q called with more arguments than declared", call.Name))
			}
			call.Index = awkIndex
			continue
		}

		native, found := p.nativeFuncs[call.Name]
		if !found {
			panic(p.posErrorf(site.pos, "undefined function %q", call.Name))
		}
		signature := reflect.TypeOf(native)
		if !signature.IsVariadic() && len(call.Args) > signature.NumIn() {
			panic(p.posErrorf(site.pos, "%q called with more arguments than declared", call.Name))
		}
		call.Native = true
		call.Index = nativeIndex[call.Name]
	}
}
// processUserCallArg handles one argument of a call to funcName. Only
// arguments that are plain variable references matter: their type cannot
// be known from context, so if this is the first reference to the
// variable its type is reset to unknown and tagged with the called
// function and argument index. The most recent entry in p.varRefs is also
// flagged as a call argument for later error handling.
func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) {
	varExpr, isVar := arg.(*ast.VarExpr)
	if !isVar {
		return
	}

	scope, owner := p.getScope(varExpr.Name)
	// If the stored reference is this very node, it is the variable's
	// first reference; otherwise the type is already known.
	if first := p.varTypes[owner][varExpr.Name].ref; first == varExpr {
		p.varTypes[owner][varExpr.Name] = typeInfo{typeUnknown, first, scope, 0, funcName, index}
	}

	last := len(p.varRefs) - 1
	p.varRefs[last].isArg = true
}
// getScope reports the scope of the variable called name. For a local of
// the function being parsed it also returns that function's name; for
// special and global variables the returned name is empty. Locals are
// checked first, so they shadow special variables of the same name.
func (p *parser) getScope(name string) (ast.VarScope, string) {
	if p.locals[name] {
		return ast.ScopeLocal, p.funcName
	}
	if ast.SpecialVarIndex(name) > 0 {
		return ast.ScopeSpecial, ""
	}
	return ast.ScopeGlobal, ""
}
// varRef records a variable (scalar) reference and returns the new
// *ast.VarExpr; its index is left at 0 and is set later. If the variable's
// type is still unknown, it is marked as a scalar with this node as its
// reference, keeping any call name already recorded for it.
func (p *parser) varRef(name string, pos Position) *ast.VarExpr {
	scope, owner := p.getScope(name)
	node := &ast.VarExpr{scope, 0, name}
	p.varRefs = append(p.varRefs, varRef{funcName: owner, ref: node, isArg: false, pos: pos})

	if known := p.varTypes[owner][name]; known.typ == typeUnknown {
		p.varTypes[owner][name] = typeInfo{
			typ:      typeScalar,
			ref:      node,
			scope:    scope,
			callName: known.callName,
		}
	}
	return node
}
// arrayRef records an array reference and returns the new *ast.ArrayExpr;
// its index is left at 0 and is set later. It panics if name is a special
// variable, since those cannot be used as arrays. If the variable's type
// is still unknown, it is marked as an array, keeping any call name
// already recorded for it.
func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr {
	scope, owner := p.getScope(name)
	if scope == ast.ScopeSpecial {
		panic(p.errorf("can't use scalar %q as array", name))
	}

	node := &ast.ArrayExpr{scope, 0, name}
	p.arrayRefs = append(p.arrayRefs, arrayRef{funcName: owner, ref: node, pos: pos})

	if known := p.varTypes[owner][name]; known.typ == typeUnknown {
		p.varTypes[owner][name] = typeInfo{
			typ:      typeArray,
			scope:    scope,
			callName: known.callName,
		}
	}
	return node
}
// printVarTypes writes variable type information (for debugging) to
// p.debugWriter: first the program's scalar and array index maps, then
// one section per function (globals under the heading "globals"), with
// functions and variable names each listed in sorted order.
func (p *parser) printVarTypes(prog *Program) {
	out := p.debugWriter
	fmt.Fprintf(out, "scalars: %v\n", prog.Scalars)
	fmt.Fprintf(out, "arrays: %v\n", prog.Arrays)

	// Map iteration order is random, so sort for stable output.
	sections := make([]string, 0, len(p.varTypes))
	for section := range p.varTypes {
		sections = append(sections, section)
	}
	sort.Strings(sections)

	for _, section := range sections {
		if section == "" {
			fmt.Fprintf(out, "globals\n")
		} else {
			fmt.Fprintf(out, "function %s\n", section)
		}

		infos := p.varTypes[section]
		names := make([]string, 0, len(infos))
		for varName := range infos {
			names = append(names, varName)
		}
		sort.Strings(names)
		for _, varName := range names {
			fmt.Fprintf(out, " %s: %s\n", varName, infos[varName])
		}
	}
}
// Resolve unknown variables types and generate variable indexes and
// name-to-index mappings for interpreter
func (p *parser) resolveVars(prog *Program) {
// First go through all unknown types and try to determine the
// type from the parameter type in that function definition.
// Iterate through functions in topological order, for example
// if f() calls g(), process g first, then f.
callGraph := make(map[string]map[string]struct{})
for _, call := range p.userCalls {
if _, ok := callGraph[call.inFunc]; !ok {
callGraph[call.inFunc] = make(map[string]struct{})
}
callGraph[call.inFunc][call.call.Name] = struct{}{}
}
sortedFuncs := topoSort(callGraph)
for _, funcName := range sortedFuncs {
infos := p.varTypes[funcName]
for name, info := range infos {
if info.scope == ast.ScopeSpecial || info.typ != typeUnknown {
// It's a special var or type is already known
continue
}
funcIndex, ok := p.functions[info.callName]
if !ok {
// Function being called is a native function
continue
}
// Determine var type based on type of this parameter
// in the called function (if we know that)
paramName := prog.Functions[funcIndex].Params[info.argIndex]
typ := p.varTypes[info.callName][paramName].typ
if typ != typeUnknown {
if p.debugTypes {
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
funcName, name, typ)
}
info.typ = typ
p.varTypes[funcName][name] = info
}
}
}
// Resolve global variables (iteration order is undefined, so
// assign indexes basically randomly)
prog.Scalars = make(map[string]int)
prog.Arrays = make(map[string]int)
for name, info := range p.varTypes[""] {
_, isFunc := p.functions[name]
if isFunc {
// Global var can't also be the name of a function
panic(p.errorf("global var %q can't also be a function", name))
}
var index int
if info.scope == ast.ScopeSpecial {
index = ast.SpecialVarIndex(name)
} else if info.typ == typeArray {
index = len(prog.Arrays)
prog.Arrays[name] = index
} else {
index = len(prog.Scalars)
prog.Scalars[name] = index
}
info.index = index
p.varTypes[""][name] = info
}
// Fill in unknown parameter types that are being called with arrays,
// for example, as in the following code:
//
// BEGIN { arr[0]; f(arr) }
// function f(a) { }
for _, c := range p.userCalls {
if c.call.Native {
continue
}
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
argType := p.varTypes[funcName][varExpr.Name]
paramType := p.varTypes[function.Name][function.Params[i]]
if argType.typ == typeArray && paramType.typ == typeUnknown {
paramType.typ = argType.typ
p.varTypes[function.Name][function.Params[i]] = paramType
}
}
}
// Resolve local variables (assign indexes in order of params).
// Also patch up Function.Arrays (tells interpreter which args
// are arrays).
for funcName, infos := range p.varTypes {
if funcName == "" {
continue
}
scalarIndex := 0
arrayIndex := 0
functionIndex := p.functions[funcName]
function := prog.Functions[functionIndex]
arrays := make([]bool, len(function.Params))
for i, name := range function.Params {
info := infos[name]
var index int
if info.typ == typeArray {
index = arrayIndex
arrayIndex++
arrays[i] = true
} else {
// typeScalar or typeUnknown: variables may still be
// of unknown type if they've never been referenced --
// default to scalar in that case
index = scalarIndex
scalarIndex++
}
info.index = index
p.varTypes[funcName][name] = info
}
prog.Functions[functionIndex].Arrays = arrays
}
// Check that variables passed to functions are the correct type
for _, c := range p.userCalls {
// Check native function calls
if c.call.Native {
for _, arg := range c.call.Args {
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
// Non-variable expression, must be scalar
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
info := p.varTypes[funcName][varExpr.Name]
if info.typ == typeArray {
panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name))
}
}
continue
}
// Check AWK function calls
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
if function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
}
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
info := p.varTypes[funcName][varExpr.Name]
if info.typ == typeArray && !function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name))
}
if info.typ != typeArray && function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name))
}
}
}
if p.debugTypes {
p.printVarTypes(prog)
}
// Patch up variable indexes (interpreter uses an index instead
// of name for more efficient lookups)
for _, varRef := range p.varRefs {
info := p.varTypes[varRef.funcName][varRef.ref.Name]
if info.typ == typeArray && !varRef.isArg {
panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name))
}
varRef.ref.Index = info.index
}
for _, arrayRef := range p.arrayRefs {
info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name]
if info.typ == typeScalar {
panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name))
}
arrayRef.ref.Index = info.index
}
}
// getVarFuncName reports the scope that a variable name resolves to
// when it is referenced inside function inFunc: it returns inFunc if
// name is one of that function's parameters (i.e. a local), and ""
// (meaning the global scope) otherwise. An empty inFunc always yields
// "".
func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
	const global = ""
	if inFunc != "" {
		// Locals are exactly the declared parameters of the function.
		params := prog.Functions[p.functions[inFunc]].Params
		for i := range params {
			if params[i] == name {
				return inFunc
			}
		}
	}
	return global
}
// multiExpr builds a "multi expression" (the comma-separated
// pseudo-expression used to allow commas around print/printf
// arguments) from exprs, remembers the position it was seen at in
// p.multiExprs, and returns it as an ast.Expr.
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
	multi := &ast.MultiExpr{exprs}
	// Keyed by the node pointer so the entry can be removed later.
	p.multiExprs[multi] = pos
	return multi
}
// useMultiExpr marks the multi expression as used (by a print/printf
// statement) by removing it from p.multiExprs, the map that multiExpr
// fills in and checkMultiExprs inspects for leftovers.
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
delete(p.multiExprs, expr)
}
// checkMultiExprs verifies that no multi expressions remain in
// p.multiExprs (a leftover one is a syntax error). If any remain, it
// panics with a positioned error at the earliest leftover: lowest
// line first, then lowest column.
func (p *parser) checkMultiExprs() {
	if len(p.multiExprs) == 0 {
		return
	}

	// Start from a sentinel and keep the smallest position seen; the
	// result does not depend on map iteration order.
	earliest := Position{1000000000, 1000000000}
	for _, candidate := range p.multiExprs {
		switch {
		case candidate.Line < earliest.Line:
			earliest = candidate
		case candidate.Line == earliest.Line && candidate.Column < earliest.Column:
			earliest = candidate
		}
	}
	panic(p.posErrorf(earliest, "unexpected comma-separated expression"))
}

View File

@ -1,7 +1,8 @@
# github.com/benhoyt/goawk v1.20.0
## explicit; go 1.14
# github.com/benhoyt/goawk v1.26.0
## explicit; go 1.16
github.com/benhoyt/goawk/internal/ast
github.com/benhoyt/goawk/internal/compiler
github.com/benhoyt/goawk/internal/resolver
github.com/benhoyt/goawk/interp
github.com/benhoyt/goawk/lexer
github.com/benhoyt/goawk/parser