diff --git a/play/go.mod b/play/go.mod index aeb0fec..7047bb0 100644 --- a/play/go.mod +++ b/play/go.mod @@ -1,5 +1,5 @@ module github.com/deuill/grawkit/play -go 1.12 +go 1.17 -require github.com/benhoyt/goawk v1.6.0 +require github.com/benhoyt/goawk v1.13.0 diff --git a/play/go.sum b/play/go.sum index 6a48630..2a43f42 100644 --- a/play/go.sum +++ b/play/go.sum @@ -1,2 +1,2 @@ -github.com/benhoyt/goawk v1.6.0 h1:6oHKBL2BAvYiKroi8RhmpnhyvMGeiW5u/WEaxyOcKRQ= -github.com/benhoyt/goawk v1.6.0/go.mod h1:krl47rWeW8s+kD3dtHYm6aq4MBGRzQD5PGkZaRm38Uk= +github.com/benhoyt/goawk v1.13.0 h1:/Iu42ErHsT5vHrpWyewpI98hB2PHBk66o+oLZs4drPs= +github.com/benhoyt/goawk v1.13.0/go.mod h1:UKzPyqDh9O7HZ/ftnU33MYlAP2rPbXdwQ+OVlEOPsjM= diff --git a/play/vendor/github.com/benhoyt/goawk/LICENSE.txt b/play/vendor/github.com/benhoyt/goawk/LICENSE.txt new file mode 100644 index 0000000..620ad38 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/LICENSE.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Ben Hoyt + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go new file mode 100644 index 0000000..1007d44 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go @@ -0,0 +1,557 @@ +// GoAWK parser - abstract syntax tree structs + +package ast + +import ( + "fmt" + "strconv" + "strings" + + . "github.com/benhoyt/goawk/lexer" +) + +// Stmts is a block containing multiple statements. +type Stmts []Stmt + +func (ss Stmts) String() string { + lines := []string{} + for _, s := range ss { + subLines := strings.Split(s.String(), "\n") + for _, sl := range subLines { + lines = append(lines, " "+sl+"\n") + } + } + return strings.Join(lines, "") +} + +// Action is pattern-action section of a program. +type Action struct { + Pattern []Expr + Stmts Stmts +} + +func (a *Action) String() string { + patterns := make([]string, len(a.Pattern)) + for i, p := range a.Pattern { + patterns[i] = p.String() + } + sep := "" + if len(patterns) > 0 && a.Stmts != nil { + sep = " " + } + stmtsStr := "" + if a.Stmts != nil { + stmtsStr = "{\n" + a.Stmts.String() + "}" + } + return strings.Join(patterns, ", ") + sep + stmtsStr +} + +// Expr is the abstract syntax tree for any AWK expression. +type Expr interface { + expr() + String() string +} + +// All these types implement the Expr interface. 
+func (e *FieldExpr) expr() {} +func (e *UnaryExpr) expr() {} +func (e *BinaryExpr) expr() {} +func (e *ArrayExpr) expr() {} +func (e *InExpr) expr() {} +func (e *CondExpr) expr() {} +func (e *NumExpr) expr() {} +func (e *StrExpr) expr() {} +func (e *RegExpr) expr() {} +func (e *VarExpr) expr() {} +func (e *IndexExpr) expr() {} +func (e *AssignExpr) expr() {} +func (e *AugAssignExpr) expr() {} +func (e *IncrExpr) expr() {} +func (e *CallExpr) expr() {} +func (e *UserCallExpr) expr() {} +func (e *MultiExpr) expr() {} +func (e *GetlineExpr) expr() {} + +// FieldExpr is an expression like $0. +type FieldExpr struct { + Index Expr +} + +func (e *FieldExpr) String() string { + return "$" + e.Index.String() +} + +// UnaryExpr is an expression like -1234. +type UnaryExpr struct { + Op Token + Value Expr +} + +func (e *UnaryExpr) String() string { + return e.Op.String() + e.Value.String() +} + +// BinaryExpr is an expression like 1 + 2. +type BinaryExpr struct { + Left Expr + Op Token + Right Expr +} + +func (e *BinaryExpr) String() string { + var opStr string + if e.Op == CONCAT { + opStr = " " + } else { + opStr = " " + e.Op.String() + " " + } + return "(" + e.Left.String() + opStr + e.Right.String() + ")" +} + +// ArrayExpr is an array reference. Not really a stand-alone +// expression, except as an argument to split() or a user function +// call. +type ArrayExpr struct { + Scope VarScope + Index int + Name string +} + +func (e *ArrayExpr) String() string { + return e.Name +} + +// InExpr is an expression like (index in array). +type InExpr struct { + Index []Expr + Array *ArrayExpr +} + +func (e *InExpr) String() string { + if len(e.Index) == 1 { + return "(" + e.Index[0].String() + " in " + e.Array.String() + ")" + } + indices := make([]string, len(e.Index)) + for i, index := range e.Index { + indices[i] = index.String() + } + return "((" + strings.Join(indices, ", ") + ") in " + e.Array.String() + ")" +} + +// CondExpr is an expression like cond ? 1 : 0. 
+type CondExpr struct { + Cond Expr + True Expr + False Expr +} + +func (e *CondExpr) String() string { + return "(" + e.Cond.String() + " ? " + e.True.String() + " : " + e.False.String() + ")" +} + +// NumExpr is a literal number like 1234. +type NumExpr struct { + Value float64 +} + +func (e *NumExpr) String() string { + return fmt.Sprintf("%.6g", e.Value) +} + +// StrExpr is a literal string like "foo". +type StrExpr struct { + Value string +} + +func (e *StrExpr) String() string { + return strconv.Quote(e.Value) +} + +// RegExpr is a stand-alone regex expression, equivalent to: +// $0 ~ /regex/. +type RegExpr struct { + Regex string +} + +func (e *RegExpr) String() string { + escaped := strings.Replace(e.Regex, "/", `\/`, -1) + return "/" + escaped + "/" +} + +type VarScope int + +const ( + ScopeSpecial VarScope = iota + ScopeGlobal + ScopeLocal +) + +// VarExpr is a variable reference (special var, global, or local). +// Index is the resolved variable index used by the interpreter; Name +// is the original name used by String(). +type VarExpr struct { + Scope VarScope + Index int + Name string +} + +func (e *VarExpr) String() string { + return e.Name +} + +// IndexExpr is an expression like a[k] (rvalue or lvalue). +type IndexExpr struct { + Array *ArrayExpr + Index []Expr +} + +func (e *IndexExpr) String() string { + indices := make([]string, len(e.Index)) + for i, index := range e.Index { + indices[i] = index.String() + } + return e.Array.String() + "[" + strings.Join(indices, ", ") + "]" +} + +// AssignExpr is an expression like x = 1234. +type AssignExpr struct { + Left Expr // can be one of: var, array[x], $n + Right Expr +} + +func (e *AssignExpr) String() string { + return e.Left.String() + " = " + e.Right.String() +} + +// AugAssignExpr is an assignment expression like x += 5. 
+type AugAssignExpr struct { + Left Expr // can be one of: var, array[x], $n + Op Token + Right Expr +} + +func (e *AugAssignExpr) String() string { + return e.Left.String() + " " + e.Op.String() + "= " + e.Right.String() +} + +// IncrExpr is an increment or decrement expression like x++ or --y. +type IncrExpr struct { + Expr Expr + Op Token + Pre bool +} + +func (e *IncrExpr) String() string { + if e.Pre { + return e.Op.String() + e.Expr.String() + } else { + return e.Expr.String() + e.Op.String() + } +} + +// CallExpr is a builtin function call like length($1). +type CallExpr struct { + Func Token + Args []Expr +} + +func (e *CallExpr) String() string { + args := make([]string, len(e.Args)) + for i, a := range e.Args { + args[i] = a.String() + } + return e.Func.String() + "(" + strings.Join(args, ", ") + ")" +} + +// UserCallExpr is a user-defined function call like my_func(1, 2, 3) +// +// Index is the resolved function index used by the interpreter; Name +// is the original name used by String(). +type UserCallExpr struct { + Native bool // false = AWK-defined function, true = native Go func + Index int + Name string + Args []Expr +} + +func (e *UserCallExpr) String() string { + args := make([]string, len(e.Args)) + for i, a := range e.Args { + args[i] = a.String() + } + return e.Name + "(" + strings.Join(args, ", ") + ")" +} + +// MultiExpr isn't an interpretable expression, but it's used as a +// pseudo-expression for print[f] parsing. +type MultiExpr struct { + Exprs []Expr +} + +func (e *MultiExpr) String() string { + exprs := make([]string, len(e.Exprs)) + for i, e := range e.Exprs { + exprs[i] = e.String() + } + return "(" + strings.Join(exprs, ", ") + ")" +} + +// GetlineExpr is an expression read from file or pipe input. 
+type GetlineExpr struct { + Command Expr + Target Expr + File Expr +} + +func (e *GetlineExpr) String() string { + s := "" + if e.Command != nil { + s += e.Command.String() + " |" + } + s += "getline" + if e.Target != nil { + s += " " + e.Target.String() + } + if e.File != nil { + s += " <" + e.File.String() + } + return s +} + +// IsLValue returns true if the given expression can be used as an +// lvalue (on the left-hand side of an assignment, in a ++ or -- +// operation, or as the third argument to sub or gsub). +func IsLValue(expr Expr) bool { + switch expr.(type) { + case *VarExpr, *IndexExpr, *FieldExpr: + return true + default: + return false + } +} + +// Stmt is the abstract syntax tree for any AWK statement. +type Stmt interface { + stmt() + String() string +} + +// All these types implement the Stmt interface. +func (s *PrintStmt) stmt() {} +func (s *PrintfStmt) stmt() {} +func (s *ExprStmt) stmt() {} +func (s *IfStmt) stmt() {} +func (s *ForStmt) stmt() {} +func (s *ForInStmt) stmt() {} +func (s *WhileStmt) stmt() {} +func (s *DoWhileStmt) stmt() {} +func (s *BreakStmt) stmt() {} +func (s *ContinueStmt) stmt() {} +func (s *NextStmt) stmt() {} +func (s *ExitStmt) stmt() {} +func (s *DeleteStmt) stmt() {} +func (s *ReturnStmt) stmt() {} +func (s *BlockStmt) stmt() {} + +// PrintStmt is a statement like print $1, $3. +type PrintStmt struct { + Args []Expr + Redirect Token + Dest Expr +} + +func (s *PrintStmt) String() string { + return printString("print", s.Args, s.Redirect, s.Dest) +} + +func printString(f string, args []Expr, redirect Token, dest Expr) string { + parts := make([]string, len(args)) + for i, a := range args { + parts[i] = a.String() + } + str := f + " " + strings.Join(parts, ", ") + if dest != nil { + str += " " + redirect.String() + dest.String() + } + return str +} + +// PrintfStmt is a statement like printf "%3d", 1234. 
+type PrintfStmt struct { + Args []Expr + Redirect Token + Dest Expr +} + +func (s *PrintfStmt) String() string { + return printString("printf", s.Args, s.Redirect, s.Dest) +} + +// ExprStmt is statement like a bare function call: my_func(x). +type ExprStmt struct { + Expr Expr +} + +func (s *ExprStmt) String() string { + return s.Expr.String() +} + +// IfStmt is an if or if-else statement. +type IfStmt struct { + Cond Expr + Body Stmts + Else Stmts +} + +func (s *IfStmt) String() string { + str := "if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" + if len(s.Else) > 0 { + str += " else {\n" + s.Else.String() + "}" + } + return str +} + +// ForStmt is a C-like for loop: for (i=0; i<10; i++) print i. +type ForStmt struct { + Pre Stmt + Cond Expr + Post Stmt + Body Stmts +} + +func (s *ForStmt) String() string { + preStr := "" + if s.Pre != nil { + preStr = s.Pre.String() + } + condStr := "" + if s.Cond != nil { + condStr = " " + trimParens(s.Cond.String()) + } + postStr := "" + if s.Post != nil { + postStr = " " + s.Post.String() + } + return "for (" + preStr + ";" + condStr + ";" + postStr + ") {\n" + s.Body.String() + "}" +} + +// ForInStmt is a for loop like for (k in a) print k, a[k]. +type ForInStmt struct { + Var *VarExpr + Array *ArrayExpr + Body Stmts +} + +func (s *ForInStmt) String() string { + return "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n" + s.Body.String() + "}" +} + +// WhileStmt is a while loop. +type WhileStmt struct { + Cond Expr + Body Stmts +} + +func (s *WhileStmt) String() string { + return "while (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}" +} + +// DoWhileStmt is a do-while loop. +type DoWhileStmt struct { + Body Stmts + Cond Expr +} + +func (s *DoWhileStmt) String() string { + return "do {\n" + s.Body.String() + "} while (" + trimParens(s.Cond.String()) + ")" +} + +// BreakStmt is a break statement. 
+type BreakStmt struct{} + +func (s *BreakStmt) String() string { + return "break" +} + +// ContinueStmt is a continue statement. +type ContinueStmt struct{} + +func (s *ContinueStmt) String() string { + return "continue" +} + +// NextStmt is a next statement. +type NextStmt struct{} + +func (s *NextStmt) String() string { + return "next" +} + +// ExitStmt is an exit statement. +type ExitStmt struct { + Status Expr +} + +func (s *ExitStmt) String() string { + var statusStr string + if s.Status != nil { + statusStr = " " + s.Status.String() + } + return "exit" + statusStr +} + +// DeleteStmt is a statement like delete a[k]. +type DeleteStmt struct { + Array *ArrayExpr + Index []Expr +} + +func (s *DeleteStmt) String() string { + indices := make([]string, len(s.Index)) + for i, index := range s.Index { + indices[i] = index.String() + } + return "delete " + s.Array.String() + "[" + strings.Join(indices, ", ") + "]" +} + +// ReturnStmt is a return statement. +type ReturnStmt struct { + Value Expr +} + +func (s *ReturnStmt) String() string { + var valueStr string + if s.Value != nil { + valueStr = " " + s.Value.String() + } + return "return" + valueStr +} + +// BlockStmt is a stand-alone block like { print "x" }. +type BlockStmt struct { + Body Stmts +} + +func (s *BlockStmt) String() string { + return "{\n" + s.Body.String() + "}" +} + +// Function is the AST for a user-defined function. 
+type Function struct { + Name string + Params []string + Arrays []bool + Body Stmts +} + +func (f *Function) String() string { + return "function " + f.Name + "(" + strings.Join(f.Params, ", ") + ") {\n" + + f.Body.String() + "}" +} + +func trimParens(s string) string { + if strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") { + s = s[1 : len(s)-1] + } + return s +} diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go new file mode 100644 index 0000000..1ea06e4 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go @@ -0,0 +1,48 @@ +// Special variable constants + +package ast + +const ( + V_ILLEGAL = iota + V_ARGC + V_CONVFMT + V_FILENAME + V_FNR + V_FS + V_NF + V_NR + V_OFMT + V_OFS + V_ORS + V_RLENGTH + V_RS + V_RSTART + V_RT + V_SUBSEP + + V_LAST = V_SUBSEP +) + +var specialVars = map[string]int{ + "ARGC": V_ARGC, + "CONVFMT": V_CONVFMT, + "FILENAME": V_FILENAME, + "FNR": V_FNR, + "FS": V_FS, + "NF": V_NF, + "NR": V_NR, + "OFMT": V_OFMT, + "OFS": V_OFS, + "ORS": V_ORS, + "RLENGTH": V_RLENGTH, + "RS": V_RS, + "RSTART": V_RSTART, + "RT": V_RT, + "SUBSEP": V_SUBSEP, +} + +// SpecialVarIndex returns the "index" of the special variable, or 0 +// if it's not a special variable. +func SpecialVarIndex(name string) int { + return specialVars[name] +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/functions.go b/play/vendor/github.com/benhoyt/goawk/interp/functions.go new file mode 100644 index 0000000..26b446c --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/functions.go @@ -0,0 +1,789 @@ +// Evaluate builtin and user-defined function calls + +package interp + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "os/exec" + "reflect" + "sort" + "strconv" + "strings" + "time" + "unicode/utf8" + + . "github.com/benhoyt/goawk/internal/ast" + . 
"github.com/benhoyt/goawk/lexer" +) + +// Call builtin function specified by "op" with given args +func (p *interp) callBuiltin(op Token, argExprs []Expr) (value, error) { + // split() has an array arg (not evaluated) and [g]sub() have an + // lvalue arg, so handle them as special cases + switch op { + case F_SPLIT: + strValue, err := p.eval(argExprs[0]) + if err != nil { + return null(), err + } + str := p.toString(strValue) + var fieldSep string + if len(argExprs) == 3 { + sepValue, err := p.eval(argExprs[2]) + if err != nil { + return null(), err + } + fieldSep = p.toString(sepValue) + } else { + fieldSep = p.fieldSep + } + arrayExpr := argExprs[1].(*ArrayExpr) + n, err := p.split(str, arrayExpr.Scope, arrayExpr.Index, fieldSep) + if err != nil { + return null(), err + } + return num(float64(n)), nil + + case F_SUB, F_GSUB: + regexValue, err := p.eval(argExprs[0]) + if err != nil { + return null(), err + } + regex := p.toString(regexValue) + replValue, err := p.eval(argExprs[1]) + if err != nil { + return null(), err + } + repl := p.toString(replValue) + var in string + if len(argExprs) == 3 { + inValue, err := p.eval(argExprs[2]) + if err != nil { + return null(), err + } + in = p.toString(inValue) + } else { + in = p.line + } + out, n, err := p.sub(regex, repl, in, op == F_GSUB) + if err != nil { + return null(), err + } + if len(argExprs) == 3 { + err := p.assign(argExprs[2], str(out)) + if err != nil { + return null(), err + } + } else { + p.setLine(out, true) + } + return num(float64(n)), nil + } + + // Now evaluate the argExprs (calls with up to 7 args don't + // require heap allocation) + args := make([]value, 0, 7) + for _, a := range argExprs { + arg, err := p.eval(a) + if err != nil { + return null(), err + } + args = append(args, arg) + } + + // Then switch on the function for the ordinary functions + switch op { + case F_LENGTH: + var s string + if len(args) > 0 { + s = p.toString(args[0]) + } else { + s = p.line + } + var n int + if p.bytes { + n = 
len(s) + } else { + n = utf8.RuneCountInString(s) + } + return num(float64(n)), nil + + case F_MATCH: + re, err := p.compileRegex(p.toString(args[1])) + if err != nil { + return null(), err + } + s := p.toString(args[0]) + loc := re.FindStringIndex(s) + if loc == nil { + p.matchStart = 0 + p.matchLength = -1 + return num(0), nil + } + if p.bytes { + p.matchStart = loc[0] + 1 + p.matchLength = loc[1] - loc[0] + } else { + p.matchStart = utf8.RuneCountInString(s[:loc[0]]) + 1 + p.matchLength = utf8.RuneCountInString(s[loc[0]:loc[1]]) + } + return num(float64(p.matchStart)), nil + + case F_SUBSTR: + s := p.toString(args[0]) + pos := int(args[1].num()) + if p.bytes { + if pos > len(s) { + pos = len(s) + 1 + } + if pos < 1 { + pos = 1 + } + maxLength := len(s) - pos + 1 + length := maxLength + if len(args) == 3 { + length = int(args[2].num()) + if length < 0 { + length = 0 + } + if length > maxLength { + length = maxLength + } + } + return str(s[pos-1 : pos-1+length]), nil + } else { + // Count characters till we get to pos. + chars := 1 + start := 0 + for start = range s { + chars++ + if chars > pos { + break + } + } + if pos >= chars { + start = len(s) + } + + // Count characters from start till we reach length. 
+ var end int + if len(args) == 3 { + length := int(args[2].num()) + chars = 0 + for end = range s[start:] { + chars++ + if chars > length { + break + } + } + if length >= chars { + end = len(s) + } else { + end += start + } + } else { + end = len(s) + } + return str(s[start:end]), nil + } + + case F_SPRINTF: + s, err := p.sprintf(p.toString(args[0]), args[1:]) + if err != nil { + return null(), err + } + return str(s), nil + + case F_INDEX: + s := p.toString(args[0]) + substr := p.toString(args[1]) + index := strings.Index(s, substr) + if p.bytes { + return num(float64(index + 1)), nil + } else { + if index < 0 { + return num(float64(0)), nil + } + index = utf8.RuneCountInString(s[:index]) + return num(float64(index + 1)), nil + } + + case F_TOLOWER: + return str(strings.ToLower(p.toString(args[0]))), nil + case F_TOUPPER: + return str(strings.ToUpper(p.toString(args[0]))), nil + + case F_ATAN2: + return num(math.Atan2(args[0].num(), args[1].num())), nil + case F_COS: + return num(math.Cos(args[0].num())), nil + case F_EXP: + return num(math.Exp(args[0].num())), nil + case F_INT: + return num(float64(int(args[0].num()))), nil + case F_LOG: + return num(math.Log(args[0].num())), nil + case F_SQRT: + return num(math.Sqrt(args[0].num())), nil + case F_RAND: + return num(p.random.Float64()), nil + case F_SIN: + return num(math.Sin(args[0].num())), nil + + case F_SRAND: + prevSeed := p.randSeed + switch len(args) { + case 0: + p.random.Seed(time.Now().UnixNano()) + case 1: + p.randSeed = args[0].num() + p.random.Seed(int64(math.Float64bits(p.randSeed))) + } + return num(prevSeed), nil + + case F_SYSTEM: + if p.noExec { + return null(), newError("can't call system() due to NoExec") + } + cmdline := p.toString(args[0]) + cmd := p.execShell(cmdline) + cmd.Stdout = p.output + cmd.Stderr = p.errorOutput + _ = p.flushAll() // ensure synchronization + err := cmd.Start() + if err != nil { + p.printErrorf("%s\n", err) + return num(-1), nil + } + err = cmd.Wait() + if err != nil 
{ + if exitErr, ok := err.(*exec.ExitError); ok { + code := exitErr.ProcessState.ExitCode() + return num(float64(code)), nil + } else { + p.printErrorf("unexpected error running command %q: %v\n", cmdline, err) + return num(-1), nil + } + } + return num(0), nil + + case F_CLOSE: + name := p.toString(args[0]) + var c io.Closer = p.inputStreams[name] + if c != nil { + // Close input stream + delete(p.inputStreams, name) + err := c.Close() + if err != nil { + return num(-1), nil + } + return num(0), nil + } + c = p.outputStreams[name] + if c != nil { + // Close output stream + delete(p.outputStreams, name) + err := c.Close() + if err != nil { + return num(-1), nil + } + return num(0), nil + } + // Nothing to close + return num(-1), nil + + case F_FFLUSH: + var name string + if len(args) > 0 { + name = p.toString(args[0]) + } + var ok bool + if name != "" { + // Flush a single, named output stream + ok = p.flushStream(name) + } else { + // fflush() or fflush("") flushes all output streams + ok = p.flushAll() + } + if !ok { + return num(-1), nil + } + return num(0), nil + + default: + // Shouldn't happen + panic(fmt.Sprintf("unexpected function: %s", op)) + } +} + +// Executes code using configured system shell +func (p *interp) execShell(code string) *exec.Cmd { + executable := p.shellCommand[0] + args := p.shellCommand[1:] + args = append(args, code) + cmd := exec.Command(executable, args...) 
+ return cmd +} + +// Call user-defined function with given index and arguments, return +// its return value (or null value if it doesn't return anything) +func (p *interp) callUser(index int, args []Expr) (value, error) { + f := p.program.Functions[index] + + if p.callDepth >= maxCallDepth { + return null(), newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth) + } + + // Evaluate the arguments and push them onto the locals stack + oldFrame := p.frame + newFrameStart := len(p.stack) + var arrays []int + for i, arg := range args { + if f.Arrays[i] { + a := arg.(*VarExpr) + arrays = append(arrays, p.getArrayIndex(a.Scope, a.Index)) + } else { + argValue, err := p.eval(arg) + if err != nil { + return null(), err + } + p.stack = append(p.stack, argValue) + } + } + // Push zero value for any additional parameters (it's valid to + // call a function with fewer arguments than it has parameters) + oldArraysLen := len(p.arrays) + for i := len(args); i < len(f.Params); i++ { + if f.Arrays[i] { + arrays = append(arrays, len(p.arrays)) + p.arrays = append(p.arrays, make(map[string]value)) + } else { + p.stack = append(p.stack, null()) + } + } + p.frame = p.stack[newFrameStart:] + p.localArrays = append(p.localArrays, arrays) + + // Execute the function! + p.callDepth++ + err := p.executes(f.Body) + p.callDepth-- + + // Pop the locals off the stack + p.stack = p.stack[:newFrameStart] + p.frame = oldFrame + p.localArrays = p.localArrays[:len(p.localArrays)-1] + p.arrays = p.arrays[:oldArraysLen] + + if r, ok := err.(returnValue); ok { + return r.Value, nil + } + if err != nil { + return null(), err + } + return null(), nil +} + +// Call native-defined function with given name and arguments, return +// its return value (or null value if it doesn't return anything). 
+func (p *interp) callNative(index int, args []Expr) (value, error) { + f := p.nativeFuncs[index] + minIn := len(f.in) // Minimum number of args we should pass + var variadicType reflect.Type + if f.isVariadic { + variadicType = f.in[len(f.in)-1].Elem() + minIn-- + } + + // Build list of args to pass to function + values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation + for i, arg := range args { + a, err := p.eval(arg) + if err != nil { + return null(), err + } + var argType reflect.Type + if !f.isVariadic || i < len(f.in)-1 { + argType = f.in[i] + } else { + // Final arg(s) when calling a variadic are all of this type + argType = variadicType + } + values = append(values, p.toNative(a, argType)) + } + // Use zero value for any unspecified args + for i := len(args); i < minIn; i++ { + values = append(values, reflect.Zero(f.in[i])) + } + + // Call Go function, determine return value + outs := f.value.Call(values) + switch len(outs) { + case 0: + // No return value, return null value to AWK + return null(), nil + case 1: + // Single return value + return fromNative(outs[0]), nil + case 2: + // Two-valued return of (scalar, error) + if !outs[1].IsNil() { + return null(), outs[1].Interface().(error) + } + return fromNative(outs[0]), nil + default: + // Should never happen (checked at parse time) + panic(fmt.Sprintf("unexpected number of return values: %d", len(outs))) + } +} + +// Convert from an AWK value to a native Go value +func (p *interp) toNative(v value, typ reflect.Type) reflect.Value { + switch typ.Kind() { + case reflect.Bool: + return reflect.ValueOf(v.boolean()) + case reflect.Int: + return reflect.ValueOf(int(v.num())) + case reflect.Int8: + return reflect.ValueOf(int8(v.num())) + case reflect.Int16: + return reflect.ValueOf(int16(v.num())) + case reflect.Int32: + return reflect.ValueOf(int32(v.num())) + case reflect.Int64: + return reflect.ValueOf(int64(v.num())) + case reflect.Uint: + return reflect.ValueOf(uint(v.num())) + 
case reflect.Uint8: + return reflect.ValueOf(uint8(v.num())) + case reflect.Uint16: + return reflect.ValueOf(uint16(v.num())) + case reflect.Uint32: + return reflect.ValueOf(uint32(v.num())) + case reflect.Uint64: + return reflect.ValueOf(uint64(v.num())) + case reflect.Float32: + return reflect.ValueOf(float32(v.num())) + case reflect.Float64: + return reflect.ValueOf(v.num()) + case reflect.String: + return reflect.ValueOf(p.toString(v)) + case reflect.Slice: + if typ.Elem().Kind() != reflect.Uint8 { + // Shouldn't happen: prevented by checkNativeFunc + panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind())) + } + return reflect.ValueOf([]byte(p.toString(v))) + default: + // Shouldn't happen: prevented by checkNativeFunc + panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind())) + } +} + +// Convert from a native Go value to an AWK value +func fromNative(v reflect.Value) value { + switch v.Kind() { + case reflect.Bool: + return boolean(v.Bool()) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return num(float64(v.Int())) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return num(float64(v.Uint())) + case reflect.Float32, reflect.Float64: + return num(v.Float()) + case reflect.String: + return str(v.String()) + case reflect.Slice: + if b, ok := v.Interface().([]byte); ok { + return str(string(b)) + } + // Shouldn't happen: prevented by checkNativeFunc + panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind())) + default: + // Shouldn't happen: prevented by checkNativeFunc + panic(fmt.Sprintf("unexpected return type: %s", v.Kind())) + } +} + +// Used for caching native function type information on init +type nativeFunc struct { + isVariadic bool + in []reflect.Type + value reflect.Value +} + +// Check and initialize native functions +func (p *interp) initNativeFuncs(funcs map[string]interface{}) error { + for name, f := range funcs { + err := 
checkNativeFunc(name, f) + if err != nil { + return err + } + } + + // Sort functions by name, then use those indexes to build slice + // (this has to match how the parser sets the indexes). + names := make([]string, 0, len(funcs)) + for name := range funcs { + names = append(names, name) + } + sort.Strings(names) + p.nativeFuncs = make([]nativeFunc, len(names)) + for i, name := range names { + f := funcs[name] + typ := reflect.TypeOf(f) + in := make([]reflect.Type, typ.NumIn()) + for j := 0; j < len(in); j++ { + in[j] = typ.In(j) + } + p.nativeFuncs[i] = nativeFunc{ + isVariadic: typ.IsVariadic(), + in: in, + value: reflect.ValueOf(f), + } + } + return nil +} + +// Got this trick from the Go stdlib text/template source +var errorType = reflect.TypeOf((*error)(nil)).Elem() + +// Check that native function with given name is okay to call from +// AWK, return an *interp.Error if not. This checks that f is actually +// a function, and that its parameter and return types are good. +func checkNativeFunc(name string, f interface{}) error { + if KeywordToken(name) != ILLEGAL { + return newError("can't use keyword %q as native function name", name) + } + + typ := reflect.TypeOf(f) + if typ.Kind() != reflect.Func { + return newError("native function %q is not a function", name) + } + for i := 0; i < typ.NumIn(); i++ { + param := typ.In(i) + if typ.IsVariadic() && i == typ.NumIn()-1 { + param = param.Elem() + } + if !validNativeType(param) { + return newError("native function %q param %d is not int or string", name, i) + } + } + + switch typ.NumOut() { + case 0: + // No return value is fine + case 1: + // Single scalar return value is fine + if !validNativeType(typ.Out(0)) { + return newError("native function %q return value is not int or string", name) + } + case 2: + // Returning (scalar, error) is handled too + if !validNativeType(typ.Out(0)) { + return newError("native function %q first return value is not int or string", name) + } + if typ.Out(1) != errorType { + return 
newError("native function %q second return value is not an error", name) + } + default: + return newError("native function %q returns more than two values", name) + } + return nil +} + +// Return true if typ is a valid parameter or return type. +func validNativeType(typ reflect.Type) bool { + switch typ.Kind() { + case reflect.Bool: + return true + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + return true + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + return true + case reflect.Float32, reflect.Float64: + return true + case reflect.String: + return true + case reflect.Slice: + // Only allow []byte (convert to string in AWK) + return typ.Elem().Kind() == reflect.Uint8 + default: + return false + } +} + +// Guts of the split() function +func (p *interp) split(s string, scope VarScope, index int, fs string) (int, error) { + var parts []string + if fs == " " { + parts = strings.Fields(s) + } else if s == "" { + // NF should be 0 on empty line + } else if utf8.RuneCountInString(fs) <= 1 { + parts = strings.Split(s, fs) + } else { + re, err := p.compileRegex(fs) + if err != nil { + return 0, err + } + parts = re.Split(s, -1) + } + array := make(map[string]value, len(parts)) + for i, part := range parts { + array[strconv.Itoa(i+1)] = numStr(part) + } + p.arrays[p.getArrayIndex(scope, index)] = array + return len(array), nil +} + +// Guts of the sub() and gsub() functions +func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) { + re, err := p.compileRegex(regex) + if err != nil { + return "", 0, err + } + count := 0 + out = re.ReplaceAllStringFunc(in, func(s string) string { + // Only do the first replacement for sub(), or all for gsub() + if !global && count > 0 { + return s + } + count++ + // Handle & (ampersand) properly in replacement string + r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation + for i := 0; i < len(repl); i++ { + 
switch repl[i] { + case '&': + r = append(r, s...) + case '\\': + i++ + if i < len(repl) { + switch repl[i] { + case '&': + r = append(r, '&') + case '\\': + r = append(r, '\\') + default: + r = append(r, '\\', repl[i]) + } + } else { + r = append(r, '\\') + } + default: + r = append(r, repl[i]) + } + } + return string(r) + }) + return out, count, nil +} + +type cachedFormat struct { + format string + types []byte +} + +// Parse given sprintf format string into Go format string, along with +// type conversion specifiers. Output is memoized in a simple cache +// for performance. +func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) { + if item, ok := p.formatCache[s]; ok { + return item.format, item.types, nil + } + + out := []byte(s) + for i := 0; i < len(s); i++ { + if s[i] == '%' { + i++ + if i >= len(s) { + return "", nil, errors.New("expected type specifier after %") + } + if s[i] == '%' { + continue + } + for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 { + if s[i] == '*' { + types = append(types, 'd') + } + i++ + } + if i >= len(s) { + return "", nil, errors.New("expected type specifier after %") + } + var t byte + switch s[i] { + case 's': + t = 's' + case 'd', 'i', 'o', 'x', 'X': + t = 'd' + case 'f', 'e', 'E', 'g', 'G': + t = 'f' + case 'u': + t = 'u' + out[i] = 'd' + case 'c': + t = 'c' + out[i] = 's' + default: + return "", nil, fmt.Errorf("invalid format type %q", s[i]) + } + types = append(types, t) + } + } + + // Dumb, non-LRU cache: just cache the first N formats + format = string(out) + if len(p.formatCache) < maxCachedFormats { + p.formatCache[s] = cachedFormat{format, types} + } + return format, types, nil +} + +// Guts of sprintf() function (also used by "printf" statement) +func (p *interp) sprintf(format string, args []value) (string, error) { + format, types, err := p.parseFmtTypes(format) + if err != nil { + return "", newError("format error: %s", err) + } + if len(types) > len(args) { + 
return "", newError("format error: got %d args, expected %d", len(args), len(types)) + } + converted := make([]interface{}, len(types)) + for i, t := range types { + a := args[i] + var v interface{} + switch t { + case 's': + v = p.toString(a) + case 'd': + v = int(a.num()) + case 'f': + v = a.num() + case 'u': + v = uint32(a.num()) + case 'c': + var c []byte + n, isStr := a.isTrueStr() + if isStr { + s := p.toString(a) + if len(s) > 0 { + c = []byte{s[0]} + } else { + c = []byte{0} + } + } else { + // Follow the behaviour of awk and mawk, where %c + // operates on bytes (0-255), not Unicode codepoints + c = []byte{byte(n)} + } + v = c + } + converted[i] = v + } + return fmt.Sprintf(format, converted...), nil +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/interp.go b/play/vendor/github.com/benhoyt/goawk/interp/interp.go new file mode 100644 index 0000000..ee2dbaf --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/interp.go @@ -0,0 +1,1369 @@ +// Package interp is the GoAWK interpreter (a simple tree-walker). +// +// For basic usage, use the Exec function. For more complicated use +// cases and configuration options, first use the parser package to +// parse the AWK source, and then use ExecProgram to execute it with +// a specific configuration. +// +package interp + +import ( + "bufio" + "errors" + "fmt" + "io" + "io/ioutil" + "math" + "math/rand" + "os" + "os/exec" + "regexp" + "runtime" + "strconv" + "strings" + "unicode/utf8" + + . "github.com/benhoyt/goawk/internal/ast" + . "github.com/benhoyt/goawk/lexer" + . "github.com/benhoyt/goawk/parser" +) + +var ( + errExit = errors.New("exit") + errBreak = errors.New("break") + errContinue = errors.New("continue") + errNext = errors.New("next") + + crlfNewline = runtime.GOOS == "windows" + varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`) +) + +// Error (actually *Error) is returned by Exec and Eval functions on +// interpreter error, for example a negative field index. 
type Error struct {
	// message is the fully formatted text returned by Error().
	message string
}

// Error implements the error interface by returning the stored message.
func (e *Error) Error() string {
	return e.message
}

// newError formats its arguments with fmt.Sprintf and wraps the result
// in an *Error, returned as a plain error value.
func newError(format string, args ...interface{}) error {
	return &Error{fmt.Sprintf(format, args...)}
}

// returnValue carries the value of an AWK "return" statement. It
// satisfies the error interface so it can be passed back up through
// the error results of the statement executor.
type returnValue struct {
	Value value
}

// Error exists only to satisfy the error interface; it always returns
// the empty string.
func (r returnValue) Error() string {
	return ""
}

// interp holds all of the mutable state for a single execution of a
// parsed program.
type interp struct {
	// Input/output
	output        io.Writer
	errorOutput   io.Writer
	scanner       *bufio.Scanner
	scanners      map[string]*bufio.Scanner
	stdin         io.Reader
	filenameIndex int
	hadFiles      bool
	input         io.Reader
	inputStreams  map[string]io.ReadCloser
	outputStreams map[string]io.WriteCloser
	commands      map[string]*exec.Cmd
	noExec        bool
	noFileWrites  bool
	noFileReads   bool
	shellCommand  []string

	// Scalars, arrays, and function state
	globals []value
	stack   []value
	frame   []value
	arrays  []map[string]value
	// localArrays: the last element maps a local array index to its
	// position in arrays (see getArrayIndex).
	localArrays [][]int
	callDepth   int
	nativeFuncs []nativeFunc

	// File, line, and field handling
	filename value
	line     string
	// lineIsTrueStr selects whether $0 is returned as a plain string
	// (true) or as a number-like string (false); see getField.
	lineIsTrueStr   bool
	lineNum         int
	fileLineNum     int
	fields          []string
	fieldsIsTrueStr []bool
	numFields       int
	haveFields      bool

	// Built-in variables
	argc             int
	convertFormat    string
	outputFormat     string
	fieldSep         string
	fieldSepRegex    *regexp.Regexp
	recordSep        string
	recordSepRegex   *regexp.Regexp
	recordTerminator string
	outputFieldSep   string
	outputRecordSep  string
	subscriptSep     string
	matchLength      int
	matchStart       int

	// Misc pieces of state
	program     *Program
	random      *rand.Rand
	randSeed    float64
	exitStatus  int
	regexCache  map[string]*regexp.Regexp
	formatCache map[string]cachedFormat
	bytes       bool
}

// Various const configuration. Could make these part of Config if
// we wanted to, but no need for now.
const (
	// Upper bound on the number of entries kept in interp.regexCache.
	maxCachedRegexes = 100
	// Upper bound on the number of entries kept in interp.formatCache.
	maxCachedFormats = 100
	// NOTE(review): presumably the largest record the input scanner
	// will accept; its use is not in this part of the file - confirm.
	maxRecordLength = 10 * 1024 * 1024 // 10MB seems like plenty
	// Largest value accepted when assigning NF or a field index.
	maxFieldIndex = 1000000
	// NOTE(review): presumably the limit checked against
	// interp.callDepth for user-defined function calls - confirm.
	maxCallDepth = 1000
	// Initial capacity of interp.stack, and the spare capacity
	// reserved in interp.arrays beyond the program's global arrays.
	initialStackSize = 100
	// Buffer size of the default writer wrapped around os.Stdout.
	outputBufSize = 64 * 1024
	// NOTE(review): presumably the initial input scanner buffer size;
	// its use is not in this part of the file - confirm.
	inputBufSize = 64 * 1024
)

// Config defines the interpreter configuration for ExecProgram.
type Config struct {
	// Standard input reader (defaults to os.Stdin)
	Stdin io.Reader

	// Writer for normal output (defaults to a buffered version of
	// os.Stdout)
	Output io.Writer

	// Writer for non-fatal error messages (defaults to os.Stderr)
	Error io.Writer

	// The name of the executable (accessible via ARGV[0])
	Argv0 string

	// Input arguments (usually filenames): empty slice means read
	// only from Stdin, and a filename of "-" means read from Stdin
	// instead of a real file.
	Args []string

	// List of name-value pairs for variables to set before executing
	// the program (useful for setting FS and other built-in
	// variables, for example []string{"FS", ",", "OFS", ","}).
	//
	// The length must be even, otherwise ExecProgram returns an
	// error. Names that are neither special variables nor scalars
	// used by the program are silently ignored.
	Vars []string

	// Map of named Go functions to allow calling from AWK. You need
	// to pass this same map to the parser.ParseProgram config.
	//
	// Functions can have any number of parameters, and variadic
	// functions are supported. Functions can have no return values,
	// one return value, or two return values (result, error). In the
	// two-value case, if the function returns a non-nil error,
	// program execution will stop and ExecProgram will return that
	// error.
	//
	// Apart from the error return value, the types supported are
	// bool, integer and floating point types (excluding complex),
	// and string types (string or []byte).
	//
	// It's not an error to call a Go function from AWK with fewer
	// arguments than it has parameters in Go. In this case, the zero
	// value will be used for any additional parameters. However, it
	// is a parse error to call a non-variadic function from AWK with
	// more arguments than it has parameters in Go.
	//
	// Functions defined with the "function" keyword in AWK code
	// take precedence over functions in Funcs.
	Funcs map[string]interface{}

	// Set one or more of these to true to prevent unsafe behaviours,
	// useful when executing untrusted scripts:
	//
	// * NoExec prevents system calls via system() or pipe operator
	// * NoFileWrites prevents writing to files via '>' or '>>'
	// * NoFileReads prevents reading from files via getline or the
	//   filenames in Args
	NoExec       bool
	NoFileWrites bool
	NoFileReads  bool

	// Exec args used to run system shell. If empty, ExecProgram uses
	// {"/bin/sh", "-c"}, or {"sh", "-c"} when runtime.GOOS is
	// "windows".
	ShellCommand []string

	// List of name-value pairs to be assigned to the ENVIRON special
	// array, for example []string{"USER", "bob", "HOME", "/home/bob"}.
	// If nil (the default), values from os.Environ() are used. The
	// length must be even, otherwise ExecProgram returns an error.
	Environ []string

	// Set to true to use byte indexes instead of character indexes for
	// the index, length, match, and substr functions. Note: the default
	// was changed from bytes to characters in GoAWK version 1.11.
	Bytes bool
}

// ExecProgram executes the parsed program using the given interpreter
// config, returning the exit status code of the program. Error is nil
// on successful execution of the program, even if the program returns
// a non-zero status code.
func ExecProgram(program *Program, config *Config) (int, error) {
	// config is dereferenced straight away, so it must be non-nil.
	// Both pair lists are consumed two elements at a time below, so
	// an odd length is rejected up front.
	if len(config.Vars)%2 != 0 {
		return 0, newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars))
	}
	if len(config.Environ)%2 != 0 {
		return 0, newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ))
	}

	p := &interp{program: program}

	// Allocate memory for variables
	p.globals = make([]value, len(program.Scalars))
	p.stack = make([]value, 0, initialStackSize)
	// One map per global array; extra capacity is reserved so the
	// slice can grow by initialStackSize entries without reallocating.
	p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize)
	for i := 0; i < len(program.Arrays); i++ {
		p.arrays[i] = make(map[string]value)
	}

	// Initialize defaults
	p.regexCache = make(map[string]*regexp.Regexp, 10)
	p.formatCache = make(map[string]cachedFormat, 10)
	p.randSeed = 1.0
	// The random source is seeded with the bit pattern of randSeed,
	// so the initial sequence is the same on every run.
	seed := math.Float64bits(p.randSeed)
	p.random = rand.New(rand.NewSource(int64(seed)))
	p.convertFormat = "%.6g"
	p.outputFormat = "%.6g"
	p.fieldSep = " "
	p.recordSep = "\n"
	p.outputFieldSep = " "
	p.outputRecordSep = "\n"
	p.subscriptSep = "\x1c"
	p.noExec = config.NoExec
	p.noFileWrites = config.NoFileWrites
	p.noFileReads = config.NoFileReads
	p.bytes = config.Bytes
	err := p.initNativeFuncs(config.Funcs)
	if err != nil {
		return 0, err
	}

	// Setup ARGV and other variables from config
	argvIndex := program.Arrays["ARGV"]
	p.setArrayValue(ScopeGlobal, argvIndex, "0", str(config.Argv0))
	p.argc = len(config.Args) + 1
	for i, arg := range config.Args {
		p.setArrayValue(ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg))
	}
	p.filenameIndex = 1
	p.hadFiles = false
	// Vars are applied after the defaults above, so they override
	// built-ins such as FS.
	for i := 0; i < len(config.Vars); i += 2 {
		err := p.setVarByName(config.Vars[i], config.Vars[i+1])
		if err != nil {
			return 0, err
		}
	}

	// Setup ENVIRON from config or environment variables
	environIndex := program.Arrays["ENVIRON"]
	if config.Environ != nil {
		for i := 0; i < len(config.Environ); i += 2 {
			p.setArrayValue(ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
		}
	} else {
		for _, kv := range os.Environ() {
			// Split on the first '='; entries without one are skipped.
			eq := strings.IndexByte(kv, '=')
			if eq >= 0 {
				p.setArrayValue(ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:]))
			}
		}
	}

	// Setup system shell command
	if len(config.ShellCommand) != 0 {
		p.shellCommand = config.ShellCommand
	} else {
		executable := "/bin/sh"
		if runtime.GOOS == "windows" {
			executable = "sh"
		}
		p.shellCommand = []string{executable, "-c"}
	}

	// Setup I/O structures
	p.stdin = config.Stdin
	if p.stdin == nil {
		p.stdin = os.Stdin
	}
	p.output = config.Output
	if p.output == nil {
		p.output = bufio.NewWriterSize(os.Stdout, outputBufSize)
	}
	p.errorOutput = config.Error
	if p.errorOutput == nil {
		p.errorOutput = os.Stderr
	}
	p.inputStreams = make(map[string]io.ReadCloser)
	p.outputStreams = make(map[string]io.WriteCloser)
	p.commands = make(map[string]*exec.Cmd)
	p.scanners = make(map[string]*bufio.Scanner)
	defer p.closeAll()

	// Execute the program! BEGIN, then pattern/actions, then END
	err = p.execBeginEnd(program.Begin)
	if err != nil && err != errExit {
		return 0, err
	}
	// With only BEGIN blocks there is nothing further to run, so no
	// input is read at all.
	if program.Actions == nil && program.End == nil {
		return p.exitStatus, nil
	}
	// An "exit" in BEGIN skips the pattern-action blocks, but the END
	// blocks below still run.
	if err != errExit {
		err = p.execActions(program.Actions)
		if err != nil && err != errExit {
			return 0, err
		}
	}
	err = p.execBeginEnd(program.End)
	if err != nil && err != errExit {
		return 0, err
	}
	return p.exitStatus, nil
}

// Exec provides a simple way to parse and execute an AWK program
// with the given field separator. Exec reads input from the given
// reader (nil means use os.Stdin) and writes output to the given
// writer (nil means use a buffered version of os.Stdout).
+func Exec(source, fieldSep string, input io.Reader, output io.Writer) error { + prog, err := ParseProgram([]byte(source), nil) + if err != nil { + return err + } + config := &Config{ + Stdin: input, + Output: output, + Error: ioutil.Discard, + Vars: []string{"FS", fieldSep}, + } + _, err = ExecProgram(prog, config) + return err +} + +// Execute BEGIN or END blocks (may be multiple) +func (p *interp) execBeginEnd(beginEnd []Stmts) error { + for _, statements := range beginEnd { + err := p.executes(statements) + if err != nil { + return err + } + } + return nil +} + +// Execute pattern-action blocks (may be multiple) +func (p *interp) execActions(actions []Action) error { + inRange := make([]bool, len(actions)) +lineLoop: + for { + // Read and setup next line of input + line, err := p.nextLine() + if err == io.EOF { + break + } + if err != nil { + return err + } + p.setLine(line, false) + + // Execute all the pattern-action blocks for each line + for i, action := range actions { + // First determine whether the pattern matches + matched := false + switch len(action.Pattern) { + case 0: + // No pattern is equivalent to pattern evaluating to true + matched = true + case 1: + // Single boolean pattern + v, err := p.eval(action.Pattern[0]) + if err != nil { + return err + } + matched = v.boolean() + case 2: + // Range pattern (matches between start and stop lines) + if !inRange[i] { + v, err := p.eval(action.Pattern[0]) + if err != nil { + return err + } + inRange[i] = v.boolean() + } + matched = inRange[i] + if inRange[i] { + v, err := p.eval(action.Pattern[1]) + if err != nil { + return err + } + inRange[i] = !v.boolean() + } + } + if !matched { + continue + } + + // No action is equivalent to { print $0 } + if action.Stmts == nil { + err := p.printLine(p.output, p.line) + if err != nil { + return err + } + continue + } + + // Execute the body statements + err := p.executes(action.Stmts) + if err == errNext { + // "next" statement skips straight to next line + 
continue lineLoop + } + if err != nil { + return err + } + } + } + return nil +} + +// Execute a block of multiple statements +func (p *interp) executes(stmts Stmts) error { + for _, s := range stmts { + err := p.execute(s) + if err != nil { + return err + } + } + return nil +} + +// Execute a single statement +func (p *interp) execute(stmt Stmt) error { + switch s := stmt.(type) { + case *ExprStmt: + // Expression statement: simply throw away the expression value + _, err := p.eval(s.Expr) + return err + + case *PrintStmt: + // Print OFS-separated args followed by ORS (usually newline) + var line string + if len(s.Args) > 0 { + strs := make([]string, len(s.Args)) + for i, a := range s.Args { + v, err := p.eval(a) + if err != nil { + return err + } + strs[i] = v.str(p.outputFormat) + } + line = strings.Join(strs, p.outputFieldSep) + } else { + // "print" with no args is equivalent to "print $0" + line = p.line + } + output, err := p.getOutputStream(s.Redirect, s.Dest) + if err != nil { + return err + } + return p.printLine(output, line) + + case *PrintfStmt: + // printf(fmt, arg1, arg2, ...): uses our version of sprintf + // to build the formatted string and then print that + formatValue, err := p.eval(s.Args[0]) + if err != nil { + return err + } + format := p.toString(formatValue) + args := make([]value, len(s.Args)-1) + for i, a := range s.Args[1:] { + args[i], err = p.eval(a) + if err != nil { + return err + } + } + output, err := p.getOutputStream(s.Redirect, s.Dest) + if err != nil { + return err + } + str, err := p.sprintf(format, args) + if err != nil { + return err + } + err = writeOutput(output, str) + if err != nil { + return err + } + + case *IfStmt: + v, err := p.eval(s.Cond) + if err != nil { + return err + } + if v.boolean() { + return p.executes(s.Body) + } else { + // Doesn't do anything if s.Else is nil + return p.executes(s.Else) + } + + case *ForStmt: + // C-like for loop with pre-statement, cond, and post-statement + if s.Pre != nil { + err := 
p.execute(s.Pre) + if err != nil { + return err + } + } + for { + if s.Cond != nil { + v, err := p.eval(s.Cond) + if err != nil { + return err + } + if !v.boolean() { + break + } + } + err := p.executes(s.Body) + if err == errBreak { + break + } + if err != nil && err != errContinue { + return err + } + if s.Post != nil { + err := p.execute(s.Post) + if err != nil { + return err + } + } + } + + case *ForInStmt: + // Foreach-style "for (key in array)" loop + array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] + for index := range array { + err := p.setVar(s.Var.Scope, s.Var.Index, str(index)) + if err != nil { + return err + } + err = p.executes(s.Body) + if err == errBreak { + break + } + if err == errContinue { + continue + } + if err != nil { + return err + } + } + + case *ReturnStmt: + // Return statement uses special error value which is "caught" + // by the callUser function + var v value + if s.Value != nil { + var err error + v, err = p.eval(s.Value) + if err != nil { + return err + } + } + return returnValue{v} + + case *WhileStmt: + // Simple "while (cond)" loop + for { + v, err := p.eval(s.Cond) + if err != nil { + return err + } + if !v.boolean() { + break + } + err = p.executes(s.Body) + if err == errBreak { + break + } + if err == errContinue { + continue + } + if err != nil { + return err + } + } + + case *DoWhileStmt: + // Do-while loop (tests condition after executing body) + for { + err := p.executes(s.Body) + if err == errBreak { + break + } + if err == errContinue { + continue + } + if err != nil { + return err + } + v, err := p.eval(s.Cond) + if err != nil { + return err + } + if !v.boolean() { + break + } + } + + // Break, continue, next, and exit statements + case *BreakStmt: + return errBreak + case *ContinueStmt: + return errContinue + case *NextStmt: + return errNext + case *ExitStmt: + if s.Status != nil { + status, err := p.eval(s.Status) + if err != nil { + return err + } + p.exitStatus = int(status.num()) + } + // Return 
special errExit value "caught" by top-level executor + return errExit + + case *DeleteStmt: + if len(s.Index) > 0 { + // Delete single key from array + index, err := p.evalIndex(s.Index) + if err != nil { + return err + } + array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] + delete(array, index) // Does nothing if key isn't present + } else { + // Delete entire array + array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] + for k := range array { + delete(array, k) + } + } + + case *BlockStmt: + // Nested block (just syntax, doesn't do anything) + return p.executes(s.Body) + + default: + // Should never happen + panic(fmt.Sprintf("unexpected stmt type: %T", stmt)) + } + return nil +} + +// Evaluate a single expression, return expression value and error +func (p *interp) eval(expr Expr) (value, error) { + switch e := expr.(type) { + case *NumExpr: + // Number literal + return num(e.Value), nil + + case *StrExpr: + // String literal + return str(e.Value), nil + + case *FieldExpr: + // $n field expression + index, err := p.eval(e.Index) + if err != nil { + return null(), err + } + return p.getField(int(index.num())) + + case *VarExpr: + // Variable read expression (scope is global, local, or special) + return p.getVar(e.Scope, e.Index), nil + + case *RegExpr: + // Stand-alone /regex/ is equivalent to: $0 ~ /regex/ + re, err := p.compileRegex(e.Regex) + if err != nil { + return null(), err + } + return boolean(re.MatchString(p.line)), nil + + case *BinaryExpr: + // Binary expression. Note that && and || are special cases + // as they're short-circuit operators. 
+ left, err := p.eval(e.Left) + if err != nil { + return null(), err + } + switch e.Op { + case AND: + if !left.boolean() { + return num(0), nil + } + right, err := p.eval(e.Right) + if err != nil { + return null(), err + } + return boolean(right.boolean()), nil + case OR: + if left.boolean() { + return num(1), nil + } + right, err := p.eval(e.Right) + if err != nil { + return null(), err + } + return boolean(right.boolean()), nil + default: + right, err := p.eval(e.Right) + if err != nil { + return null(), err + } + return p.evalBinary(e.Op, left, right) + } + + case *IncrExpr: + // Pre-increment, post-increment, pre-decrement, post-decrement + + // First evaluate the expression, but remember array or field + // index, so we don't evaluate part of the expression twice + exprValue, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Expr) + if err != nil { + return null(), err + } + + // Then convert to number and increment or decrement + exprNum := exprValue.num() + var incr float64 + if e.Op == INCR { + incr = exprNum + 1 + } else { + incr = exprNum - 1 + } + incrValue := num(incr) + + // Finally, assign back to expression and return the correct value + err = p.assignAug(e.Expr, arrayIndex, fieldIndex, incrValue) + if err != nil { + return null(), err + } + if e.Pre { + return incrValue, nil + } else { + return num(exprNum), nil + } + + case *AssignExpr: + // Assignment expression (returns right-hand side) + right, err := p.eval(e.Right) + if err != nil { + return null(), err + } + err = p.assign(e.Left, right) + if err != nil { + return null(), err + } + return right, nil + + case *AugAssignExpr: + // Augmented assignment like += (returns right-hand side) + right, err := p.eval(e.Right) + if err != nil { + return null(), err + } + left, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Left) + if err != nil { + return null(), err + } + right, err = p.evalBinary(e.Op, left, right) + if err != nil { + return null(), err + } + err = p.assignAug(e.Left, 
arrayIndex, fieldIndex, right) + if err != nil { + return null(), err + } + return right, nil + + case *CondExpr: + // C-like ?: ternary conditional operator + cond, err := p.eval(e.Cond) + if err != nil { + return null(), err + } + if cond.boolean() { + return p.eval(e.True) + } else { + return p.eval(e.False) + } + + case *IndexExpr: + // Read value from array by index + index, err := p.evalIndex(e.Index) + if err != nil { + return null(), err + } + return p.getArrayValue(e.Array.Scope, e.Array.Index, index), nil + + case *CallExpr: + // Call a builtin function + return p.callBuiltin(e.Func, e.Args) + + case *UnaryExpr: + // Unary ! or + or - + v, err := p.eval(e.Value) + if err != nil { + return null(), err + } + return p.evalUnary(e.Op, v), nil + + case *InExpr: + // "key in array" expression + index, err := p.evalIndex(e.Index) + if err != nil { + return null(), err + } + array := p.arrays[p.getArrayIndex(e.Array.Scope, e.Array.Index)] + _, ok := array[index] + return boolean(ok), nil + + case *UserCallExpr: + // Call user-defined or native Go function + if e.Native { + return p.callNative(e.Index, e.Args) + } else { + return p.callUser(e.Index, e.Args) + } + + case *GetlineExpr: + // Getline: read line from input + var line string + switch { + case e.Command != nil: + nameValue, err := p.eval(e.Command) + if err != nil { + return null(), err + } + name := p.toString(nameValue) + scanner, err := p.getInputScannerPipe(name) + if err != nil { + return null(), err + } + if !scanner.Scan() { + if err := scanner.Err(); err != nil { + return num(-1), nil + } + return num(0), nil + } + line = scanner.Text() + case e.File != nil: + nameValue, err := p.eval(e.File) + if err != nil { + return null(), err + } + name := p.toString(nameValue) + scanner, err := p.getInputScannerFile(name) + if err != nil { + if _, ok := err.(*os.PathError); ok { + // File not found is not a hard error, getline just returns -1. 
+ // See: https://github.com/benhoyt/goawk/issues/41 + return num(-1), nil + } + return null(), err + } + if !scanner.Scan() { + if err := scanner.Err(); err != nil { + return num(-1), nil + } + return num(0), nil + } + line = scanner.Text() + default: + p.flushOutputAndError() // Flush output in case they've written a prompt + var err error + line, err = p.nextLine() + if err == io.EOF { + return num(0), nil + } + if err != nil { + return num(-1), nil + } + } + if e.Target != nil { + err := p.assign(e.Target, numStr(line)) + if err != nil { + return null(), err + } + } else { + p.setLine(line, false) + } + return num(1), nil + + default: + // Should never happen + panic(fmt.Sprintf("unexpected expr type: %T", expr)) + } +} + +func (p *interp) evalForAugAssign(expr Expr) (v value, arrayIndex string, fieldIndex int, err error) { + switch expr := expr.(type) { + case *VarExpr: + v = p.getVar(expr.Scope, expr.Index) + case *IndexExpr: + arrayIndex, err = p.evalIndex(expr.Index) + if err != nil { + return null(), "", 0, err + } + v = p.getArrayValue(expr.Array.Scope, expr.Array.Index, arrayIndex) + case *FieldExpr: + index, err := p.eval(expr.Index) + if err != nil { + return null(), "", 0, err + } + fieldIndex = int(index.num()) + v, err = p.getField(fieldIndex) + if err != nil { + return null(), "", 0, err + } + } + return v, arrayIndex, fieldIndex, nil +} + +func (p *interp) assignAug(expr Expr, arrayIndex string, fieldIndex int, v value) error { + switch expr := expr.(type) { + case *VarExpr: + return p.setVar(expr.Scope, expr.Index, v) + case *IndexExpr: + p.setArrayValue(expr.Array.Scope, expr.Array.Index, arrayIndex, v) + default: // *FieldExpr + return p.setField(fieldIndex, p.toString(v)) + } + return nil +} + +// Get a variable's value by index in given scope +func (p *interp) getVar(scope VarScope, index int) value { + switch scope { + case ScopeGlobal: + return p.globals[index] + case ScopeLocal: + return p.frame[index] + default: // ScopeSpecial + switch 
index { + case V_NF: + p.ensureFields() + return num(float64(p.numFields)) + case V_NR: + return num(float64(p.lineNum)) + case V_RLENGTH: + return num(float64(p.matchLength)) + case V_RSTART: + return num(float64(p.matchStart)) + case V_FNR: + return num(float64(p.fileLineNum)) + case V_ARGC: + return num(float64(p.argc)) + case V_CONVFMT: + return str(p.convertFormat) + case V_FILENAME: + return p.filename + case V_FS: + return str(p.fieldSep) + case V_OFMT: + return str(p.outputFormat) + case V_OFS: + return str(p.outputFieldSep) + case V_ORS: + return str(p.outputRecordSep) + case V_RS: + return str(p.recordSep) + case V_RT: + return str(p.recordTerminator) + case V_SUBSEP: + return str(p.subscriptSep) + default: + panic(fmt.Sprintf("unexpected special variable index: %d", index)) + } + } +} + +// Set a variable by name (specials and globals only) +func (p *interp) setVarByName(name, value string) error { + index := SpecialVarIndex(name) + if index > 0 { + return p.setVar(ScopeSpecial, index, numStr(value)) + } + index, ok := p.program.Scalars[name] + if ok { + return p.setVar(ScopeGlobal, index, numStr(value)) + } + // Ignore variables that aren't defined in program + return nil +} + +// Set a variable by index in given scope to given value +func (p *interp) setVar(scope VarScope, index int, v value) error { + switch scope { + case ScopeGlobal: + p.globals[index] = v + return nil + case ScopeLocal: + p.frame[index] = v + return nil + default: // ScopeSpecial + switch index { + case V_NF: + numFields := int(v.num()) + if numFields < 0 { + return newError("NF set to negative value: %d", numFields) + } + if numFields > maxFieldIndex { + return newError("NF set too large: %d", numFields) + } + p.ensureFields() + p.numFields = numFields + if p.numFields < len(p.fields) { + p.fields = p.fields[:p.numFields] + p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields] + } + for i := len(p.fields); i < p.numFields; i++ { + p.fields = append(p.fields, "") + p.fieldsIsTrueStr 
= append(p.fieldsIsTrueStr, false) + } + p.line = strings.Join(p.fields, p.outputFieldSep) + p.lineIsTrueStr = true + case V_NR: + p.lineNum = int(v.num()) + case V_RLENGTH: + p.matchLength = int(v.num()) + case V_RSTART: + p.matchStart = int(v.num()) + case V_FNR: + p.fileLineNum = int(v.num()) + case V_ARGC: + p.argc = int(v.num()) + case V_CONVFMT: + p.convertFormat = p.toString(v) + case V_FILENAME: + p.filename = v + case V_FS: + p.fieldSep = p.toString(v) + if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields + re, err := regexp.Compile(p.fieldSep) + if err != nil { + return newError("invalid regex %q: %s", p.fieldSep, err) + } + p.fieldSepRegex = re + } + case V_OFMT: + p.outputFormat = p.toString(v) + case V_OFS: + p.outputFieldSep = p.toString(v) + case V_ORS: + p.outputRecordSep = p.toString(v) + case V_RS: + p.recordSep = p.toString(v) + switch { // compare to interp.newScanner + case len(p.recordSep) <= 1: + // Simple cases use specialized splitters, not regex + case utf8.RuneCountInString(p.recordSep) == 1: + // Multi-byte unicode char falls back to regex splitter + sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars + p.recordSepRegex = regexp.MustCompile(sep) + default: + re, err := regexp.Compile(p.recordSep) + if err != nil { + return newError("invalid regex %q: %s", p.recordSep, err) + } + p.recordSepRegex = re + } + case V_RT: + p.recordTerminator = p.toString(v) + case V_SUBSEP: + p.subscriptSep = p.toString(v) + default: + panic(fmt.Sprintf("unexpected special variable index: %d", index)) + } + return nil + } +} + +// Determine the index of given array into the p.arrays slice. Global +// arrays are just at p.arrays[index], local arrays have to be looked +// up indirectly. 
func (p *interp) getArrayIndex(scope VarScope, index int) int {
	if scope == ScopeGlobal {
		// Global arrays are addressed directly by their index.
		return index
	} else {
		// Otherwise the index is translated through the last entry of
		// p.localArrays (presumably the innermost call frame -- confirm
		// against the function-call code).
		return p.localArrays[len(p.localArrays)-1][index]
	}
}

// Get a value from given array by key (index). Note that a lookup of
// a missing key stores the zero value under that key before returning
// it, so reading an element has the side effect of creating it.
func (p *interp) getArrayValue(scope VarScope, arrayIndex int, index string) value {
	resolved := p.getArrayIndex(scope, arrayIndex)
	array := p.arrays[resolved]
	v, ok := array[index]
	if !ok {
		// Strangely, per the POSIX spec, "Any other reference to a
		// nonexistent array element [apart from "in" expressions]
		// shall automatically create it."
		array[index] = v
	}
	return v
}

// Set a value in given array by key (index), overwriting any existing
// element stored under that key.
func (p *interp) setArrayValue(scope VarScope, arrayIndex int, index string, v value) {
	resolved := p.getArrayIndex(scope, arrayIndex)
	p.arrays[resolved][index] = v
}

// Get the value of given numbered field, equivalent to "$index".
//
// A negative index is an error. Index 0 yields the whole current line.
// An index past the last field yields an empty string value. Whether
// the result is a plain string or a "numeric string" follows the
// lineIsTrueStr / fieldsIsTrueStr flags.
func (p *interp) getField(index int) (value, error) {
	if index < 0 {
		return null(), newError("field index negative: %d", index)
	}
	if index == 0 {
		if p.lineIsTrueStr {
			return str(p.line), nil
		} else {
			return numStr(p.line), nil
		}
	}
	// Fields are split lazily, so make sure p.fields is populated
	// before indexing into it.
	p.ensureFields()
	if index > len(p.fields) {
		return str(""), nil
	}
	// Fields are 1-based in AWK but stored 0-based.
	if p.fieldsIsTrueStr[index-1] {
		return str(p.fields[index-1]), nil
	} else {
		return numStr(p.fields[index-1]), nil
	}
}

// Sets a single field, equivalent to "$index = value".
//
// Index 0 replaces the whole line. A negative index, or one above
// maxFieldIndex, is an error. Assigning past the last field pads with
// empty fields, and the line is then rebuilt by joining all fields
// with the output field separator.
func (p *interp) setField(index int, value string) error {
	if index == 0 {
		p.setLine(value, true)
		return nil
	}
	if index < 0 {
		return newError("field index negative: %d", index)
	}
	if index > maxFieldIndex {
		return newError("field index too large: %d", index)
	}
	// If there aren't enough fields, add empty string fields in between
	p.ensureFields()
	for i := len(p.fields); i < index; i++ {
		p.fields = append(p.fields, "")
		p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true)
	}
	p.fields[index-1] = value
	p.fieldsIsTrueStr[index-1] = true
	// Keep the field count and the line text in step with the
	// modified field list.
	p.numFields = len(p.fields)
	p.line = strings.Join(p.fields, p.outputFieldSep)
	p.lineIsTrueStr = true
	return nil
}

// Convert value to string using current CONVFMT
func (p *interp) toString(v value) string {
	return v.str(p.convertFormat)
}

// Compile regex string (or fetch from regex cache). A pattern that
// fails to compile is reported as an error that quotes the pattern.
func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) {
	if re, ok := p.regexCache[regex]; ok {
		return re, nil
	}
	re, err := regexp.Compile(regex)
	if err != nil {
		return nil, newError("invalid regex %q: %s", regex, err)
	}
	// Dumb, non-LRU cache: just cache the first N regexes
	if len(p.regexCache) < maxCachedRegexes {
		p.regexCache[regex] = re
	}
	return re, nil
}

// Evaluate simple binary expression and return result.
//
// Comparison operators compare as strings when either operand is a
// "true string" (see isTrueStr) and as numbers otherwise. DIV and MOD
// return an error on a zero divisor; MATCH and NOT_MATCH return an
// error if the right operand is not a valid regex. An operator that is
// not handled here panics.
func (p *interp) evalBinary(op Token, l, r value) (value, error) {
	// Note: cases are ordered (very roughly) in order of frequency
	// of occurrence for performance reasons. Benchmark on common code
	// before changing the order.
	switch op {
	case ADD:
		return num(l.num() + r.num()), nil
	case SUB:
		return num(l.num() - r.num()), nil
	case EQUALS:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) == p.toString(r)), nil
		} else {
			return boolean(ln == rn), nil
		}
	case LESS:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) < p.toString(r)), nil
		} else {
			return boolean(ln < rn), nil
		}
	case LTE:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) <= p.toString(r)), nil
		} else {
			return boolean(ln <= rn), nil
		}
	case CONCAT:
		return str(p.toString(l) + p.toString(r)), nil
	case MUL:
		return num(l.num() * r.num()), nil
	case DIV:
		rf := r.num()
		if rf == 0.0 {
			return null(), newError("division by zero")
		}
		return num(l.num() / rf), nil
	case GREATER:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) > p.toString(r)), nil
		} else {
			return boolean(ln > rn), nil
		}
	case GTE:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) >= p.toString(r)), nil
		} else {
			return boolean(ln >= rn), nil
		}
	case NOT_EQUALS:
		ln, lIsStr := l.isTrueStr()
		rn, rIsStr := r.isTrueStr()
		if lIsStr || rIsStr {
			return boolean(p.toString(l) != p.toString(r)), nil
		} else {
			return boolean(ln != rn), nil
		}
	case MATCH:
		// The right operand is the pattern, the left is the subject.
		re, err := p.compileRegex(p.toString(r))
		if err != nil {
			return null(), err
		}
		matched := re.MatchString(p.toString(l))
		return boolean(matched), nil
	case NOT_MATCH:
		re, err := p.compileRegex(p.toString(r))
		if err != nil {
			return null(), err
		}
		matched := re.MatchString(p.toString(l))
		return boolean(!matched), nil
	case POW:
		return num(math.Pow(l.num(), r.num())), nil
	case MOD:
		rf := r.num()
		if rf == 0.0 {
			return null(), newError("division by zero in mod")
		}
		return num(math.Mod(l.num(), rf)), nil
	default:
		panic(fmt.Sprintf("unexpected binary operation: %s", op))
	}
}

// Evaluate unary expression and return result. Handles numeric
// negation (SUB), logical not (NOT) and unary plus (ADD, which
// converts the operand to a number); any other operator panics.
func (p *interp) evalUnary(op Token, v value) value {
	switch op {
	case SUB:
		return num(-v.num())
	case NOT:
		return boolean(!v.boolean())
	case ADD:
		return num(v.num())
	default:
		panic(fmt.Sprintf("unexpected unary operation: %s", op))
	}
}

// Perform an assignment: can assign to var, array[key], or $field.
// Errors from evaluating the array key or field index expression are
// returned to the caller; any other kind of left-hand side panics.
func (p *interp) assign(left Expr, right value) error {
	switch left := left.(type) {
	case *VarExpr:
		return p.setVar(left.Scope, left.Index, right)
	case *IndexExpr:
		index, err := p.evalIndex(left.Index)
		if err != nil {
			return err
		}
		p.setArrayValue(left.Array.Scope, left.Array.Index, index, right)
		return nil
	case *FieldExpr:
		index, err := p.eval(left.Index)
		if err != nil {
			return err
		}
		// The field number is truncated to an int and the assigned
		// value is stored in its string form.
		return p.setField(int(index.num()), p.toString(right))
	}
	// Shouldn't happen
	panic(fmt.Sprintf("unexpected lvalue type: %T", left))
}

// Evaluate an index expression to a string. Multi-valued indexes are
// separated by SUBSEP.
+func (p *interp) evalIndex(indexExprs []Expr) (string, error) { + // Optimize the common case of a 1-dimensional index + if len(indexExprs) == 1 { + v, err := p.eval(indexExprs[0]) + if err != nil { + return "", err + } + return p.toString(v), nil + } + + // Up to 3-dimensional indices won't require heap allocation + indices := make([]string, 0, 3) + for _, expr := range indexExprs { + v, err := p.eval(expr) + if err != nil { + return "", err + } + indices = append(indices, p.toString(v)) + } + return strings.Join(indices, p.subscriptSep), nil +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/io.go b/play/vendor/github.com/benhoyt/goawk/interp/io.go new file mode 100644 index 0000000..ca6b1e0 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/io.go @@ -0,0 +1,561 @@ +// Input/output handling for GoAWK interpreter + +package interp + +import ( + "bufio" + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "regexp" + "strconv" + "strings" + "unicode/utf8" + + . "github.com/benhoyt/goawk/internal/ast" + . 
"github.com/benhoyt/goawk/lexer" +) + +// Print a line of output followed by a newline +func (p *interp) printLine(writer io.Writer, line string) error { + err := writeOutput(writer, line) + if err != nil { + return err + } + return writeOutput(writer, p.outputRecordSep) +} + +// Implement a buffered version of WriteCloser so output is buffered +// when redirecting to a file (eg: print >"out") +type bufferedWriteCloser struct { + *bufio.Writer + io.Closer +} + +func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser { + writer := bufio.NewWriterSize(w, outputBufSize) + return &bufferedWriteCloser{writer, w} +} + +func (wc *bufferedWriteCloser) Close() error { + err := wc.Writer.Flush() + if err != nil { + return err + } + return wc.Closer.Close() +} + +// Determine the output stream for given redirect token and +// destination (file or pipe name) +func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) { + if redirect == ILLEGAL { + // Token "ILLEGAL" means send to standard output + return p.output, nil + } + + destValue, err := p.eval(dest) + if err != nil { + return nil, err + } + name := p.toString(destValue) + if _, ok := p.inputStreams[name]; ok { + return nil, newError("can't write to reader stream") + } + if w, ok := p.outputStreams[name]; ok { + return w, nil + } + + switch redirect { + case GREATER, APPEND: + if name == "-" { + // filename of "-" means write to stdout, eg: print "x" >"-" + return p.output, nil + } + // Write or append to file + if p.noFileWrites { + return nil, newError("can't write to file due to NoFileWrites") + } + p.flushOutputAndError() // ensure synchronization + flags := os.O_CREATE | os.O_WRONLY + if redirect == GREATER { + flags |= os.O_TRUNC + } else { + flags |= os.O_APPEND + } + w, err := os.OpenFile(name, flags, 0644) + if err != nil { + return nil, newError("output redirection error: %s", err) + } + buffered := newBufferedWriteCloser(w) + p.outputStreams[name] = buffered + return buffered, 
nil + + case PIPE: + // Pipe to command + if p.noExec { + return nil, newError("can't write to pipe due to NoExec") + } + cmd := p.execShell(name) + w, err := cmd.StdinPipe() + if err != nil { + return nil, newError("error connecting to stdin pipe: %v", err) + } + cmd.Stdout = p.output + cmd.Stderr = p.errorOutput + p.flushOutputAndError() // ensure synchronization + err = cmd.Start() + if err != nil { + p.printErrorf("%s\n", err) + return ioutil.Discard, nil + } + p.commands[name] = cmd + buffered := newBufferedWriteCloser(w) + p.outputStreams[name] = buffered + return buffered, nil + + default: + // Should never happen + panic(fmt.Sprintf("unexpected redirect type %s", redirect)) + } +} + +// Get input Scanner to use for "getline" based on file name +func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { + if _, ok := p.outputStreams[name]; ok { + return nil, newError("can't read from writer stream") + } + if _, ok := p.inputStreams[name]; ok { + return p.scanners[name], nil + } + if name == "-" { + // filename of "-" means read from stdin, eg: getline <"-" + if scanner, ok := p.scanners["-"]; ok { + return scanner, nil + } + scanner := p.newScanner(p.stdin) + p.scanners[name] = scanner + return scanner, nil + } + if p.noFileReads { + return nil, newError("can't read from file due to NoFileReads") + } + r, err := os.Open(name) + if err != nil { + return nil, err // *os.PathError is handled by caller (getline returns -1) + } + scanner := p.newScanner(r) + p.scanners[name] = scanner + p.inputStreams[name] = r + return scanner, nil +} + +// Get input Scanner to use for "getline" based on pipe name +func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) { + if _, ok := p.outputStreams[name]; ok { + return nil, newError("can't read from writer stream") + } + if _, ok := p.inputStreams[name]; ok { + return p.scanners[name], nil + } + if p.noExec { + return nil, newError("can't read from pipe due to NoExec") + } + cmd := 
p.execShell(name) + cmd.Stdin = p.stdin + cmd.Stderr = p.errorOutput + r, err := cmd.StdoutPipe() + if err != nil { + return nil, newError("error connecting to stdout pipe: %v", err) + } + p.flushOutputAndError() // ensure synchronization + err = cmd.Start() + if err != nil { + p.printErrorf("%s\n", err) + return bufio.NewScanner(strings.NewReader("")), nil + } + scanner := p.newScanner(r) + p.commands[name] = cmd + p.inputStreams[name] = r + p.scanners[name] = scanner + return scanner, nil +} + +// Create a new buffered Scanner for reading input records +func (p *interp) newScanner(input io.Reader) *bufio.Scanner { + scanner := bufio.NewScanner(input) + switch { + case p.recordSep == "\n": + // Scanner default is to split on newlines + case p.recordSep == "": + // Empty string for RS means split on \n\n (blank lines) + splitter := blankLineSplitter{&p.recordTerminator} + scanner.Split(splitter.scan) + case len(p.recordSep) == 1: + splitter := byteSplitter{p.recordSep[0]} + scanner.Split(splitter.scan) + case utf8.RuneCountInString(p.recordSep) >= 1: + // Multi-byte and single char but multi-byte RS use regex + splitter := regexSplitter{p.recordSepRegex, &p.recordTerminator} + scanner.Split(splitter.scan) + } + buffer := make([]byte, inputBufSize) + scanner.Buffer(buffer, maxRecordLength) + return scanner +} + +// Copied from bufio/scan.go in the stdlib: I guess it's a bit more +// efficient than bytes.TrimSuffix(data, []byte("\r")) +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[:len(data)-1] + } + return data +} + +func dropLF(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\n' { + return data[:len(data)-1] + } + return data +} + +type blankLineSplitter struct { + terminator *string +} + +func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + // Skip newlines at beginning of data + i := 0 + for 
i < len(data) && (data[i] == '\n' || data[i] == '\r') { + i++ + } + if i >= len(data) { + // At end of data after newlines, skip entire data block + return i, nil, nil + } + start := i + + // Try to find two consecutive newlines (or \n\r\n for Windows) + for ; i < len(data); i++ { + if data[i] != '\n' { + continue + } + end := i + if i+1 < len(data) && data[i+1] == '\n' { + i += 2 + for i < len(data) && (data[i] == '\n' || data[i] == '\r') { + i++ // Skip newlines at end of record + } + *s.terminator = string(data[end:i]) + return i, dropCR(data[start:end]), nil + } + if i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n' { + i += 3 + for i < len(data) && (data[i] == '\n' || data[i] == '\r') { + i++ // Skip newlines at end of record + } + *s.terminator = string(data[end:i]) + return i, dropCR(data[start:end]), nil + } + } + + // If we're at EOF, we have one final record; return it + if atEOF { + token = dropCR(dropLF(data[start:])) + *s.terminator = string(data[len(token):]) + return len(data), token, nil + } + + // Request more data + return 0, nil, nil +} + +// Splitter that splits records on the given separator byte +type byteSplitter struct { + sep byte +} + +func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, s.sep); i >= 0 { + // We have a full sep-terminated record + return i + 1, data[:i], nil + } + // If at EOF, we have a final, non-terminated record; return it + if atEOF { + return len(data), data, nil + } + // Request more data + return 0, nil, nil +} + +// Splitter that splits records on the given regular expression +type regexSplitter struct { + re *regexp.Regexp + terminator *string +} + +func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + loc := s.re.FindIndex(data) + // Note: for a regex such as "()", loc[0]==loc[1]. 
Gawk behavior for this + // case is to match the entire input. + if loc != nil && loc[0] != loc[1] { + *s.terminator = string(data[loc[0]:loc[1]]) // set RT special variable + return loc[1], data[:loc[0]], nil + } + // If at EOF, we have a final, non-terminated record; return it + if atEOF { + *s.terminator = "" + return len(data), data, nil + } + // Request more data + return 0, nil, nil +} + +// Setup for a new input file with given name (empty string if stdin) +func (p *interp) setFile(filename string) { + p.filename = numStr(filename) + p.fileLineNum = 0 +} + +// Setup for a new input line (but don't parse it into fields till we +// need to) +func (p *interp) setLine(line string, isTrueStr bool) { + p.line = line + p.lineIsTrueStr = isTrueStr + p.haveFields = false +} + +// Ensure that the current line is parsed into fields, splitting it +// into fields if it hasn't been already +func (p *interp) ensureFields() { + if p.haveFields { + return + } + p.haveFields = true + + switch { + case p.fieldSep == " ": + // FS space (default) means split fields on any whitespace + p.fields = strings.Fields(p.line) + case p.line == "": + p.fields = nil + case utf8.RuneCountInString(p.fieldSep) <= 1: + // 1-char FS is handled as plain split (not regex) + p.fields = strings.Split(p.line, p.fieldSep) + default: + // Split on FS as a regex + p.fields = p.fieldSepRegex.Split(p.line, -1) + } + + // Special case for when RS=="" and FS is single character, + // split on newline in addition to FS. 
See more here: + // https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html + if p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 { + fields := make([]string, 0, len(p.fields)) + for _, field := range p.fields { + lines := strings.Split(field, "\n") + for _, line := range lines { + trimmed := strings.TrimSuffix(line, "\r") + fields = append(fields, trimmed) + } + } + p.fields = fields + } + + p.fieldsIsTrueStr = make([]bool, len(p.fields)) + p.numFields = len(p.fields) +} + +// Fetch next line (record) of input from current input file, opening +// next input file if done with previous one +func (p *interp) nextLine() (string, error) { + for { + if p.scanner == nil { + if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin { + // Previous input is file, close it + _ = prevInput.Close() + } + if p.filenameIndex >= p.argc && !p.hadFiles { + // Moved past number of ARGV args and haven't seen + // any files yet, use stdin + p.input = p.stdin + p.setFile("") + p.hadFiles = true + } else { + if p.filenameIndex >= p.argc { + // Done with ARGV args, all done with input + return "", io.EOF + } + // Fetch next filename from ARGV. Can't use + // getArrayValue() here as it would set the value if + // not present + index := strconv.Itoa(p.filenameIndex) + argvIndex := p.program.Arrays["ARGV"] + argvArray := p.arrays[p.getArrayIndex(ScopeGlobal, argvIndex)] + filename := p.toString(argvArray[index]) + p.filenameIndex++ + + // Is it actually a var=value assignment? 
+ matches := varRegex.FindStringSubmatch(filename) + if len(matches) >= 3 { + // Yep, set variable to value and keep going + err := p.setVarByName(matches[1], matches[2]) + if err != nil { + return "", err + } + continue + } else if filename == "" { + // ARGV arg is empty string, skip + p.input = nil + continue + } else if filename == "-" { + // ARGV arg is "-" meaning stdin + p.input = p.stdin + p.setFile("") + } else { + // A regular file name, open it + if p.noFileReads { + return "", newError("can't read from file due to NoFileReads") + } + input, err := os.Open(filename) + if err != nil { + return "", err + } + p.input = input + p.setFile(filename) + p.hadFiles = true + } + } + p.scanner = p.newScanner(p.input) + } + p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars + if p.scanner.Scan() { + // We scanned some input, break and return it + break + } + err := p.scanner.Err() + if err != nil { + return "", fmt.Errorf("error reading from input: %s", err) + } + // Signal loop to move onto next file + p.scanner = nil + } + + // Got a line (record) of input, return it + p.lineNum++ + p.fileLineNum++ + return p.scanner.Text(), nil +} + +// Write output string to given writer, producing correct line endings +// on Windows (CR LF). +func writeOutput(w io.Writer, s string) error { + if crlfNewline { + // First normalize to \n, then convert all newlines to \r\n + // (on Windows). NOTE: creating two new strings is almost + // certainly slow; would be better to create a custom Writer. + s = strings.Replace(s, "\r\n", "\n", -1) + s = strings.Replace(s, "\n", "\r\n", -1) + } + _, err := io.WriteString(w, s) + return err +} + +// Close all streams, commands, and so on (after program execution). 
+func (p *interp) closeAll() { + if prevInput, ok := p.input.(io.Closer); ok { + _ = prevInput.Close() + } + for _, r := range p.inputStreams { + _ = r.Close() + } + for _, w := range p.outputStreams { + _ = w.Close() + } + for _, cmd := range p.commands { + _ = cmd.Wait() + } + if f, ok := p.output.(flusher); ok { + _ = f.Flush() + } + if f, ok := p.errorOutput.(flusher); ok { + _ = f.Flush() + } +} + +// Flush all output streams as well as standard output. Report whether all +// streams were flushed successfully (logging error(s) if not). +func (p *interp) flushAll() bool { + allGood := true + for name, writer := range p.outputStreams { + allGood = allGood && p.flushWriter(name, writer) + } + if _, ok := p.output.(flusher); ok { + // User-provided output may or may not be flushable + allGood = allGood && p.flushWriter("stdout", p.output) + } + return allGood +} + +// Flush a single, named output stream, and report whether it was flushed +// successfully (logging an error if not). +func (p *interp) flushStream(name string) bool { + writer := p.outputStreams[name] + if writer == nil { + p.printErrorf("error flushing %q: not an output file or pipe\n", name) + return false + } + return p.flushWriter(name, writer) +} + +type flusher interface { + Flush() error +} + +// Flush given output writer, and report whether it was flushed successfully +// (logging an error if not). +func (p *interp) flushWriter(name string, writer io.Writer) bool { + flusher, ok := writer.(flusher) + if !ok { + return true // not a flusher, don't error + } + err := flusher.Flush() + if err != nil { + p.printErrorf("error flushing %q: %v\n", name, err) + return false + } + return true +} + +// Flush output and error streams. +func (p *interp) flushOutputAndError() { + if flusher, ok := p.output.(flusher); ok { + _ = flusher.Flush() + } + if flusher, ok := p.errorOutput.(flusher); ok { + _ = flusher.Flush() + } +} + +// Print a message to the error output stream, flushing as necessary. 
+func (p *interp) printErrorf(format string, args ...interface{}) { + if flusher, ok := p.output.(flusher); ok { + _ = flusher.Flush() // ensure synchronization + } + fmt.Fprintf(p.errorOutput, format, args...) + if flusher, ok := p.errorOutput.(flusher); ok { + _ = flusher.Flush() + } +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/value.go b/play/vendor/github.com/benhoyt/goawk/interp/value.go new file mode 100644 index 0000000..ef5a422 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/value.go @@ -0,0 +1,178 @@ +// GoAWK interpreter value type (not exported). + +package interp + +import ( + "fmt" + "math" + "strconv" + "strings" +) + +type valueType uint8 + +const ( + typeNull valueType = iota + typeStr + typeNum + typeNumStr +) + +// An AWK value (these are passed around by value) +type value struct { + typ valueType // Type of value + s string // String value (for typeStr and typeNumStr) + n float64 // Numeric value (for typeNum) +} + +// Create a new null value +func null() value { + return value{} +} + +// Create a new number value +func num(n float64) value { + return value{typ: typeNum, n: n} +} + +// Create a new string value +func str(s string) value { + return value{typ: typeStr, s: s} +} + +// Create a new value to represent a "numeric string" from an input field +func numStr(s string) value { + return value{typ: typeNumStr, s: s} +} + +// Create a numeric value from a Go bool +func boolean(b bool) value { + if b { + return num(1) + } + return num(0) +} + +// Return true if value is a "true string" (a string or a "numeric string" +// from an input field that can't be converted to a number). If false, +// also return the (possibly converted) number. 
+func (v value) isTrueStr() (float64, bool) { + switch v.typ { + case typeStr: + return 0, true + case typeNumStr: + f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64) + if err != nil { + return 0, true + } + return f, false + default: // typeNum, typeNull + return v.n, false + } +} + +// Return Go bool value of AWK value. For numbers or numeric strings, +// zero is false and everything else is true. For strings, empty +// string is false and everything else is true. +func (v value) boolean() bool { + switch v.typ { + case typeStr: + return v.s != "" + case typeNumStr: + f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64) + if err != nil { + return v.s != "" + } + return f != 0 + default: // typeNum, typeNull + return v.n != 0 + } +} + +// Return value's string value, or convert to a string using given +// format if a number value. Integers are a special case and don't +// use floatFormat. +func (v value) str(floatFormat string) string { + if v.typ == typeNum { + switch { + case math.IsNaN(v.n): + return "nan" + case math.IsInf(v.n, 0): + if v.n < 0 { + return "-inf" + } else { + return "inf" + } + case v.n == float64(int(v.n)): + return strconv.Itoa(int(v.n)) + default: + return fmt.Sprintf(floatFormat, v.n) + } + } + // For typeStr and typeNumStr we already have the string, for + // typeNull v.s == "". 
+ return v.s +} + +// Return value's number value, converting from string if necessary +func (v value) num() float64 { + switch v.typ { + case typeStr, typeNumStr: + // Ensure string starts with a float and convert it + return parseFloatPrefix(v.s) + default: // typeNum, typeNull + return v.n + } +} + +var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1} + +// Like strconv.ParseFloat, but parses at the start of string and +// allows things like "1.5foo" +func parseFloatPrefix(s string) float64 { + // Skip whitespace at start + i := 0 + for i < len(s) && asciiSpace[s[i]] != 0 { + i++ + } + start := i + + // Parse mantissa: optional sign, initial digit(s), optional '.', + // then more digits + gotDigit := false + if i < len(s) && (s[i] == '+' || s[i] == '-') { + i++ + } + for i < len(s) && s[i] >= '0' && s[i] <= '9' { + gotDigit = true + i++ + } + if i < len(s) && s[i] == '.' { + i++ + } + for i < len(s) && s[i] >= '0' && s[i] <= '9' { + gotDigit = true + i++ + } + if !gotDigit { + return 0 + } + + // Parse exponent ("1e" and similar are allowed, but ParseFloat + // rejects them) + end := i + if i < len(s) && (s[i] == 'e' || s[i] == 'E') { + i++ + if i < len(s) && (s[i] == '+' || s[i] == '-') { + i++ + } + for i < len(s) && s[i] >= '0' && s[i] <= '9' { + i++ + end = i + } + } + + floatStr := s[start:end] + f, _ := strconv.ParseFloat(floatStr, 64) + return f // Returns infinity in case of "value out of range" error +} diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go new file mode 100644 index 0000000..30147e9 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go @@ -0,0 +1,461 @@ +// Package lexer is an AWK lexer (tokenizer). +// +// The lexer turns a string of AWK source code into a stream of +// tokens for parsing. +// +// To tokenize some source, create a new lexer with NewLexer(src) and +// then call Scan() until the token type is EOF or ILLEGAL. 
+// +package lexer + +import ( + "fmt" +) + +// Lexer tokenizes a byte string of AWK source code. Use NewLexer to +// actually create a lexer, and Scan() or ScanRegex() to get tokens. +type Lexer struct { + src []byte + offset int + ch byte + pos Position + nextPos Position + hadSpace bool + lastTok Token +} + +// Position stores the source line and column where a token starts. +type Position struct { + // Line number of the token (starts at 1). + Line int + // Column on the line (starts at 1). Note that this is the byte + // offset into the line, not rune offset. + Column int +} + +// NewLexer creates a new lexer that will tokenize the given source +// code. See the module-level example for a working example. +func NewLexer(src []byte) *Lexer { + l := &Lexer{src: src} + l.nextPos.Line = 1 + l.nextPos.Column = 1 + l.next() + return l +} + +// HadSpace returns true if the previously-scanned token had +// whitespace before it. Used by the parser because when calling a +// user-defined function the grammar doesn't allow a space between +// the function name and the left parenthesis. +func (l *Lexer) HadSpace() bool { + return l.hadSpace +} + +// Scan scans the next token and returns its position (line/column), +// token value (one of the uppercase token constants), and the +// string value of the token. For most tokens, the token value is +// empty. For NAME, NUMBER, STRING, and REGEX tokens, it's the +// token's value. For an ILLEGAL token, it's the error message. +func (l *Lexer) Scan() (Position, Token, string) { + pos, tok, val := l.scan() + l.lastTok = tok + return pos, tok, val +} + +// Does the real work of scanning. Scan() wraps this to more easily +// set lastTok. 
func (l *Lexer) scan() (Position, Token, string) {
	// Skip whitespace (except newline, which is a token)
	l.hadSpace = false
	for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' {
		l.hadSpace = true
		if l.ch == '\\' {
			// A backslash outside a string is only valid as a line
			// continuation: it must be followed by \n (or \r\n).
			l.next()
			if l.ch == '\r' {
				l.next()
			}
			if l.ch != '\n' {
				return l.pos, ILLEGAL, "expected \\n after \\ line continuation"
			}
		}
		l.next()
	}
	if l.ch == '#' {
		// Skip comment till end of line
		l.next()
		for l.ch != '\n' && l.ch != 0 {
			l.next()
		}
	}
	if l.ch == 0 {
		// l.next() reached end of input
		return l.pos, EOF, ""
	}

	pos := l.pos
	tok := ILLEGAL
	val := ""

	// From here on, ch is the first character of the token and l.ch is
	// the one-character lookahead.
	ch := l.ch
	l.next()

	// Names: keywords and functions
	if isNameStart(ch) {
		// l.offset is one past l.ch, which is one past ch, so the name
		// begins at l.offset-2.
		start := l.offset - 2
		for isNameStart(l.ch) || isDigit(l.ch) {
			l.next()
		}
		// The slice stops before the lookahead character l.ch.
		name := string(l.src[start : l.offset-1])
		tok := KeywordToken(name)
		if tok == ILLEGAL {
			// Not a keyword: an ordinary name, which carries its text.
			tok = NAME
			val = name
		}
		return pos, tok, val
	}

	// These are ordered by my guess at frequency of use. Should run
	// through a corpus of real AWK programs to determine actual
	// frequency.
	switch ch {
	case '$':
		tok = DOLLAR
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
		// Avoid make/append and use l.offset directly for performance
		start := l.offset - 2
		gotDigit := false
		if ch != '.' {
			// Integer part, then an optional '.'
			gotDigit = true
			for isDigit(l.ch) {
				l.next()
			}
			if l.ch == '.' {
				l.next()
			}
		}
		// Fractional digits (the only digits when the token began
		// with '.')
		for isDigit(l.ch) {
			gotDigit = true
			l.next()
		}
		if !gotDigit {
			// A lone '.' is not a number
			return l.pos, ILLEGAL, "expected digits"
		}
		if l.ch == 'e' || l.ch == 'E' {
			l.next()
			gotSign := false
			if l.ch == '+' || l.ch == '-' {
				gotSign = true
				l.next()
			}
			gotDigit = false
			for isDigit(l.ch) {
				l.next()
				gotDigit = true
			}
			// Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e"
			// considered a variable). "1e+" is parsed as "1e + ...".
			if !gotDigit {
				if gotSign {
					l.unread() // unread the '+' or '-'
				}
				l.unread() // unread the 'e' or 'E'
			}
		}
		tok = NUMBER
		val = string(l.src[start : l.offset-1])
	case '{':
		tok = LBRACE
	case '}':
		tok = RBRACE
	case '=':
		tok = l.choice('=', ASSIGN, EQUALS)
	case '<':
		tok = l.choice('=', LESS, LTE)
	case '>':
		switch l.ch {
		case '=':
			l.next()
			tok = GTE
		case '>':
			l.next()
			tok = APPEND
		default:
			tok = GREATER
		}
	case '"', '\'':
		// Note: POSIX awk spec doesn't allow single-quoted strings,
		// but this helps without quoting, especially on Windows
		// where the shell quote character is " (double quote).
		chars := make([]byte, 0, 32) // most won't require heap allocation
		// Read until the quote character that opened the string (ch)
		for l.ch != ch {
			c := l.ch
			if c == 0 {
				return l.pos, ILLEGAL, "didn't find end quote in string"
			}
			if c == '\r' || c == '\n' {
				return l.pos, ILLEGAL, "can't have newline in string"
			}
			if c != '\\' {
				// Normal, non-escaped character
				chars = append(chars, c)
				l.next()
				continue
			}
			// Escape sequence, skip over \ and process
			l.next()
			switch l.ch {
			case 'n':
				c = '\n'
				l.next()
			case 't':
				c = '\t'
				l.next()
			case 'r':
				c = '\r'
				l.next()
			case 'a':
				c = '\a'
				l.next()
			case 'b':
				c = '\b'
				l.next()
			case 'f':
				c = '\f'
				l.next()
			case 'v':
				c = '\v'
				l.next()
			case 'x':
				// Hex byte of one or two hex digits
				l.next()
				digit := hexDigit(l.ch)
				if digit < 0 {
					return l.pos, ILLEGAL, "1 or 2 hex digits expected"
				}
				c = byte(digit)
				l.next()
				digit = hexDigit(l.ch)
				if digit >= 0 {
					c = c*16 + byte(digit)
					l.next()
				}
			case '0', '1', '2', '3', '4', '5', '6', '7':
				// Octal byte of 1-3 octal digits
				c = l.ch - '0'
				l.next()
				for i := 0; i < 2 && l.ch >= '0' && l.ch <= '7'; i++ {
					c = c*8 + l.ch - '0'
					l.next()
				}
			default:
				// Any other escape character is just the char
				// itself, eg: "\z" is just "z"
				c = l.ch
				l.next()
			}
			chars = append(chars, c)
		}
		// Consume the closing quote
		l.next()
		tok = STRING
		val = string(chars)
	case '(':
		tok = LPAREN
	case ')':
		tok = RPAREN
	case ',':
		tok = COMMA
	case ';':
		tok = SEMICOLON
	case '+':
		switch l.ch {
		case '+':
			l.next()
			tok = INCR
		case '=':
			l.next()
			tok = ADD_ASSIGN
		default:
			tok = ADD
		}
	case '-':
		switch l.ch {
		case '-':
			l.next()
			tok = DECR
		case '=':
			l.next()
			tok = SUB_ASSIGN
		default:
			tok = SUB
		}
	case '*':
		switch l.ch {
		case '*':
			// "**" is treated the same as "^" (and "**=" as "^=")
			l.next()
			tok = l.choice('=', POW, POW_ASSIGN)
		case '=':
			l.next()
			tok = MUL_ASSIGN
		default:
			tok = MUL
		}
	case '/':
		tok = l.choice('=', DIV, DIV_ASSIGN)
	case '%':
		tok = l.choice('=', MOD, MOD_ASSIGN)
	case '[':
		tok = LBRACKET
	case ']':
		tok = RBRACKET
	case '\n':
		tok = NEWLINE
	case '^':
		tok = l.choice('=', POW, POW_ASSIGN)
	case '!':
		switch l.ch {
		case '=':
			l.next()
			tok = NOT_EQUALS
		case '~':
			l.next()
			tok = NOT_MATCH
		default:
			tok = NOT
		}
	case '~':
		tok = MATCH
	case '?':
		tok = QUESTION
	case ':':
		tok = COLON
	case '&':
		// Only "&&" is a token; a single '&' is an error
		tok = l.choice('&', ILLEGAL, AND)
		if tok == ILLEGAL {
			return l.pos, ILLEGAL, "unexpected char after '&'"
		}
	case '|':
		tok = l.choice('|', PIPE, OR)
	default:
		tok = ILLEGAL
		val = "unexpected char"
	}
	return pos, tok, val
}

// ScanRegex parses an AWK regular expression in /slash/ syntax. The
// AWK grammar has somewhat special handling of regex tokens, so the
// parser can only call this after a DIV or DIV_ASSIGN token has just
// been scanned. It returns the position, token, and string value in
// the same form as Scan, and records the token as the last one
// scanned.
func (l *Lexer) ScanRegex() (Position, Token, string) {
	pos, tok, val := l.scanRegex()
	l.lastTok = tok
	return pos, tok, val
}

// Does the real work of scanning a regex. ScanRegex() wraps this to
// more easily set lastTok.
+func (l *Lexer) scanRegex() (Position, Token, string) {
+	// The reported position is that of the opening slash, which the
+	// lexer has already consumed as part of the previous token.
+	start := l.pos
+	buf := make([]byte, 0, 32) // small regexes stay off the heap
+	switch l.lastTok {
+	case DIV:
+		// Previous token was "/": step back one column.
+		start.Column--
+	case DIV_ASSIGN:
+		// Previous token was "/=": the '=' is really the first
+		// character of the regex, so step back two columns.
+		start.Column -= 2
+		buf = append(buf, '=')
+	default:
+		return l.pos, ILLEGAL, fmt.Sprintf("unexpected %s preceding regex", l.lastTok)
+	}
+	for l.ch != '/' {
+		switch l.ch {
+		case 0:
+			return l.pos, ILLEGAL, "didn't find end slash in regex"
+		case '\r', '\n':
+			return l.pos, ILLEGAL, "can't have newline in regex"
+		case '\\':
+			// Keep the backslash for every escape except "\/", which
+			// collapses to a plain slash.
+			l.next()
+			if l.ch != '/' {
+				buf = append(buf, '\\')
+			}
+		}
+		buf = append(buf, l.ch)
+		l.next()
+	}
+	l.next() // consume the closing slash
+	return start, REGEX, string(buf)
+}
+
+// Advance to the next input byte: l.ch becomes that byte (0 once the
+// input is exhausted) and the line/column bookkeeping is updated.
+func (l *Lexer) next() {
+	l.pos = l.nextPos
+	if l.offset < len(l.src) {
+		b := l.src[l.offset]
+		switch b {
+		case '\n':
+			l.nextPos.Line++
+			l.nextPos.Column = 1
+		case '\r':
+			// Carriage returns don't advance the column.
+		default:
+			l.nextPos.Column++
+		}
+		l.ch = b
+		l.offset++
+		return
+	}
+	if l.ch == 0 {
+		// Already past the end; nothing more to do.
+		return
+	}
+	// First time we hit the end: leave offset one past the end, which
+	// simplifies the offset arithmetic for NAME and NUMBER tokens.
+	l.ch = 0
+	l.offset++
+	l.nextPos.Column++
+}
+
+// Step back over the character just scanned. Only valid within a
+// single line, as line numbers are not adjusted.
+func (l *Lexer) unread() {
+	l.offset--
+	l.pos.Column--
+	l.nextPos.Column--
+	l.ch = l.src[l.offset-1]
+}
+
+// isNameStart reports whether ch may begin a name: an underscore or
+// an ASCII letter.
+func isNameStart(ch byte) bool {
+	switch {
+	case ch == '_', 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
+		return true
+	}
+	return false
+}
+
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
+
+// Return the hex digit 0-15 corresponding to the given ASCII byte,
+// or -1 if it's not a valid hex digit.
+func hexDigit(ch byte) int {
+	switch {
+	case isDigit(ch):
+		return int(ch - '0')
+	case ch >= 'a' && ch <= 'f':
+		return int(ch - 'a' + 10)
+	case ch >= 'A' && ch <= 'F':
+		return int(ch - 'A' + 10)
+	default:
+		return -1
+	}
+}
+
+// choice looks at the current (not yet consumed) character: if it is
+// ch, the character is consumed and token "two" is returned;
+// otherwise nothing is consumed and token "one" is returned.
+func (l *Lexer) choice(ch byte, one, two Token) Token {
+	if l.ch == ch {
+		l.next()
+		return two
+	}
+	return one
+}
+
+// PeekByte returns the next unscanned byte; used when parsing
+// "getline lvalue" expressions. Returns 0 at end of input.
+func (l *Lexer) PeekByte() byte {
+	return l.ch
+}
diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/token.go b/play/vendor/github.com/benhoyt/goawk/lexer/token.go
new file mode 100644
index 0000000..6780816
--- /dev/null
+++ b/play/vendor/github.com/benhoyt/goawk/lexer/token.go
@@ -0,0 +1,261 @@
+// Lexer tokens
+
+package lexer
+
+// Token is the type of a single token.
+type Token int
+
+const (
+	ILLEGAL Token = iota
+	EOF
+	NEWLINE
+	CONCAT // Not really a token, but used as an operator
+
+	// Symbols
+
+	ADD
+	ADD_ASSIGN
+	AND
+	APPEND
+	ASSIGN
+	COLON
+	COMMA
+	DECR
+	DIV
+	DIV_ASSIGN
+	DOLLAR
+	EQUALS
+	GTE
+	GREATER
+	INCR
+	LBRACE
+	LBRACKET
+	LESS
+	LPAREN
+	LTE
+	MATCH
+	MOD
+	MOD_ASSIGN
+	MUL
+	MUL_ASSIGN
+	NOT_MATCH
+	NOT
+	NOT_EQUALS
+	OR
+	PIPE
+	POW
+	POW_ASSIGN
+	QUESTION
+	RBRACE
+	RBRACKET
+	RPAREN
+	SEMICOLON
+	SUB
+	SUB_ASSIGN
+
+	// Keywords
+
+	BEGIN
+	BREAK
+	CONTINUE
+	DELETE
+	DO
+	ELSE
+	END
+	EXIT
+	FOR
+	FUNCTION
+	GETLINE
+	IF
+	IN
+	NEXT
+	PRINT
+	PRINTF
+	RETURN
+	WHILE
+
+	// Built-in functions
+
+	F_ATAN2
+	F_CLOSE
+	F_COS
+	F_EXP
+	F_FFLUSH
+	F_GSUB
+	F_INDEX
+	F_INT
+	F_LENGTH
+	F_LOG
+	F_MATCH
+	F_RAND
+	F_SIN
+	F_SPLIT
+	F_SPRINTF
+	F_SQRT
+	F_SRAND
+	F_SUB
+	F_SUBSTR
+	F_SYSTEM
+	F_TOLOWER
+	F_TOUPPER
+
+	// Literals and names (variables and arrays)
+
+	NAME
+	NUMBER
+	STRING
+	REGEX
+
+	LAST       = REGEX
+	FIRST_FUNC = F_ATAN2
+	LAST_FUNC  = F_TOUPPER
+)
+
+// keywordTokens maps the source spelling of each keyword and built-in
+// function name to its token.
+var keywordTokens = map[string]Token{
+	"BEGIN":    BEGIN,
+	"break":    BREAK,
+	"continue": CONTINUE,
+	"delete":   DELETE,
+	"do":       DO,
+	"else":     ELSE,
+	"END":      END,
+	"exit":     EXIT,
+	"for":      FOR,
+	"function": FUNCTION,
+	"getline":  GETLINE,
+	"if":       IF,
+	"in":       IN,
+	"next":     NEXT,
+	"print":    PRINT,
+	"printf":   PRINTF,
+	"return":   RETURN,
+	"while":    WHILE,
+
+	"atan2":   F_ATAN2,
+	"close":   F_CLOSE,
+	"cos":     F_COS,
+	"exp":     F_EXP,
+	"fflush":  F_FFLUSH,
+	"gsub":    F_GSUB,
+	"index":   F_INDEX,
+	"int":     F_INT,
+	"length":  F_LENGTH,
+	"log":     F_LOG,
+	"match":   F_MATCH,
+	"rand":    F_RAND,
+	"sin":     F_SIN,
+	"split":   F_SPLIT,
+	"sprintf": F_SPRINTF,
+	"sqrt":    F_SQRT,
+	"srand":   F_SRAND,
+	"sub":     F_SUB,
+	"substr":  F_SUBSTR,
+	"system":  F_SYSTEM,
+	"tolower": F_TOLOWER,
+	"toupper": F_TOUPPER,
+}
+
+// KeywordToken returns the token associated with the given keyword
+// string, or ILLEGAL if given name is not a keyword.
+func KeywordToken(name string) Token {
+	// A missing key yields the zero Token, which is ILLEGAL.
+	return keywordTokens[name]
+}
+
+// tokenNames maps each token to the text returned by Token.String.
+// Tokens with no source spelling (ILLEGAL, NEWLINE, CONCAT) use an
+// angle-bracket placeholder so that messages built from them are
+// never blank.
+var tokenNames = map[Token]string{
+	ILLEGAL: "<illegal>",
+	EOF:     "EOF",
+	NEWLINE: "<newline>",
+	CONCAT:  "<concat>",
+
+	ADD:        "+",
+	ADD_ASSIGN: "+=",
+	AND:        "&&",
+	APPEND:     ">>",
+	ASSIGN:     "=",
+	COLON:      ":",
+	COMMA:      ",",
+	DECR:       "--",
+	DIV:        "/",
+	DIV_ASSIGN: "/=",
+	DOLLAR:     "$",
+	EQUALS:     "==",
+	GTE:        ">=",
+	GREATER:    ">",
+	INCR:       "++",
+	LBRACE:     "{",
+	LBRACKET:   "[",
+	LESS:       "<",
+	LPAREN:     "(",
+	LTE:        "<=",
+	MATCH:      "~",
+	MOD:        "%",
+	MOD_ASSIGN: "%=",
+	MUL:        "*",
+	MUL_ASSIGN: "*=",
+	NOT_MATCH:  "!~",
+	NOT:        "!",
+	NOT_EQUALS: "!=",
+	OR:         "||",
+	PIPE:       "|",
+	POW:        "^",
+	POW_ASSIGN: "^=",
+	QUESTION:   "?",
+	RBRACE:     "}",
+	RBRACKET:   "]",
+	RPAREN:     ")",
+	SEMICOLON:  ";",
+	SUB:        "-",
+	SUB_ASSIGN: "-=",
+
+	BEGIN:    "BEGIN",
+	BREAK:    "break",
+	CONTINUE: "continue",
+	DELETE:   "delete",
+	DO:       "do",
+	ELSE:     "else",
+	END:      "END",
+	EXIT:     "exit",
+	FOR:      "for",
+	FUNCTION: "function",
+	GETLINE:  "getline",
+	IF:       "if",
+	IN:       "in",
+	NEXT:     "next",
+	PRINT:    "print",
+	PRINTF:   "printf",
+	RETURN:   "return",
+	WHILE:    "while",
+
+	F_ATAN2:   "atan2",
+	F_CLOSE:   "close",
+	F_COS:     "cos",
+	F_EXP:     "exp",
+	F_FFLUSH:  "fflush",
+	F_GSUB:    "gsub",
+	F_INDEX:   "index",
+	F_INT:     "int",
+	F_LENGTH:  "length",
+	F_LOG:     "log",
+	F_MATCH:   "match",
+	F_RAND:    "rand",
+	F_SIN:     "sin",
+	F_SPLIT:   "split",
+	F_SPRINTF: "sprintf",
+	F_SQRT:    "sqrt",
+	F_SRAND:   "srand",
+	F_SUB:     "sub",
+	F_SUBSTR:  "substr",
+	F_SYSTEM:  "system",
+	F_TOLOWER: "tolower",
+	F_TOUPPER: "toupper",
+
+	NAME:   "name",
+	NUMBER: "number",
+	STRING: "string",
+	REGEX:  "regex",
+}
+
+// String returns the string name of this token.
+func (t Token) String() string {
+	return tokenNames[t]
+}
diff --git a/play/vendor/github.com/benhoyt/goawk/parser/parser.go b/play/vendor/github.com/benhoyt/goawk/parser/parser.go
new file mode 100644
index 0000000..d8ff2c6
--- /dev/null
+++ b/play/vendor/github.com/benhoyt/goawk/parser/parser.go
@@ -0,0 +1,1041 @@
+// Package parser is an AWK parser and abstract syntax tree.
+//
+// Use the ParseProgram function to parse an AWK program, and then
+// give the result to one of the interp.Exec* functions to execute it.
+//
+package parser
+
+import (
+	"fmt"
+	"io"
+	"regexp"
+	"strconv"
+	"strings"
+
+	. "github.com/benhoyt/goawk/internal/ast"
+	. "github.com/benhoyt/goawk/lexer"
+)
+
+// ParseError (actually *ParseError) is the type of error returned by
+// ParseProgram.
+type ParseError struct {
+	// Source line/column position where the error occurred.
+	Position Position
+	// Error message.
+	Message string
+}
+
+// Error returns a formatted version of the error, including the line
+// and column numbers.
+func (e *ParseError) Error() string {
+	return fmt.Sprintf("parse error at %d:%d: %s", e.Position.Line, e.Position.Column, e.Message)
+}
+
+// ParserConfig lets you specify configuration for the parsing
+// process (for example printing type information for debugging).
+type ParserConfig struct {
+	// Enable printing of type information
+	DebugTypes bool
+
+	// io.Writer to print type information on (for example, os.Stderr)
+	DebugWriter io.Writer
+
+	// Map of named Go functions to allow calling from AWK. See docs
+	// on interp.Config.Funcs for details.
+	Funcs map[string]interface{}
+}
+
+// ParseProgram parses an entire AWK program, returning the *Program
+// abstract syntax tree or a *ParseError on error. "config" describes
+// the parser configuration (and is allowed to be nil).
+//
+// On a parse error the returned *Program is nil. A panic that is not
+// a *ParseError (that is, a bug rather than a syntax error) is
+// propagated to the caller with its original value.
+func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) {
+	defer func() {
+		// The parser uses panic with a *ParseError to signal parsing
+		// errors internally, and they're caught here. This
+		// significantly simplifies the recursive descent calls as
+		// we don't have to check errors everywhere.
+		r := recover()
+		if r == nil {
+			return
+		}
+		parseErr, ok := r.(*ParseError)
+		if !ok {
+			// Not one of ours: re-panic with the original value so it
+			// isn't masked by an interface-conversion panic.
+			panic(r)
+		}
+		err = parseErr
+	}()
+	lexer := NewLexer(src)
+	p := parser{lexer: lexer}
+	if config != nil {
+		p.debugTypes = config.DebugTypes
+		p.debugWriter = config.DebugWriter
+		p.nativeFuncs = config.Funcs
+	}
+	p.initResolve()
+	p.next() // initialize p.tok
+	return p.program(), nil
+}
+
+// Program is the abstract syntax tree for an entire AWK program.
+type Program struct {
+	// These fields aren't intended to be used or modified directly,
+	// but are exported for the interpreter (Program itself needs to
+	// be exported in package "parser", otherwise these could live in
+	// "internal/ast".)
+	Begin     []Stmts
+	Actions   []Action
+	End       []Stmts
+	Functions []Function
+	Scalars   map[string]int
+	Arrays    map[string]int
+}
+
+// String returns an indented, pretty-printed version of the parsed
+// program.
+func (p *Program) String() string {
+	// Sections are emitted in a fixed order: BEGIN blocks, pattern
+	// actions, END blocks, then function definitions.
+	total := len(p.Begin) + len(p.Actions) + len(p.End) + len(p.Functions)
+	sections := make([]string, 0, total)
+	for _, body := range p.Begin {
+		sections = append(sections, "BEGIN {\n"+body.String()+"}")
+	}
+	for _, action := range p.Actions {
+		sections = append(sections, action.String())
+	}
+	for _, body := range p.End {
+		sections = append(sections, "END {\n"+body.String()+"}")
+	}
+	for _, fn := range p.Functions {
+		sections = append(sections, fn.String())
+	}
+	return strings.Join(sections, "\n\n")
+}
+
+// parser holds all state for a single ParseProgram call.
+type parser struct {
+	// Lexer instance and current token values
+	lexer   *Lexer
+	pos     Position // position of last token (tok)
+	tok     Token    // last lexed token
+	prevTok Token    // previously lexed token
+	val     string   // string value of last token (or "")
+
+	// Parsing state
+	inAction  bool   // true if parsing an action (false in BEGIN or END)
+	funcName  string // function name if parsing a func, else ""
+	loopDepth int    // current loop depth (0 if not in any loops)
+
+	// Variable tracking and resolving
+	locals     map[string]bool                // current function's locals (for determining scope)
+	varTypes   map[string]map[string]typeInfo // map of func name to var name to type
+	varRefs    []varRef                       // all variable references (usually scalars)
+	arrayRefs  []arrayRef                     // all array references
+	multiExprs map[*MultiExpr]Position        // tracks comma-separated expressions
+
+	// Function tracking
+	functions   map[string]int // map of function name to index
+	userCalls   []userCall     // record calls so we can resolve them later
+	nativeFuncs map[string]interface{}
+
+	// Configuration and debugging
+	debugTypes  bool      // show variable types for debugging
+	debugWriter io.Writer // where the debug output goes
+}
+
+// Parse an entire AWK program.
+func (p *parser) program() *Program {
+	prog := &Program{}
+	// Newlines are skipped before the first item and after each one.
+	for p.optionalNewlines(); p.tok != EOF; p.optionalNewlines() {
+		switch p.tok {
+		case BEGIN:
+			p.next()
+			prog.Begin = append(prog.Begin, p.stmtsBrace())
+		case END:
+			p.next()
+			prog.End = append(prog.End, p.stmtsBrace())
+		case FUNCTION:
+			fn := p.function()
+			// Register the name against the slot it is about to occupy.
+			p.addFunction(fn.Name, len(prog.Functions))
+			prog.Functions = append(prog.Functions, fn)
+		default:
+			p.inAction = true
+			// The pattern list may be empty, a single pattern, or a
+			// two-element range pattern. With no braces the statement
+			// list stays nil, which means { print $0 }.
+			action := Action{Pattern: []Expr{}}
+			if !p.matches(LBRACE, EOF) {
+				action.Pattern = append(action.Pattern, p.expr())
+			}
+			if !p.matches(LBRACE, EOF, NEWLINE) {
+				p.commaNewlines()
+				action.Pattern = append(action.Pattern, p.expr())
+			}
+			if p.tok == LBRACE {
+				action.Stmts = p.stmtsBrace()
+			}
+			prog.Actions = append(prog.Actions, action)
+			p.inAction = false
+		}
+	}
+
+	// Everything is parsed; now resolve what had to wait until the
+	// whole program was seen.
+	p.resolveUserCalls(prog)
+	p.resolveVars(prog)
+	p.checkMultiExprs()
+
+	return prog
+}
+
+// Parse the body of a compound statement: a lone semicolon (empty
+// body), a braced block, or a single statement.
+func (p *parser) stmts() Stmts {
+	if p.tok == LBRACE {
+		return p.stmtsBrace()
+	}
+	if p.tok != SEMICOLON {
+		return []Stmt{p.stmt()}
+	}
+	// An empty body, so that things like this parse correctly:
+	// BEGIN { for (i=0; i<10; i++); print "x" }
+	p.next()
+	return nil
+}
+
+// Parse statements enclosed in {...} braces, plus one optional
+// trailing semicolon.
+func (p *parser) stmtsBrace() Stmts {
+	p.expect(LBRACE)
+	p.optionalNewlines()
+	body := []Stmt{}
+	for !p.matches(RBRACE, EOF) {
+		body = append(body, p.stmt())
+	}
+	p.expect(RBRACE)
+	if p.tok == SEMICOLON {
+		p.next()
+	}
+	return body
+}
+
+// Parse a "simple" statement (eg: allowed in a for loop init clause).
+// Handles print/printf (with optional >, >> or | redirect), delete,
+// and plain expression statements. Compound-statement keywords are
+// rejected here with a parse error.
+func (p *parser) simpleStmt() Stmt {
+	switch p.tok {
+	case PRINT, PRINTF:
+		op := p.tok
+		p.next()
+		// Arguments use printExpr rather than expr, so that a ">" after
+		// the arguments is left for the redirect check below.
+		args := p.exprList(p.printExpr)
+		if len(args) == 1 {
+			// This allows parens around all the print args
+			if m, ok := args[0].(*MultiExpr); ok {
+				args = m.Exprs
+				p.useMultiExpr(m)
+			}
+		}
+		// ILLEGAL doubles as "no redirect".
+		redirect := ILLEGAL
+		var dest Expr
+		if p.matches(GREATER, APPEND, PIPE) {
+			redirect = p.tok
+			p.next()
+			dest = p.expr()
+		}
+		if op == PRINT {
+			return &PrintStmt{args, redirect, dest}
+		} else {
+			// Unlike print, printf needs at least a format argument.
+			if len(args) == 0 {
+				panic(p.errorf("expected printf args, got none"))
+			}
+			return &PrintfStmt{args, redirect, dest}
+		}
+	case DELETE:
+		p.next()
+		// Record the array reference from the current token, then
+		// require that token to be a NAME.
+		ref := p.arrayRef(p.val, p.pos)
+		p.expect(NAME)
+		// index stays nil for the "delete array" form with no subscript.
+		var index []Expr
+		if p.tok == LBRACKET {
+			p.next()
+			index = p.exprList(p.expr)
+			if len(index) == 0 {
+				panic(p.errorf("expected expression instead of ]"))
+			}
+			p.expect(RBRACKET)
+		}
+		return &DeleteStmt{ref, index}
+	case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN:
+		panic(p.errorf("expected print/printf, delete, or expression"))
+	default:
+		return &ExprStmt{p.expr()}
+	}
+}
+
+// Parse any top-level statement.
+//
+// Leading and trailing semicolons/newlines are consumed. Parse errors
+// are reported by panicking with the error built by p.errorf.
+func (p *parser) stmt() Stmt {
+	for p.matches(SEMICOLON, NEWLINE) {
+		p.next()
+	}
+	var s Stmt
+	switch p.tok {
+	case IF:
+		p.next()
+		p.expect(LPAREN)
+		cond := p.expr()
+		p.expect(RPAREN)
+		p.optionalNewlines()
+		body := p.stmts()
+		p.optionalNewlines()
+		var elseBody Stmts
+		if p.tok == ELSE {
+			p.next()
+			p.optionalNewlines()
+			elseBody = p.stmts()
+		}
+		s = &IfStmt{cond, body, elseBody}
+	case FOR:
+		// Parse for statement, either "for in" or C-like for loop.
+		//
+		//     FOR LPAREN NAME IN NAME RPAREN NEWLINE* stmts |
+		//     FOR LPAREN [simpleStmt] SEMICOLON NEWLINE*
+		//                [expr] SEMICOLON NEWLINE*
+		//                [simpleStmt] RPAREN NEWLINE* stmts
+		//
+		p.next()
+		p.expect(LPAREN)
+		var pre Stmt
+		if p.tok != SEMICOLON {
+			pre = p.simpleStmt()
+		}
+		// The two forms are told apart only after the first clause: a
+		// ")" straight after it means the clause must be "var in array".
+		if pre != nil && p.tok == RPAREN {
+			// Match: for (var in array) body
+			p.next()
+			p.optionalNewlines()
+			exprStmt, ok := pre.(*ExprStmt)
+			if !ok {
+				panic(p.errorf("expected 'for (var in array) ...'"))
+			}
+			inExpr, ok := (exprStmt.Expr).(*InExpr)
+			if !ok {
+				panic(p.errorf("expected 'for (var in array) ...'"))
+			}
+			if len(inExpr.Index) != 1 {
+				panic(p.errorf("expected 'for (var in array) ...'"))
+			}
+			varExpr, ok := (inExpr.Index[0]).(*VarExpr)
+			if !ok {
+				panic(p.errorf("expected 'for (var in array) ...'"))
+			}
+			body := p.loopStmts()
+			s = &ForInStmt{varExpr, inExpr.Array, body}
+		} else {
+			// Match: for ([pre]; [cond]; [post]) body
+			p.expect(SEMICOLON)
+			p.optionalNewlines()
+			var cond Expr
+			if p.tok != SEMICOLON {
+				cond = p.expr()
+			}
+			p.expect(SEMICOLON)
+			p.optionalNewlines()
+			var post Stmt
+			if p.tok != RPAREN {
+				post = p.simpleStmt()
+			}
+			p.expect(RPAREN)
+			p.optionalNewlines()
+			body := p.loopStmts()
+			s = &ForStmt{pre, cond, post, body}
+		}
+	case WHILE:
+		p.next()
+		p.expect(LPAREN)
+		cond := p.expr()
+		p.expect(RPAREN)
+		p.optionalNewlines()
+		body := p.loopStmts()
+		s = &WhileStmt{cond, body}
+	case DO:
+		p.next()
+		p.optionalNewlines()
+		body := p.loopStmts()
+		p.expect(WHILE)
+		p.expect(LPAREN)
+		cond := p.expr()
+		p.expect(RPAREN)
+		s = &DoWhileStmt{body, cond}
+	case BREAK:
+		if p.loopDepth == 0 {
+			panic(p.errorf("break must be inside a loop body"))
+		}
+		p.next()
+		s = &BreakStmt{}
+	case CONTINUE:
+		if p.loopDepth == 0 {
+			panic(p.errorf("continue must be inside a loop body"))
+		}
+		p.next()
+		s = &ContinueStmt{}
+	case NEXT:
+		// Rejected only when in neither a pattern action nor a function.
+		if !p.inAction && p.funcName == "" {
+			panic(p.errorf("next can't be inside BEGIN or END"))
+		}
+		p.next()
+		s = &NextStmt{}
+	case EXIT:
+		p.next()
+		// The exit status is optional; status stays nil if omitted.
+		var status Expr
+		if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
+			status = p.expr()
+		}
+		s = &ExitStmt{status}
+	case RETURN:
+		if p.funcName == "" {
+			panic(p.errorf("return must be inside a function"))
+		}
+		p.next()
+		// The return value is optional; value stays nil if omitted.
+		var value Expr
+		if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
+			value = p.expr()
+		}
+		s = &ReturnStmt{value}
+	case LBRACE:
+		body := p.stmtsBrace()
+		s = &BlockStmt{body}
+	default:
+		s = p.simpleStmt()
+	}
+
+	// Ensure statements are separated by ; or newline
+	// (the separator may be the current token, or may already have
+	// been consumed, in which case it shows up as prevTok).
+	if !p.matches(NEWLINE, SEMICOLON, RBRACE) && p.prevTok != NEWLINE && p.prevTok != SEMICOLON && p.prevTok != RBRACE {
+		panic(p.errorf("expected ; or newline between statements"))
+	}
+	for p.matches(NEWLINE, SEMICOLON) {
+		p.next()
+	}
+	return s
+}
+
+// Same as stmts(), but tracks that we're in a loop (as break and
+// continue can only occur inside a loop).
+func (p *parser) loopStmts() Stmts {
+	p.loopDepth++
+	ss := p.stmts()
+	p.loopDepth--
+	return ss
+}
+
+// Parse a function definition and body. As it goes, this resolves
+// the local variable indexes and tracks which parameters are array
+// parameters.
+//
+// Redefining a function, naming a parameter after the function, and
+// duplicate parameter names are all parse errors.
+func (p *parser) function() Function {
+	if p.funcName != "" {
+		// Should never actually get here (FUNCTION token is only
+		// handled at the top level), but just in case.
+		panic(p.errorf("can't nest functions"))
+	}
+	p.next()
+	// The name is read before expect(NAME) verifies the token, so a
+	// redefinition is reported at the name's own position.
+	name := p.val
+	if _, ok := p.functions[name]; ok {
+		panic(p.errorf("function %q already defined", name))
+	}
+	p.expect(NAME)
+	p.expect(LPAREN)
+	first := true
+	params := make([]string, 0, 7) // pre-allocate some to reduce allocations
+	p.locals = make(map[string]bool, 7)
+	for p.tok != RPAREN {
+		if !first {
+			p.commaNewlines()
+		}
+		first = false
+		param := p.val
+		if param == name {
+			panic(p.errorf("can't use function name as parameter name"))
+		}
+		if p.locals[param] {
+			panic(p.errorf("duplicate parameter name %q", param))
+		}
+		p.expect(NAME)
+		params = append(params, param)
+		p.locals[param] = true
+	}
+	p.expect(RPAREN)
+	p.optionalNewlines()
+
+	// Parse the body
+	p.startFunction(name, params)
+	body := p.stmtsBrace()
+	p.stopFunction()
+	p.locals = nil
+
+	// The third field is left nil here.
+	return Function{name, params, nil, body}
+}
+
+// Parse expressions separated by commas: args to print[f] or user
+// function call, or multi-dimensional index.
+//
+// "parse" is called for each element. The list ends, without the
+// token being consumed, at a newline, semicolon, closing brace,
+// bracket or paren, or a redirect token (>, >> or |). The result is
+// empty but non-nil when there are no expressions.
+func (p *parser) exprList(parse func() Expr) []Expr {
+	exprs := []Expr{}
+	first := true
+	for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) {
+		if !first {
+			p.commaNewlines()
+		}
+		first = false
+		exprs = append(exprs, parse())
+	}
+	return exprs
+}
+
+// Here's where things get slightly interesting: only certain
+// expression types are allowed in print/printf statements,
+// presumably so `print a, b > "file"` is a file redirect instead of
+// a greater-than comparison. So we kind of have two ways to recurse
+// down here: expr(), which parses all expressions, and printExpr(),
+// which skips PIPE GETLINE and GREATER expressions.
+
+// Parse a single expression.
+func (p *parser) expr() Expr      { return p.getLine() }
+func (p *parser) printExpr() Expr { return p._assign(p.printCond) }
+
+// Parse an assignment-level expression, optionally piped to getline:
+//
+//     assign [PIPE GETLINE [lvalue]]
+//
+func (p *parser) getLine() Expr {
+	left := p._assign(p.cond)
+	if p.tok != PIPE {
+		return left
+	}
+	p.next()
+	p.expect(GETLINE)
+	target := p.optionalLValue()
+	return &GetlineExpr{left, target, nil}
+}
+
+// Parse a plain or augmented assignment (right-associative):
+//
+//     lvalue [assign_op assign]
+//
+// An lvalue is a variable name, an array[expr] index expression, or
+// an $expr field expression. If the left side is not an lvalue, or no
+// assignment operator follows, the left side is returned unchanged.
+//
+func (p *parser) _assign(higher func() Expr) Expr {
+	left := higher()
+	if !IsLValue(left) {
+		return left
+	}
+	// Map an augmented-assignment token to its binary operator.
+	var binOp Token
+	switch p.tok {
+	case ASSIGN:
+		p.next()
+		value := p._assign(higher)
+		return &AssignExpr{left, value}
+	case ADD_ASSIGN:
+		binOp = ADD
+	case DIV_ASSIGN:
+		binOp = DIV
+	case MOD_ASSIGN:
+		binOp = MOD
+	case MUL_ASSIGN:
+		binOp = MUL
+	case POW_ASSIGN:
+		binOp = POW
+	case SUB_ASSIGN:
+		binOp = SUB
+	default:
+		return left
+	}
+	p.next()
+	value := p._assign(higher)
+	return &AugAssignExpr{left, binOp, value}
+}
+
+// Parse a ?: conditional expression:
+//
+//     or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
+//
+func (p *parser) cond() Expr      { return p._cond(p.or) }
+func (p *parser) printCond() Expr { return p._cond(p.printOr) }
+
+// Shared body of cond and printCond; "higher" parses the test operand.
+func (p *parser) _cond(higher func() Expr) Expr {
+	test := higher()
+	if p.tok != QUESTION {
+		return test
+	}
+	p.next()
+	p.optionalNewlines()
+	whenTrue := p.expr()
+	p.expect(COLON)
+	p.optionalNewlines()
+	whenFalse := p.expr()
+	return &CondExpr{test, whenTrue, whenFalse}
+}
+
+// Parse an || or expression:
+//
+//     and [OR NEWLINE* and] [OR NEWLINE* and] ...
+//
+func (p *parser) or() Expr      { return p.binaryLeft(p.and, true, OR) }
+func (p *parser) printOr() Expr { return p.binaryLeft(p.printAnd, true, OR) }
+
+// Parse an && and expression:
+//
+//     in [AND NEWLINE* in] [AND NEWLINE* in] ...
+// +func (p *parser) and() Expr { return p.binaryLeft(p.in, true, AND) } +func (p *parser) printAnd() Expr { return p.binaryLeft(p.printIn, true, AND) } + +// Parse an "in" expression: +// +// match [IN NAME] [IN NAME] ... +// +func (p *parser) in() Expr { return p._in(p.match) } +func (p *parser) printIn() Expr { return p._in(p.printMatch) } + +func (p *parser) _in(higher func() Expr) Expr { + expr := higher() + for p.tok == IN { + p.next() + ref := p.arrayRef(p.val, p.pos) + p.expect(NAME) + expr = &InExpr{[]Expr{expr}, ref} + } + return expr +} + +// Parse a ~ match expression: +// +// compare [MATCH|NOT_MATCH compare] +// +func (p *parser) match() Expr { return p._match(p.compare) } +func (p *parser) printMatch() Expr { return p._match(p.printCompare) } + +func (p *parser) _match(higher func() Expr) Expr { + expr := higher() + if p.matches(MATCH, NOT_MATCH) { + op := p.tok + p.next() + right := p.regexStr(higher) // Not match() as these aren't associative + return &BinaryExpr{expr, op, right} + } + return expr +} + +// Parse a comparison expression: +// +// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat] +// +func (p *parser) compare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) } +func (p *parser) printCompare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) } + +func (p *parser) _compare(ops ...Token) Expr { + expr := p.concat() + if p.matches(ops...) 
{ + op := p.tok + p.next() + right := p.concat() // Not compare() as these aren't associative + return &BinaryExpr{expr, op, right} + } + return expr +} + +func (p *parser) concat() Expr { + expr := p.add() + for p.matches(DOLLAR, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) || + (p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) { + right := p.add() + expr = &BinaryExpr{expr, CONCAT, right} + } + return expr +} + +func (p *parser) add() Expr { + return p.binaryLeft(p.mul, false, ADD, SUB) +} + +func (p *parser) mul() Expr { + return p.binaryLeft(p.pow, false, MUL, DIV, MOD) +} + +func (p *parser) pow() Expr { + // Note that pow (expr ^ expr) is right-associative + expr := p.preIncr() + if p.tok == POW { + p.next() + right := p.pow() + return &BinaryExpr{expr, POW, right} + } + return expr +} + +func (p *parser) preIncr() Expr { + if p.tok == INCR || p.tok == DECR { + op := p.tok + p.next() + exprPos := p.pos + expr := p.preIncr() + if !IsLValue(expr) { + panic(p.posErrorf(exprPos, "expected lvalue after ++ or --")) + } + return &IncrExpr{expr, op, true} + } + return p.postIncr() +} + +func (p *parser) postIncr() Expr { + expr := p.primary() + if (p.tok == INCR || p.tok == DECR) && IsLValue(expr) { + op := p.tok + p.next() + return &IncrExpr{expr, op, false} + } + return expr +} + +func (p *parser) primary() Expr { + switch p.tok { + case NUMBER: + // AWK allows forms like "1.5e", but ParseFloat doesn't + s := strings.TrimRight(p.val, "eE") + n, _ := strconv.ParseFloat(s, 64) + p.next() + return &NumExpr{n} + case STRING: + s := p.val + p.next() + return &StrExpr{s} + case DIV, DIV_ASSIGN: + // If we get to DIV or DIV_ASSIGN as a primary expression, + // it's actually a regex. 
+ regex := p.nextRegex() + return &RegExpr{regex} + case DOLLAR: + p.next() + return &FieldExpr{p.primary()} + case NOT, ADD, SUB: + op := p.tok + p.next() + return &UnaryExpr{op, p.pow()} + case NAME: + name := p.val + namePos := p.pos + p.next() + if p.tok == LBRACKET { + // a[x] or a[x, y] array index expression + p.next() + index := p.exprList(p.expr) + if len(index) == 0 { + panic(p.errorf("expected expression instead of ]")) + } + p.expect(RBRACKET) + return &IndexExpr{p.arrayRef(name, namePos), index} + } else if p.tok == LPAREN && !p.lexer.HadSpace() { + if p.locals[name] { + panic(p.errorf("can't call local variable %q as function", name)) + } + // Grammar requires no space between function name and + // left paren for user function calls, hence the funky + // lexer.HadSpace() method. + return p.userCall(name, namePos) + } + return p.varRef(name, namePos) + case LPAREN: + parenPos := p.pos + p.next() + exprs := p.exprList(p.expr) + switch len(exprs) { + case 0: + panic(p.errorf("expected expression, not %s", p.tok)) + case 1: + p.expect(RPAREN) + return exprs[0] + default: + // Multi-dimensional array "in" requires parens around index + p.expect(RPAREN) + if p.tok == IN { + p.next() + ref := p.arrayRef(p.val, p.pos) + p.expect(NAME) + return &InExpr{exprs, ref} + } + // MultiExpr is used as a pseudo-expression for print[f] parsing. + return p.multiExpr(exprs, parenPos) + } + case GETLINE: + p.next() + target := p.optionalLValue() + var file Expr + if p.tok == LESS { + p.next() + file = p.primary() + } + return &GetlineExpr{nil, target, file} + // Below is the parsing of all the builtin function calls. We + // could unify these but several of them have special handling + // (array/lvalue/regex params, optional arguments, and so on). + // Doing it this way means we can check more at parse time. 
+ case F_SUB, F_GSUB: + op := p.tok + p.next() + p.expect(LPAREN) + regex := p.regexStr(p.expr) + p.commaNewlines() + repl := p.expr() + args := []Expr{regex, repl} + if p.tok == COMMA { + p.commaNewlines() + inPos := p.pos + in := p.expr() + if !IsLValue(in) { + panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) + } + args = append(args, in) + } + p.expect(RPAREN) + return &CallExpr{op, args} + case F_SPLIT: + p.next() + p.expect(LPAREN) + str := p.expr() + p.commaNewlines() + ref := p.arrayRef(p.val, p.pos) + p.expect(NAME) + args := []Expr{str, ref} + if p.tok == COMMA { + p.commaNewlines() + args = append(args, p.regexStr(p.expr)) + } + p.expect(RPAREN) + return &CallExpr{F_SPLIT, args} + case F_MATCH: + p.next() + p.expect(LPAREN) + str := p.expr() + p.commaNewlines() + regex := p.regexStr(p.expr) + p.expect(RPAREN) + return &CallExpr{F_MATCH, []Expr{str, regex}} + case F_RAND: + p.next() + p.expect(LPAREN) + p.expect(RPAREN) + return &CallExpr{F_RAND, nil} + case F_SRAND: + p.next() + p.expect(LPAREN) + var args []Expr + if p.tok != RPAREN { + args = append(args, p.expr()) + } + p.expect(RPAREN) + return &CallExpr{F_SRAND, args} + case F_LENGTH: + p.next() + var args []Expr + // AWK quirk: "length" is allowed to be called without parens + if p.tok == LPAREN { + p.next() + if p.tok != RPAREN { + args = append(args, p.expr()) + } + p.expect(RPAREN) + } + return &CallExpr{F_LENGTH, args} + case F_SUBSTR: + p.next() + p.expect(LPAREN) + str := p.expr() + p.commaNewlines() + start := p.expr() + args := []Expr{str, start} + if p.tok == COMMA { + p.commaNewlines() + args = append(args, p.expr()) + } + p.expect(RPAREN) + return &CallExpr{F_SUBSTR, args} + case F_SPRINTF: + p.next() + p.expect(LPAREN) + args := []Expr{p.expr()} + for p.tok == COMMA { + p.commaNewlines() + args = append(args, p.expr()) + } + p.expect(RPAREN) + return &CallExpr{F_SPRINTF, args} + case F_FFLUSH: + p.next() + p.expect(LPAREN) + var args []Expr + if p.tok != RPAREN { + args = 
append(args, p.expr()) + } + p.expect(RPAREN) + return &CallExpr{F_FFLUSH, args} + case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE: + // Simple 1-argument functions + op := p.tok + p.next() + p.expect(LPAREN) + arg := p.expr() + p.expect(RPAREN) + return &CallExpr{op, []Expr{arg}} + case F_ATAN2, F_INDEX: + // Simple 2-argument functions + op := p.tok + p.next() + p.expect(LPAREN) + arg1 := p.expr() + p.commaNewlines() + arg2 := p.expr() + p.expect(RPAREN) + return &CallExpr{op, []Expr{arg1, arg2}} + default: + panic(p.errorf("expected expression instead of %s", p.tok)) + } +} + +// Parse an optional lvalue +func (p *parser) optionalLValue() Expr { + switch p.tok { + case NAME: + if p.lexer.PeekByte() == '(' { + // User function call, e.g., foo() not lvalue. + return nil + } + name := p.val + namePos := p.pos + p.next() + if p.tok == LBRACKET { + // a[x] or a[x, y] array index expression + p.next() + index := p.exprList(p.expr) + if len(index) == 0 { + panic(p.errorf("expected expression instead of ]")) + } + p.expect(RBRACKET) + return &IndexExpr{p.arrayRef(name, namePos), index} + } + return p.varRef(name, namePos) + case DOLLAR: + p.next() + return &FieldExpr{p.primary()} + default: + return nil + } +} + +// Parse /.../ regex or generic expression: +// +// REGEX | expr +// +func (p *parser) regexStr(parse func() Expr) Expr { + if p.matches(DIV, DIV_ASSIGN) { + regex := p.nextRegex() + return &StrExpr{regex} + } + return parse() +} + +// Parse left-associative binary operator. Allow newlines after +// operator if allowNewline is true. +// +// parse [op parse] [op parse] ... +// +func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) Expr { + expr := higher() + for p.matches(ops...) 
{ + op := p.tok + p.next() + if allowNewline { + p.optionalNewlines() + } + right := higher() + expr = &BinaryExpr{expr, op, right} + } + return expr +} + +// Parse comma followed by optional newlines: +// +// COMMA NEWLINE* +// +func (p *parser) commaNewlines() { + p.expect(COMMA) + p.optionalNewlines() +} + +// Parse zero or more optional newlines: +// +// [NEWLINE] [NEWLINE] ... +// +func (p *parser) optionalNewlines() { + for p.tok == NEWLINE { + p.next() + } +} + +// Parse next token into p.tok (and set p.pos and p.val). +func (p *parser) next() { + p.prevTok = p.tok + p.pos, p.tok, p.val = p.lexer.Scan() + if p.tok == ILLEGAL { + panic(p.errorf("%s", p.val)) + } +} + +// Parse next regex and return it (must only be called after DIV or +// DIV_ASSIGN token). +func (p *parser) nextRegex() string { + p.pos, p.tok, p.val = p.lexer.ScanRegex() + if p.tok == ILLEGAL { + panic(p.errorf("%s", p.val)) + } + regex := p.val + _, err := regexp.Compile(regex) + if err != nil { + panic(p.errorf("%v", err)) + } + p.next() + return regex +} + +// Ensure current token is tok, and parse next token into p.tok. +func (p *parser) expect(tok Token) { + if p.tok != tok { + panic(p.errorf("expected %s instead of %s", tok, p.tok)) + } + p.next() +} + +// Return true iff current token matches one of the given operators, +// but don't parse next token. +func (p *parser) matches(operators ...Token) bool { + for _, operator := range operators { + if p.tok == operator { + return true + } + } + return false +} + +// Format given string and args with Sprintf and return *ParseError +// with that message and the current position. +func (p *parser) errorf(format string, args ...interface{}) error { + return p.posErrorf(p.pos, format, args...) +} + +// Like errorf, but with an explicit position. +func (p *parser) posErrorf(pos Position, format string, args ...interface{}) error { + message := fmt.Sprintf(format, args...) 
+ return &ParseError{pos, message} +} + +// Parse call to a user-defined function (and record call site for +// resolving later). Arguments are parsed up to the closing paren; each one is passed to processUserCallArg before being collected. +func (p *parser) userCall(name string, pos Position) *UserCallExpr { + p.expect(LPAREN) + args := []Expr{} + i := 0 + for !p.matches(NEWLINE, RPAREN) { + if i > 0 { + p.commaNewlines() + } + arg := p.expr() + p.processUserCallArg(name, arg, i) + args = append(args, arg) + i++ + } + p.expect(RPAREN) + call := &UserCallExpr{false, -1, name, args} // index is resolved later + p.recordUserCall(call, pos) + return call +} diff --git a/play/vendor/github.com/benhoyt/goawk/parser/resolve.go b/play/vendor/github.com/benhoyt/goawk/parser/resolve.go new file mode 100644 index 0000000..6543633 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/parser/resolve.go @@ -0,0 +1,468 @@ +// Resolve function calls and variable types + +package parser + +import ( + "fmt" + "reflect" + "sort" + + . "github.com/benhoyt/goawk/internal/ast" + . "github.com/benhoyt/goawk/lexer" +) + +type varType int // what the resolver currently knows about a variable: unknown, scalar or array + +const ( + typeUnknown varType = iota + typeScalar + typeArray +) + +func (t varType) String() string { + switch t { + case typeScalar: + return "Scalar" + case typeArray: + return "Array" + default: + return "Unknown" + } +} + +// typeInfo records type information for a single variable +type typeInfo struct { + typ varType // typeUnknown until varRef, arrayRef or resolveVars settles it + ref *VarExpr // VarExpr stored by varRef when it set typ; arrayRef stores nil + scope VarScope // global, local or special + index int // variable index, filled in by resolveVars + callName string // function the variable was passed to (set by processUserCallArg) + argIndex int // position of the variable in that call's argument list +} + +// Used by printVarTypes when debugTypes is turned on +func (t typeInfo) String() string { + var scope string + switch t.scope { + case ScopeGlobal: + scope = "Global" + case ScopeLocal: + scope = "Local" + default: + scope = "Special" + } + return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d", + t.typ, t.ref, scope, t.index, t.callName, t.argIndex) +} + +// A single variable reference (normally scalar) +type varRef struct { + funcName string // enclosing function when the variable is a local, otherwise "" + ref *VarExpr // expression whose Index is patched by resolveVars + isArg bool // set by processUserCallArg when the reference is a call argument + pos Position // source position, used for error messages +} + +// A single array reference +type arrayRef
struct { + funcName string + ref *ArrayExpr + pos Position +} + +// Initialize the resolver: allocate the type tables, the function index map and the multi-expression set +func (p *parser) initResolve() { + p.varTypes = make(map[string]map[string]typeInfo) // function name -> variable name -> type info + p.varTypes[""] = make(map[string]typeInfo) // globals + p.functions = make(map[string]int) // function name -> index + p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present + p.arrayRef("ENVIRON", Position{1, 1}) // and ENVIRON + p.multiExprs = make(map[*MultiExpr]Position, 3) // multi expressions not yet marked as used, with their positions +} + +// Signal the start of a function: make name the current function and give it an empty table of variable types (params is not used here) +func (p *parser) startFunction(name string, params []string) { + p.funcName = name + p.varTypes[name] = make(map[string]typeInfo) +} + +// Signal the end of a function by clearing the current function name +func (p *parser) stopFunction() { + p.funcName = "" +} + +// Add function by name with given index (the index is what the resolver uses to look the function up in Program.Functions) +func (p *parser) addFunction(name string, index int) { + p.functions[name] = index +} + +// Records a call to a user function (for resolving indexes later) +type userCall struct { + call *UserCallExpr // call expression whose Native/Index fields are filled in by resolveUserCalls + pos Position // source position, used for error messages + inFunc string // function being parsed when the call was recorded ("" if none) +} + +// Record a user call site (together with the function currently being parsed) +func (p *parser) recordUserCall(call *UserCallExpr, pos Position) { + p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName}) +} + +// After parsing, resolve all user calls to their indexes. Also +// ensures functions called have actually been defined, and that +// they're not being called with too many arguments. 
+func (p *parser) resolveUserCalls(prog *Program) { + // Number the native funcs (order by name to get consistent order) + nativeNames := make([]string, 0, len(p.nativeFuncs)) + for name := range p.nativeFuncs { + nativeNames = append(nativeNames, name) + } + sort.Strings(nativeNames) + nativeIndexes := make(map[string]int, len(nativeNames)) + for i, name := range nativeNames { + nativeIndexes[name] = i + } + + for _, c := range p.userCalls { + // AWK-defined functions take precedence over native Go funcs + index, ok := p.functions[c.call.Name] + if !ok { + f, haveNative := p.nativeFuncs[c.call.Name] + if !haveNative { + panic(p.posErrorf(c.pos, "undefined function %q", c.call.Name)) + } + typ := reflect.TypeOf(f) + if !typ.IsVariadic() && len(c.call.Args) > typ.NumIn() { + panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + } + c.call.Native = true + c.call.Index = nativeIndexes[c.call.Name] + continue + } + function := prog.Functions[index] + if len(c.call.Args) > len(function.Params) { + panic(p.posErrorf(c.pos, "%q called with more arguments than declared", c.call.Name)) + } + c.call.Index = index + } +} + +// For arguments that are variable references, we don't know the +// type based on context, so mark the types for these as unknown. 
+func (p *parser) processUserCallArg(funcName string, arg Expr, index int) { + if varExpr, ok := arg.(*VarExpr); ok { + scope, varFuncName := p.getScope(varExpr.Name) + ref := p.varTypes[varFuncName][varExpr.Name].ref + if ref == varExpr { + // Only applies if this is the first reference to this + // variable (otherwise we know the type already) + p.varTypes[varFuncName][varExpr.Name] = typeInfo{typeUnknown, ref, scope, 0, funcName, index} + } + // Mark the last related varRef (the most recent one) as a + // call argument for later error handling + p.varRefs[len(p.varRefs)-1].isArg = true + } +} + +// Determine scope of given variable reference (and funcName if it's +// a local, otherwise empty string) +func (p *parser) getScope(name string) (VarScope, string) { + switch { + case p.locals[name]: + return ScopeLocal, p.funcName + case SpecialVarIndex(name) > 0: + return ScopeSpecial, "" + default: + return ScopeGlobal, "" + } +} + +// Record a variable (scalar) reference and return the *VarExpr (but +// VarExpr.Index won't be set till later) +func (p *parser) varRef(name string, pos Position) *VarExpr { + scope, funcName := p.getScope(name) + expr := &VarExpr{scope, 0, name} + p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos}) + info := p.varTypes[funcName][name] + if info.typ == typeUnknown { + p.varTypes[funcName][name] = typeInfo{typeScalar, expr, scope, 0, info.callName, 0} + } + return expr +} + +// Record an array reference and return the *ArrayExpr (but +// ArrayExpr.Index won't be set till later) +func (p *parser) arrayRef(name string, pos Position) *ArrayExpr { + scope, funcName := p.getScope(name) + if scope == ScopeSpecial { + panic(p.errorf("can't use scalar %q as array", name)) + } + expr := &ArrayExpr{scope, 0, name} + p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos}) + info := p.varTypes[funcName][name] + if info.typ == typeUnknown { + p.varTypes[funcName][name] = typeInfo{typeArray, nil, scope, 0, info.callName, 0} + } + 
return expr +} + +// Print variable type information (for debugging) on p.debugWriter +func (p *parser) printVarTypes(prog *Program) { + fmt.Fprintf(p.debugWriter, "scalars: %v\n", prog.Scalars) + fmt.Fprintf(p.debugWriter, "arrays: %v\n", prog.Arrays) + funcNames := []string{} + for funcName := range p.varTypes { + funcNames = append(funcNames, funcName) + } + sort.Strings(funcNames) + for _, funcName := range funcNames { + if funcName != "" { + fmt.Fprintf(p.debugWriter, "function %s\n", funcName) + } else { + fmt.Fprintf(p.debugWriter, "globals\n") + } + varNames := []string{} + for name := range p.varTypes[funcName] { + varNames = append(varNames, name) + } + sort.Strings(varNames) + for _, name := range varNames { + info := p.varTypes[funcName][name] + fmt.Fprintf(p.debugWriter, " %s: %s\n", name, info) + } + } +} + +// If we can't finish resolving after this many iterations, give up +const maxResolveIterations = 10000 + +// Resolve unknown variables types and generate variable indexes and +// name-to-index mappings for interpreter +func (p *parser) resolveVars(prog *Program) { + // First go through all unknown types and try to determine the + // type from the parameter type in that function definition. May + // need multiple passes depending on the order of functions. This + // is not particularly efficient, but on realistic programs it's + // not an issue. 
+ for i := 0; ; i++ { + progressed := false + for funcName, infos := range p.varTypes { + for name, info := range infos { + if info.scope == ScopeSpecial || info.typ != typeUnknown { + // It's a special var or type is already known + continue + } + funcIndex, ok := p.functions[info.callName] + if !ok { + // Function being called is a native function + continue + } + // Determine var type based on type of this parameter + // in the called function (if we know that) + paramName := prog.Functions[funcIndex].Params[info.argIndex] + typ := p.varTypes[info.callName][paramName].typ + if typ != typeUnknown { + if p.debugTypes { + fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", + funcName, name, typ) + } + info.typ = typ + p.varTypes[funcName][name] = info + progressed = true + } + } + } + if !progressed { + // If we didn't progress we're done (or trying again is + // not going to help) + break + } + if i >= maxResolveIterations { + panic(p.errorf("too many iterations trying to resolve variable types")) + } + } + + // Resolve global variables (iteration order is undefined, so + // assign indexes basically randomly) + prog.Scalars = make(map[string]int) + prog.Arrays = make(map[string]int) + for name, info := range p.varTypes[""] { + _, isFunc := p.functions[name] + if isFunc { + // Global var can't also be the name of a function + panic(p.errorf("global var %q can't also be a function", name)) + } + var index int + if info.scope == ScopeSpecial { + index = SpecialVarIndex(name) + } else if info.typ == typeArray { + index = len(prog.Arrays) + prog.Arrays[name] = index + } else { + index = len(prog.Scalars) + prog.Scalars[name] = index + } + info.index = index + p.varTypes[""][name] = info + } + + // Fill in unknown parameter types that are being called with arrays, + // for example, as in the following code: + // + // BEGIN { arr[0]; f(arr) } + // function f(a) { } + for _, c := range p.userCalls { + if c.call.Native { + continue + } + function := 
prog.Functions[c.call.Index] + for i, arg := range c.call.Args { + varExpr, ok := arg.(*VarExpr) + if !ok { + continue + } + funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) + argType := p.varTypes[funcName][varExpr.Name] + paramType := p.varTypes[function.Name][function.Params[i]] + if argType.typ == typeArray && paramType.typ == typeUnknown { + paramType.typ = argType.typ + p.varTypes[function.Name][function.Params[i]] = paramType + } + } + } + + // Resolve local variables (assign indexes in order of params). + // Also patch up Function.Arrays (tells interpreter which args + // are arrays). + for funcName, infos := range p.varTypes { + if funcName == "" { + continue + } + scalarIndex := 0 + arrayIndex := 0 + functionIndex := p.functions[funcName] + function := prog.Functions[functionIndex] + arrays := make([]bool, len(function.Params)) + for i, name := range function.Params { + info := infos[name] + var index int + if info.typ == typeArray { + index = arrayIndex + arrayIndex++ + arrays[i] = true + } else { + // typeScalar or typeUnknown: variables may still be + // of unknown type if they've never been referenced -- + // default to scalar in that case + index = scalarIndex + scalarIndex++ + } + info.index = index + p.varTypes[funcName][name] = info + } + prog.Functions[functionIndex].Arrays = arrays + } + + // Check that variables passed to functions are the correct type + for _, c := range p.userCalls { + // Check native function calls + if c.call.Native { + for _, arg := range c.call.Args { + varExpr, ok := arg.(*VarExpr) + if !ok { + // Non-variable expression, must be scalar + continue + } + funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) + info := p.varTypes[funcName][varExpr.Name] + if info.typ == typeArray { + panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name)) + } + } + continue + } + + // Check AWK function calls + function := prog.Functions[c.call.Index] + for i, arg := range c.call.Args { + varExpr, ok 
:= arg.(*VarExpr) + if !ok { + if function.Arrays[i] { + panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) + } + continue + } + funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc) + info := p.varTypes[funcName][varExpr.Name] + if info.typ == typeArray && !function.Arrays[i] { + panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name)) + } + if info.typ != typeArray && function.Arrays[i] { + panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name)) + } + } + } + + if p.debugTypes { + p.printVarTypes(prog) + } + + // Patch up variable indexes (interpreter uses an index instead + // of name for more efficient lookups) + for _, varRef := range p.varRefs { + info := p.varTypes[varRef.funcName][varRef.ref.Name] + if info.typ == typeArray && !varRef.isArg { + panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name)) + } + varRef.ref.Index = info.index + } + for _, arrayRef := range p.arrayRefs { + info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name] + if info.typ == typeScalar { + panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name)) + } + arrayRef.ref.Index = info.index + } +} + +// If name refers to a local (in function inFunc), return that +// function's name, otherwise return "" (meaning global). +func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { + if inFunc == "" { + return "" + } + for _, param := range prog.Functions[p.functions[inFunc]].Params { + if name == param { + return inFunc + } + } + return "" +} + +// Record a "multi expression" (comma-separated pseudo-expression +// used to allow commas around print/printf arguments). +func (p *parser) multiExpr(exprs []Expr, pos Position) Expr { + expr := &MultiExpr{exprs} + p.multiExprs[expr] = pos + return expr +} + +// Mark the multi expression as used (by a print/printf statement). 
+func (p *parser) useMultiExpr(expr *MultiExpr) { + delete(p.multiExprs, expr) +} + +// Check that there are no unused multi expressions (syntax error): anything still in p.multiExprs was recorded by multiExpr but never removed by useMultiExpr, so panic with a positioned error. +func (p *parser) checkMultiExprs() { + if len(p.multiExprs) == 0 { + return + } + // Show error on first comma-separated expression (lowest line, then lowest column); the starting value is a sentinel assumed to be larger than any real position + min := Position{1000000000, 1000000000} + for _, pos := range p.multiExprs { + if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) { + min = pos + } + } + panic(p.posErrorf(min, "unexpected comma-separated expression")) +} diff --git a/play/vendor/modules.txt b/play/vendor/modules.txt new file mode 100644 index 0000000..ab9da8b --- /dev/null +++ b/play/vendor/modules.txt @@ -0,0 +1,6 @@ +# github.com/benhoyt/goawk v1.13.0 +## explicit; go 1.13 +github.com/benhoyt/goawk/internal/ast +github.com/benhoyt/goawk/interp +github.com/benhoyt/goawk/lexer +github.com/benhoyt/goawk/parser