play: Update to latest Go and GoAWK versions

And vendor in dependencies, as is appropriate for end-products.
This commit is contained in:
Alex Palaistras 2022-01-22 14:54:50 +00:00
parent 5500c9c667
commit ec3cfbdf0e
14 changed files with 5764 additions and 4 deletions

View File

@ -1,5 +1,5 @@
module github.com/deuill/grawkit/play
go 1.12
go 1.17
require github.com/benhoyt/goawk v1.6.0
require github.com/benhoyt/goawk v1.13.0

View File

@ -1,2 +1,2 @@
github.com/benhoyt/goawk v1.6.0 h1:6oHKBL2BAvYiKroi8RhmpnhyvMGeiW5u/WEaxyOcKRQ=
github.com/benhoyt/goawk v1.6.0/go.mod h1:krl47rWeW8s+kD3dtHYm6aq4MBGRzQD5PGkZaRm38Uk=
github.com/benhoyt/goawk v1.13.0 h1:/Iu42ErHsT5vHrpWyewpI98hB2PHBk66o+oLZs4drPs=
github.com/benhoyt/goawk v1.13.0/go.mod h1:UKzPyqDh9O7HZ/ftnU33MYlAP2rPbXdwQ+OVlEOPsjM=

21
play/vendor/github.com/benhoyt/goawk/LICENSE.txt generated vendored Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 Ben Hoyt
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,557 @@
// GoAWK parser - abstract syntax tree structs
package ast
import (
"fmt"
"strconv"
"strings"
. "github.com/benhoyt/goawk/lexer"
)
// Stmts is an ordered list of statements that together form a block.
type Stmts []Stmt

// String renders every statement in order. Each line of a statement's
// own rendering is prefixed with the indent string and terminated with
// a newline, so nested blocks pick up one more indent per level.
func (ss Stmts) String() string {
	var b strings.Builder
	for _, stmt := range ss {
		for _, line := range strings.Split(stmt.String(), "\n") {
			b.WriteString(" " + line + "\n")
		}
	}
	return b.String()
}
// Action is one pattern-action rule of a program.
type Action struct {
	Pattern []Expr
	Stmts   Stmts
}

// String renders the rule as its comma-separated patterns followed by
// the braced statement block. The block (and the separating space) is
// omitted when Stmts is nil.
func (a *Action) String() string {
	rendered := make([]string, 0, len(a.Pattern))
	for _, pat := range a.Pattern {
		rendered = append(rendered, pat.String())
	}
	out := strings.Join(rendered, ", ")
	if a.Stmts == nil {
		return out
	}
	if len(rendered) > 0 {
		out += " "
	}
	return out + "{\n" + a.Stmts.String() + "}"
}
// Expr is the abstract syntax tree for any AWK expression.
//
// The unexported expr method is a marker that ties implementations to
// this package; String returns a textual rendering of the node.
type Expr interface {
expr()
String() string
}
// All these types implement the Expr interface. The expr methods are
// empty markers: they exist only so the pointer types satisfy Expr.
func (e *FieldExpr) expr() {}
func (e *UnaryExpr) expr() {}
func (e *BinaryExpr) expr() {}
func (e *ArrayExpr) expr() {}
func (e *InExpr) expr() {}
func (e *CondExpr) expr() {}
func (e *NumExpr) expr() {}
func (e *StrExpr) expr() {}
func (e *RegExpr) expr() {}
func (e *VarExpr) expr() {}
func (e *IndexExpr) expr() {}
func (e *AssignExpr) expr() {}
func (e *AugAssignExpr) expr() {}
func (e *IncrExpr) expr() {}
func (e *CallExpr) expr() {}
func (e *UserCallExpr) expr() {}
func (e *MultiExpr) expr() {}
func (e *GetlineExpr) expr() {}
// FieldExpr is a field reference such as $0; Index is the expression
// that selects the field.
type FieldExpr struct {
	Index Expr
}

// String returns "$" followed by the rendered index expression.
func (e *FieldExpr) String() string {
	index := e.Index.String()
	return "$" + index
}
// UnaryExpr is a prefix-operator expression such as -1234.
type UnaryExpr struct {
	Op    Token
	Value Expr
}

// String returns the operator immediately followed by its operand.
func (e *UnaryExpr) String() string {
	op := e.Op.String()
	operand := e.Value.String()
	return op + operand
}
// BinaryExpr is a two-operand expression such as 1 + 2.
type BinaryExpr struct {
	Left  Expr
	Op    Token
	Right Expr
}

// String returns the parenthesized form "(left op right)".
// Concatenation has no operator symbol, so its operands are separated
// by a single space only.
func (e *BinaryExpr) String() string {
	op := " "
	if e.Op != CONCAT {
		op = " " + e.Op.String() + " "
	}
	return "(" + e.Left.String() + op + e.Right.String() + ")"
}
// ArrayExpr is an array reference. Not really a stand-alone
// expression, except as an argument to split() or a user function
// call.
//
// Scope and Index locate the array; Name is the name returned by
// String().
type ArrayExpr struct {
Scope VarScope
Index int
Name string
}
// String returns the array's name.
func (e *ArrayExpr) String() string {
return e.Name
}
// InExpr is a membership test such as (index in array).
type InExpr struct {
	Index []Expr
	Array *ArrayExpr
}

// String returns "(index in array)" for a single subscript; for any
// other number of subscripts the comma-separated list gets its own
// parentheses: "((i, j) in array)".
func (e *InExpr) String() string {
	parts := make([]string, 0, len(e.Index))
	for _, ix := range e.Index {
		parts = append(parts, ix.String())
	}
	subscript := strings.Join(parts, ", ")
	if len(parts) != 1 {
		subscript = "(" + subscript + ")"
	}
	return "(" + subscript + " in " + e.Array.String() + ")"
}
// CondExpr is a ternary expression such as cond ? 1 : 0.
type CondExpr struct {
	Cond  Expr
	True  Expr
	False Expr
}

// String returns the parenthesized form "(cond ? true : false)".
func (e *CondExpr) String() string {
	cond := e.Cond.String()
	whenTrue := e.True.String()
	whenFalse := e.False.String()
	return "(" + cond + " ? " + whenTrue + " : " + whenFalse + ")"
}
// NumExpr is a literal number like 1234.
type NumExpr struct {
	Value float64
}

// String renders the literal. Integral values that a float64 holds
// exactly (magnitude up to 2^53) are printed as plain decimal integers,
// so a literal such as 1234567 is not squeezed into a lossy six-digit
// exponent form. Everything else, including NaN and infinities, is
// formatted with "%.6g".
func (e *NumExpr) String() string {
	const maxExact = 1 << 53 // largest magnitude with exact integer steps
	if e.Value >= -maxExact && e.Value <= maxExact && e.Value == float64(int64(e.Value)) {
		return strconv.FormatInt(int64(e.Value), 10)
	}
	return fmt.Sprintf("%.6g", e.Value)
}
// StrExpr is a literal string like "foo".
type StrExpr struct {
Value string
}
// String returns the value as a double-quoted literal, escaped by
// strconv.Quote (Go string-literal escaping).
func (e *StrExpr) String() string {
return strconv.Quote(e.Value)
}
// RegExpr is a regex used on its own as an expression, which is
// equivalent to: $0 ~ /regex/.
type RegExpr struct {
	Regex string
}

// String returns the regex between slashes, with every "/" inside the
// pattern escaped as "\/" so the delimiters stay unambiguous.
func (e *RegExpr) String() string {
	return "/" + strings.ReplaceAll(e.Regex, "/", `\/`) + "/"
}
// VarScope identifies the kind of variable an ArrayExpr or VarExpr
// refers to: special, global, or local.
type VarScope int
const (
// ScopeSpecial marks a special variable.
ScopeSpecial VarScope = iota
// ScopeGlobal marks a global variable.
ScopeGlobal
// ScopeLocal marks a local variable.
ScopeLocal
)
// VarExpr is a variable reference (special var, global, or local).
// Index is the resolved variable index used by the interpreter; Name
// is the original name used by String().
type VarExpr struct {
Scope VarScope
Index int
Name string
}
// String returns the variable's original name.
func (e *VarExpr) String() string {
return e.Name
}
// IndexExpr is an array element access such as a[k], usable both as an
// rvalue and as an lvalue.
type IndexExpr struct {
	Array *ArrayExpr
	Index []Expr
}

// String returns the array name followed by the comma-separated
// subscripts in square brackets.
func (e *IndexExpr) String() string {
	subscripts := make([]string, 0, len(e.Index))
	for _, ix := range e.Index {
		subscripts = append(subscripts, ix.String())
	}
	return e.Array.String() + "[" + strings.Join(subscripts, ", ") + "]"
}
// AssignExpr is a plain assignment such as x = 1234.
type AssignExpr struct {
	Left  Expr // can be one of: var, array[x], $n
	Right Expr
}

// String returns "left = right".
func (e *AssignExpr) String() string {
	target := e.Left.String()
	source := e.Right.String()
	return target + " = " + source
}
// AugAssignExpr is an augmented assignment such as x += 5. Op holds the
// arithmetic operator without the trailing "=".
type AugAssignExpr struct {
	Left  Expr // can be one of: var, array[x], $n
	Op    Token
	Right Expr
}

// String returns "left op= right".
func (e *AugAssignExpr) String() string {
	target := e.Left.String()
	op := e.Op.String()
	source := e.Right.String()
	return target + " " + op + "= " + source
}
// IncrExpr is an increment or decrement such as x++ or --y. Pre reports
// whether the operator is written before the operand.
type IncrExpr struct {
	Expr Expr
	Op   Token
	Pre  bool
}

// String places the operator before the operand for the prefix form and
// after it for the postfix form.
func (e *IncrExpr) String() string {
	if !e.Pre {
		return e.Expr.String() + e.Op.String()
	}
	return e.Op.String() + e.Expr.String()
}
// CallExpr is a call to a builtin function, such as length($1). Func is
// the token that identifies the builtin.
type CallExpr struct {
	Func Token
	Args []Expr
}

// String returns the builtin's name followed by the comma-separated
// arguments in parentheses.
func (e *CallExpr) String() string {
	rendered := make([]string, 0, len(e.Args))
	for _, arg := range e.Args {
		rendered = append(rendered, arg.String())
	}
	return e.Func.String() + "(" + strings.Join(rendered, ", ") + ")"
}
// UserCallExpr is a call to a user-defined function, such as
// my_func(1, 2, 3).
//
// Index is the resolved function index used by the interpreter; Name
// is the original name used by String().
type UserCallExpr struct {
	Native bool // false = AWK-defined function, true = native Go func
	Index  int
	Name   string
	Args   []Expr
}

// String returns the function name followed by the comma-separated
// arguments in parentheses.
func (e *UserCallExpr) String() string {
	rendered := make([]string, 0, len(e.Args))
	for _, arg := range e.Args {
		rendered = append(rendered, arg.String())
	}
	return e.Name + "(" + strings.Join(rendered, ", ") + ")"
}
// MultiExpr is not an interpretable expression; it is a
// pseudo-expression used while parsing print[f] argument lists.
type MultiExpr struct {
	Exprs []Expr
}

// String returns the comma-separated sub-expressions in parentheses.
func (e *MultiExpr) String() string {
	rendered := make([]string, 0, len(e.Exprs))
	for _, sub := range e.Exprs {
		rendered = append(rendered, sub.String())
	}
	return "(" + strings.Join(rendered, ", ") + ")"
}
// GetlineExpr is a getline expression reading from a file or a pipe.
// Command, Target and File are each optional (nil when absent).
type GetlineExpr struct {
	Command Expr
	Target  Expr
	File    Expr
}

// String assembles the pieces that are present, in order:
// "command |", the keyword "getline", " target", and " <file".
func (e *GetlineExpr) String() string {
	var b strings.Builder
	if e.Command != nil {
		b.WriteString(e.Command.String())
		b.WriteString(" |")
	}
	b.WriteString("getline")
	if e.Target != nil {
		b.WriteString(" ")
		b.WriteString(e.Target.String())
	}
	if e.File != nil {
		b.WriteString(" <")
		b.WriteString(e.File.String())
	}
	return b.String()
}
// IsLValue reports whether expr may appear where an lvalue is required:
// on the left-hand side of an assignment, as the operand of ++ or --,
// or as the third argument to sub or gsub. Only variable, indexed-array
// and field expressions qualify.
func IsLValue(expr Expr) bool {
	switch expr.(type) {
	case *VarExpr, *IndexExpr, *FieldExpr:
		return true
	}
	return false
}
// Stmt is the abstract syntax tree for any AWK statement.
//
// The unexported stmt method is a marker that ties implementations to
// this package; String returns a textual rendering of the node.
type Stmt interface {
stmt()
String() string
}
// All these types implement the Stmt interface. The stmt methods are
// empty markers: they exist only so the pointer types satisfy Stmt.
func (s *PrintStmt) stmt() {}
func (s *PrintfStmt) stmt() {}
func (s *ExprStmt) stmt() {}
func (s *IfStmt) stmt() {}
func (s *ForStmt) stmt() {}
func (s *ForInStmt) stmt() {}
func (s *WhileStmt) stmt() {}
func (s *DoWhileStmt) stmt() {}
func (s *BreakStmt) stmt() {}
func (s *ContinueStmt) stmt() {}
func (s *NextStmt) stmt() {}
func (s *ExitStmt) stmt() {}
func (s *DeleteStmt) stmt() {}
func (s *ReturnStmt) stmt() {}
func (s *BlockStmt) stmt() {}
// PrintStmt is a statement like print $1, $3.
//
// Redirect and Dest describe an optional output redirection; String
// only renders them when Dest is non-nil.
type PrintStmt struct {
Args []Expr
Redirect Token
Dest Expr
}
// String renders the statement via printString with the "print" keyword.
func (s *PrintStmt) String() string {
return printString("print", s.Args, s.Redirect, s.Dest)
}
// printString is the shared renderer for print and printf statements.
// It returns the keyword f, a space, and the comma-separated arguments;
// when dest is non-nil it appends a space, the redirect token and the
// destination expression.
func printString(f string, args []Expr, redirect Token, dest Expr) string {
	rendered := make([]string, 0, len(args))
	for _, arg := range args {
		rendered = append(rendered, arg.String())
	}
	out := f + " " + strings.Join(rendered, ", ")
	if dest == nil {
		return out
	}
	return out + " " + redirect.String() + dest.String()
}
// PrintfStmt is a statement like printf "%3d", 1234.
//
// Redirect and Dest describe an optional output redirection; String
// only renders them when Dest is non-nil.
type PrintfStmt struct {
Args []Expr
Redirect Token
Dest Expr
}
// String renders the statement via printString with the "printf" keyword.
func (s *PrintfStmt) String() string {
return printString("printf", s.Args, s.Redirect, s.Dest)
}
// ExprStmt is a statement consisting of a bare expression, such as a
// function call: my_func(x).
type ExprStmt struct {
Expr Expr
}
// String returns the rendering of the wrapped expression.
func (s *ExprStmt) String() string {
return s.Expr.String()
}
// IfStmt is an if statement with an optional else branch.
type IfStmt struct {
	Cond Expr
	Body Stmts
	Else Stmts
}

// String renders "if (cond) { ... }", dropping the condition's own
// outer parentheses, and appends " else { ... }" only when the else
// branch contains at least one statement.
func (s *IfStmt) String() string {
	var b strings.Builder
	b.WriteString("if (" + trimParens(s.Cond.String()) + ") {\n" + s.Body.String() + "}")
	if len(s.Else) > 0 {
		b.WriteString(" else {\n" + s.Else.String() + "}")
	}
	return b.String()
}
// ForStmt is a C-style loop: for (i=0; i<10; i++) print i.
// Pre, Cond and Post are each optional (nil when absent).
type ForStmt struct {
	Pre  Stmt
	Cond Expr
	Post Stmt
	Body Stmts
}

// String renders "for (pre; cond; post) { ... }". Absent clauses are
// left empty but both semicolons are always written; the condition is
// rendered without its own outer parentheses.
func (s *ForStmt) String() string {
	var b strings.Builder
	b.WriteString("for (")
	if s.Pre != nil {
		b.WriteString(s.Pre.String())
	}
	b.WriteString(";")
	if s.Cond != nil {
		b.WriteString(" " + trimParens(s.Cond.String()))
	}
	b.WriteString(";")
	if s.Post != nil {
		b.WriteString(" " + s.Post.String())
	}
	b.WriteString(") {\n" + s.Body.String() + "}")
	return b.String()
}
// ForInStmt is a key-iteration loop like for (k in a) print k, a[k].
type ForInStmt struct {
	Var   *VarExpr
	Array *ArrayExpr
	Body  Stmts
}

// String renders "for (var in array) { ... }".
func (s *ForInStmt) String() string {
	header := "for (" + s.Var.String() + " in " + s.Array.String() + ") {\n"
	return header + s.Body.String() + "}"
}
// WhileStmt is a while loop.
type WhileStmt struct {
	Cond Expr
	Body Stmts
}

// String renders "while (cond) { ... }", with the condition stripped of
// its own outer parentheses.
func (s *WhileStmt) String() string {
	header := "while (" + trimParens(s.Cond.String()) + ") {\n"
	return header + s.Body.String() + "}"
}
// DoWhileStmt is a do-while loop.
type DoWhileStmt struct {
	Body Stmts
	Cond Expr
}

// String renders "do { ... } while (cond)", with the condition stripped
// of its own outer parentheses.
func (s *DoWhileStmt) String() string {
	body := "do {\n" + s.Body.String() + "}"
	return body + " while (" + trimParens(s.Cond.String()) + ")"
}
// BreakStmt is a break statement. It carries no data.
type BreakStmt struct{}
// String returns the keyword "break".
func (s *BreakStmt) String() string {
return "break"
}
// ContinueStmt is a continue statement. It carries no data.
type ContinueStmt struct{}
// String returns the keyword "continue".
func (s *ContinueStmt) String() string {
return "continue"
}
// NextStmt is a next statement. It carries no data.
type NextStmt struct{}
// String returns the keyword "next".
func (s *NextStmt) String() string {
return "next"
}
// ExitStmt is an exit statement; Status is the optional exit status
// expression (nil when absent).
type ExitStmt struct {
	Status Expr
}

// String returns "exit", followed by a space and the status expression
// when one is present.
func (s *ExitStmt) String() string {
	if s.Status == nil {
		return "exit"
	}
	return "exit " + s.Status.String()
}
// DeleteStmt is an element deletion such as delete a[k].
type DeleteStmt struct {
	Array *ArrayExpr
	Index []Expr
}

// String returns "delete " followed by the array name and the
// comma-separated subscripts in square brackets.
func (s *DeleteStmt) String() string {
	subscripts := make([]string, 0, len(s.Index))
	for _, ix := range s.Index {
		subscripts = append(subscripts, ix.String())
	}
	return "delete " + s.Array.String() + "[" + strings.Join(subscripts, ", ") + "]"
}
// ReturnStmt is a return statement; Value is the optional result
// expression (nil when absent).
type ReturnStmt struct {
	Value Expr
}

// String returns "return", followed by a space and the value expression
// when one is present.
func (s *ReturnStmt) String() string {
	if s.Value == nil {
		return "return"
	}
	return "return " + s.Value.String()
}
// BlockStmt is a stand-alone block like { print "x" }.
type BlockStmt struct {
Body Stmts
}
// String returns the body's rendering wrapped in braces, with a
// newline after the opening brace.
func (s *BlockStmt) String() string {
return "{\n" + s.Body.String() + "}"
}
// Function is the AST for a user-defined function. Arrays holds one
// flag per parameter; the interpreter consults it to decide whether a
// parameter is passed as an array.
type Function struct {
	Name   string
	Params []string
	Arrays []bool
	Body   Stmts
}

// String renders "function name(params) { ... }" with the parameters
// comma-separated.
func (f *Function) String() string {
	params := strings.Join(f.Params, ", ")
	header := "function " + f.Name + "(" + params + ") {\n"
	return header + f.Body.String() + "}"
}
// trimParens strips one leading "(" and one trailing ")" from s when
// both are present, and returns s unchanged otherwise. It is used so
// that conditions which already render with outer parentheses are not
// parenthesized twice.
//
// The length guard matters: the single-character string "(" has both
// the prefix and the suffix, and slicing it as s[1:0] would panic.
func trimParens(s string) string {
	if len(s) >= 2 && strings.HasPrefix(s, "(") && strings.HasSuffix(s, ")") {
		return s[1 : len(s)-1]
	}
	return s
}

View File

@ -0,0 +1,48 @@
// Special variable constants
package ast
// Indexes of the special variables. V_ILLEGAL is 0, the value
// SpecialVarIndex returns for a name that is not a special variable.
const (
V_ILLEGAL = iota
V_ARGC
V_CONVFMT
V_FILENAME
V_FNR
V_FS
V_NF
V_NR
V_OFMT
V_OFS
V_ORS
V_RLENGTH
V_RS
V_RSTART
V_RT
V_SUBSEP
// V_LAST is the highest special variable index (same as V_SUBSEP).
V_LAST = V_SUBSEP
)
// specialVars maps each special variable's name to its V_* index.
var specialVars = map[string]int{
"ARGC": V_ARGC,
"CONVFMT": V_CONVFMT,
"FILENAME": V_FILENAME,
"FNR": V_FNR,
"FS": V_FS,
"NF": V_NF,
"NR": V_NR,
"OFMT": V_OFMT,
"OFS": V_OFS,
"ORS": V_ORS,
"RLENGTH": V_RLENGTH,
"RS": V_RS,
"RSTART": V_RSTART,
"RT": V_RT,
"SUBSEP": V_SUBSEP,
}
// SpecialVarIndex looks name up among the special variables and returns
// its index, or 0 (V_ILLEGAL) when name is not a special variable.
func SpecialVarIndex(name string) int {
	if index, ok := specialVars[name]; ok {
		return index
	}
	return V_ILLEGAL
}

View File

@ -0,0 +1,789 @@
// Evaluate builtin and user-defined function calls
package interp
import (
"bytes"
"errors"
"fmt"
"io"
"math"
"os/exec"
"reflect"
"sort"
"strconv"
"strings"
"time"
"unicode/utf8"
. "github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
// Call builtin function specified by "op" with given args.
//
// split(), sub() and gsub() are handled first because one of their
// arguments is not an ordinary value: split() takes an array reference
// and [g]sub() assign back to their optional third argument. For every
// other builtin all arguments are evaluated up front and the function
// is dispatched in the second switch. An unknown op panics.
func (p *interp) callBuiltin(op Token, argExprs []Expr) (value, error) {
// split() has an array arg (not evaluated) and [g]sub() have an
// lvalue arg, so handle them as special cases
switch op {
case F_SPLIT:
strValue, err := p.eval(argExprs[0])
if err != nil {
return null(), err
}
str := p.toString(strValue)
// The separator defaults to the interpreter's current field
// separator when the third argument is omitted
var fieldSep string
if len(argExprs) == 3 {
sepValue, err := p.eval(argExprs[2])
if err != nil {
return null(), err
}
fieldSep = p.toString(sepValue)
} else {
fieldSep = p.fieldSep
}
arrayExpr := argExprs[1].(*ArrayExpr)
n, err := p.split(str, arrayExpr.Scope, arrayExpr.Index, fieldSep)
if err != nil {
return null(), err
}
return num(float64(n)), nil
case F_SUB, F_GSUB:
regexValue, err := p.eval(argExprs[0])
if err != nil {
return null(), err
}
regex := p.toString(regexValue)
replValue, err := p.eval(argExprs[1])
if err != nil {
return null(), err
}
repl := p.toString(replValue)
// The input is the third argument if given, otherwise the
// current line
var in string
if len(argExprs) == 3 {
inValue, err := p.eval(argExprs[2])
if err != nil {
return null(), err
}
in = p.toString(inValue)
} else {
in = p.line
}
out, n, err := p.sub(regex, repl, in, op == F_GSUB)
if err != nil {
return null(), err
}
// Store the result back where the input came from
if len(argExprs) == 3 {
err := p.assign(argExprs[2], str(out))
if err != nil {
return null(), err
}
} else {
p.setLine(out, true)
}
return num(float64(n)), nil
}
// Now evaluate the argExprs (calls with up to 7 args don't
// require heap allocation)
args := make([]value, 0, 7)
for _, a := range argExprs {
arg, err := p.eval(a)
if err != nil {
return null(), err
}
args = append(args, arg)
}
// Then switch on the function for the ordinary functions
switch op {
case F_LENGTH:
// With no argument, length() measures the current line
var s string
if len(args) > 0 {
s = p.toString(args[0])
} else {
s = p.line
}
// Length is in bytes when p.bytes is set, otherwise in runes
var n int
if p.bytes {
n = len(s)
} else {
n = utf8.RuneCountInString(s)
}
return num(float64(n)), nil
case F_MATCH:
re, err := p.compileRegex(p.toString(args[1]))
if err != nil {
return null(), err
}
s := p.toString(args[0])
loc := re.FindStringIndex(s)
if loc == nil {
// No match: start 0, length -1, return 0
p.matchStart = 0
p.matchLength = -1
return num(0), nil
}
// Positions are 1-based; convert byte offsets to rune counts
// unless operating in bytes mode
if p.bytes {
p.matchStart = loc[0] + 1
p.matchLength = loc[1] - loc[0]
} else {
p.matchStart = utf8.RuneCountInString(s[:loc[0]]) + 1
p.matchLength = utf8.RuneCountInString(s[loc[0]:loc[1]])
}
return num(float64(p.matchStart)), nil
case F_SUBSTR:
s := p.toString(args[0])
pos := int(args[1].num())
if p.bytes {
// Clamp the 1-based pos into [1, len(s)+1]
if pos > len(s) {
pos = len(s) + 1
}
if pos < 1 {
pos = 1
}
// Clamp the optional length into [0, maxLength]
maxLength := len(s) - pos + 1
length := maxLength
if len(args) == 3 {
length = int(args[2].num())
if length < 0 {
length = 0
}
if length > maxLength {
length = maxLength
}
}
return str(s[pos-1 : pos-1+length]), nil
} else {
// Count characters till we get to pos.
chars := 1
start := 0
for start = range s {
chars++
if chars > pos {
break
}
}
// pos is past the last character: result is empty
if pos >= chars {
start = len(s)
}
// Count characters from start till we reach length.
var end int
if len(args) == 3 {
length := int(args[2].num())
chars = 0
for end = range s[start:] {
chars++
if chars > length {
break
}
}
// end was an offset within s[start:]; either take the
// rest of the string or rebase it onto s
if length >= chars {
end = len(s)
} else {
end += start
}
} else {
end = len(s)
}
return str(s[start:end]), nil
}
case F_SPRINTF:
s, err := p.sprintf(p.toString(args[0]), args[1:])
if err != nil {
return null(), err
}
return str(s), nil
case F_INDEX:
s := p.toString(args[0])
substr := p.toString(args[1])
index := strings.Index(s, substr)
if p.bytes {
// strings.Index returns -1 when not found, so this yields 0
return num(float64(index + 1)), nil
} else {
if index < 0 {
return num(float64(0)), nil
}
// Convert the byte offset to a 1-based character position
index = utf8.RuneCountInString(s[:index])
return num(float64(index + 1)), nil
}
case F_TOLOWER:
return str(strings.ToLower(p.toString(args[0]))), nil
case F_TOUPPER:
return str(strings.ToUpper(p.toString(args[0]))), nil
case F_ATAN2:
return num(math.Atan2(args[0].num(), args[1].num())), nil
case F_COS:
return num(math.Cos(args[0].num())), nil
case F_EXP:
return num(math.Exp(args[0].num())), nil
case F_INT:
return num(float64(int(args[0].num()))), nil
case F_LOG:
return num(math.Log(args[0].num())), nil
case F_SQRT:
return num(math.Sqrt(args[0].num())), nil
case F_RAND:
return num(p.random.Float64()), nil
case F_SIN:
return num(math.Sin(args[0].num())), nil
case F_SRAND:
// Returns the previously stored seed. With no argument the
// generator is seeded from the current time and the stored
// seed is left unchanged.
prevSeed := p.randSeed
switch len(args) {
case 0:
p.random.Seed(time.Now().UnixNano())
case 1:
p.randSeed = args[0].num()
p.random.Seed(int64(math.Float64bits(p.randSeed)))
}
return num(prevSeed), nil
case F_SYSTEM:
if p.noExec {
return null(), newError("can't call system() due to NoExec")
}
cmdline := p.toString(args[0])
cmd := p.execShell(cmdline)
cmd.Stdout = p.output
cmd.Stderr = p.errorOutput
_ = p.flushAll() // ensure synchronization
err := cmd.Start()
if err != nil {
p.printErrorf("%s\n", err)
return num(-1), nil
}
err = cmd.Wait()
if err != nil {
// A non-zero exit is reported as the command's exit code;
// any other failure is printed and reported as -1
if exitErr, ok := err.(*exec.ExitError); ok {
code := exitErr.ProcessState.ExitCode()
return num(float64(code)), nil
} else {
p.printErrorf("unexpected error running command %q: %v\n", cmdline, err)
return num(-1), nil
}
}
return num(0), nil
case F_CLOSE:
// Returns 0 on success, -1 if closing failed or no stream with
// that name is open. Input streams are checked first.
name := p.toString(args[0])
var c io.Closer = p.inputStreams[name]
if c != nil {
// Close input stream
delete(p.inputStreams, name)
err := c.Close()
if err != nil {
return num(-1), nil
}
return num(0), nil
}
c = p.outputStreams[name]
if c != nil {
// Close output stream
delete(p.outputStreams, name)
err := c.Close()
if err != nil {
return num(-1), nil
}
return num(0), nil
}
// Nothing to close
return num(-1), nil
case F_FFLUSH:
var name string
if len(args) > 0 {
name = p.toString(args[0])
}
var ok bool
if name != "" {
// Flush a single, named output stream
ok = p.flushStream(name)
} else {
// fflush() or fflush("") flushes all output streams
ok = p.flushAll()
}
if !ok {
return num(-1), nil
}
return num(0), nil
default:
// Shouldn't happen
panic(fmt.Sprintf("unexpected function: %s", op))
}
}
// execShell builds (but does not start) a command that runs code with
// the configured system shell: p.shellCommand[0] is the executable, the
// remaining elements are its leading arguments, and code is passed as
// the final argument.
//
// The argument list is assembled in a fresh slice. Appending directly
// to p.shellCommand[1:] could write into spare capacity of the
// configured slice's backing array, which is shared with whoever
// supplied it.
func (p *interp) execShell(code string) *exec.Cmd {
	executable := p.shellCommand[0]
	args := make([]string, 0, len(p.shellCommand))
	args = append(args, p.shellCommand[1:]...)
	args = append(args, code)
	return exec.Command(executable, args...)
}
// Call user-defined function with given index and arguments, return
// its return value (or null value if it doesn't return anything).
//
// Scalar arguments are pushed onto p.stack and become the new frame;
// array arguments are recorded as indexes into p.arrays instead. All
// of this state is restored after the body has run. An error is
// returned if the call depth limit has been reached.
func (p *interp) callUser(index int, args []Expr) (value, error) {
f := p.program.Functions[index]
if p.callDepth >= maxCallDepth {
return null(), newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth)
}
// Evaluate the arguments and push them onto the locals stack
oldFrame := p.frame
newFrameStart := len(p.stack)
var arrays []int
for i, arg := range args {
if f.Arrays[i] {
// Array parameter: pass by reference, as an array index
a := arg.(*VarExpr)
arrays = append(arrays, p.getArrayIndex(a.Scope, a.Index))
} else {
argValue, err := p.eval(arg)
if err != nil {
return null(), err
}
p.stack = append(p.stack, argValue)
}
}
// Push zero value for any additional parameters (it's valid to
// call a function with fewer arguments than it has parameters)
oldArraysLen := len(p.arrays)
for i := len(args); i < len(f.Params); i++ {
if f.Arrays[i] {
// Unspecified array parameter: allocate a fresh, empty array
arrays = append(arrays, len(p.arrays))
p.arrays = append(p.arrays, make(map[string]value))
} else {
p.stack = append(p.stack, null())
}
}
p.frame = p.stack[newFrameStart:]
p.localArrays = append(p.localArrays, arrays)
// Execute the function!
p.callDepth++
err := p.executes(f.Body)
p.callDepth--
// Pop the locals off the stack
p.stack = p.stack[:newFrameStart]
p.frame = oldFrame
p.localArrays = p.localArrays[:len(p.localArrays)-1]
p.arrays = p.arrays[:oldArraysLen]
// A return statement surfaces from the body as a returnValue "error"
// that carries the function's result
if r, ok := err.(returnValue); ok {
return r.Value, nil
}
if err != nil {
return null(), err
}
return null(), nil
}
// callNative invokes the native Go function with the given index,
// converting the evaluated AWK arguments to the function's parameter
// types, and converts its result back to an AWK value (the null value
// if the function returns nothing). A non-nil error from a
// (value, error) function is returned as the call's error.
func (p *interp) callNative(index int, args []Expr) (value, error) {
	fn := p.nativeFuncs[index]

	// For a variadic function the final declared parameter is a slice
	// that may receive no arguments at all, so it doesn't count towards
	// the required minimum; its element type applies to every trailing
	// argument.
	required := len(fn.in)
	var elemType reflect.Type
	if fn.isVariadic {
		required--
		elemType = fn.in[required].Elem()
	}

	// Capacity 7 covers typical calls without regrowing the slice.
	callArgs := make([]reflect.Value, 0, 7)
	for i := range args {
		v, err := p.eval(args[i])
		if err != nil {
			return null(), err
		}
		typ := elemType
		if !fn.isVariadic || i < len(fn.in)-1 {
			typ = fn.in[i]
		}
		callArgs = append(callArgs, p.toNative(v, typ))
	}

	// Missing non-variadic arguments are filled with zero values.
	for i := len(args); i < required; i++ {
		callArgs = append(callArgs, reflect.Zero(fn.in[i]))
	}

	results := fn.value.Call(callArgs)
	switch len(results) {
	case 0:
		// Nothing returned: AWK sees the null value.
		return null(), nil
	case 1:
		return fromNative(results[0]), nil
	case 2:
		// (scalar, error) pair: the error takes precedence.
		if !results[1].IsNil() {
			return null(), results[1].Interface().(error)
		}
		return fromNative(results[0]), nil
	default:
		// Should never happen (checked at parse time)
		panic(fmt.Sprintf("unexpected number of return values: %d", len(results)))
	}
}
// Convert from an AWK value to a native Go value of the given type.
//
// Numeric kinds are produced by a Go conversion of v.num(), strings by
// p.toString(v), and []byte by converting that string. Any other kind
// panics; such types are expected to have been rejected by
// checkNativeFunc.
func (p *interp) toNative(v value, typ reflect.Type) reflect.Value {
switch typ.Kind() {
case reflect.Bool:
return reflect.ValueOf(v.boolean())
case reflect.Int:
return reflect.ValueOf(int(v.num()))
case reflect.Int8:
return reflect.ValueOf(int8(v.num()))
case reflect.Int16:
return reflect.ValueOf(int16(v.num()))
case reflect.Int32:
return reflect.ValueOf(int32(v.num()))
case reflect.Int64:
return reflect.ValueOf(int64(v.num()))
case reflect.Uint:
return reflect.ValueOf(uint(v.num()))
case reflect.Uint8:
return reflect.ValueOf(uint8(v.num()))
case reflect.Uint16:
return reflect.ValueOf(uint16(v.num()))
case reflect.Uint32:
return reflect.ValueOf(uint32(v.num()))
case reflect.Uint64:
return reflect.ValueOf(uint64(v.num()))
case reflect.Float32:
return reflect.ValueOf(float32(v.num()))
case reflect.Float64:
return reflect.ValueOf(v.num())
case reflect.String:
return reflect.ValueOf(p.toString(v))
case reflect.Slice:
// Only []byte is supported among slice types
if typ.Elem().Kind() != reflect.Uint8 {
// Shouldn't happen: prevented by checkNativeFunc
panic(fmt.Sprintf("unexpected argument slice: %s", typ.Elem().Kind()))
}
return reflect.ValueOf([]byte(p.toString(v)))
default:
// Shouldn't happen: prevented by checkNativeFunc
panic(fmt.Sprintf("unexpected argument type: %s", typ.Kind()))
}
}
// Convert from a native Go value to an AWK value.
//
// Booleans become AWK booleans, every integer and float kind becomes a
// number, and strings and []byte become strings. Any other kind
// panics; such types are expected to have been rejected by
// checkNativeFunc.
func fromNative(v reflect.Value) value {
switch v.Kind() {
case reflect.Bool:
return boolean(v.Bool())
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return num(float64(v.Int()))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
return num(float64(v.Uint()))
case reflect.Float32, reflect.Float64:
return num(v.Float())
case reflect.String:
return str(v.String())
case reflect.Slice:
// Only []byte is supported among slice types
if b, ok := v.Interface().([]byte); ok {
return str(string(b))
}
// Shouldn't happen: prevented by checkNativeFunc
panic(fmt.Sprintf("unexpected return slice: %s", v.Type().Elem().Kind()))
default:
// Shouldn't happen: prevented by checkNativeFunc
panic(fmt.Sprintf("unexpected return type: %s", v.Kind()))
}
}
// Used for caching native function type information on init, so that
// callNative doesn't have to re-derive it on every call.
type nativeFunc struct {
isVariadic bool // whether the function's final parameter is variadic
in []reflect.Type // declared parameter types, in order
value reflect.Value // the function itself, ready to Call
}
// initNativeFuncs validates every function in funcs and, if all are
// acceptable, caches their reflection details in p.nativeFuncs. It
// returns the first validation error encountered and leaves
// p.nativeFuncs untouched in that case.
func (p *interp) initNativeFuncs(funcs map[string]interface{}) error {
	names := make([]string, 0, len(funcs))
	for name, f := range funcs {
		if err := checkNativeFunc(name, f); err != nil {
			return err
		}
		names = append(names, name)
	}

	// Slice positions follow the sorted function names (this has to
	// match how the parser sets the indexes).
	sort.Strings(names)
	natives := make([]nativeFunc, 0, len(names))
	for _, name := range names {
		fn := funcs[name]
		typ := reflect.TypeOf(fn)
		params := make([]reflect.Type, typ.NumIn())
		for j := range params {
			params[j] = typ.In(j)
		}
		natives = append(natives, nativeFunc{
			isVariadic: typ.IsVariadic(),
			in:         params,
			value:      reflect.ValueOf(fn),
		})
	}
	p.nativeFuncs = natives
	return nil
}
// errorType is the reflect.Type of the error interface, obtained by
// taking the element type of a nil *error. Got this trick from the Go
// stdlib text/template source.
var errorType = reflect.TypeOf((*error)(nil)).Elem()
// checkNativeFunc checks that the native function with the given name
// is okay to call from AWK, and returns an *interp.Error if not. It
// verifies that the name is not a keyword, that f is actually a
// function, that every parameter type (the element type, for a variadic
// final parameter) is supported, and that f returns nothing, one
// supported value, or a supported value plus an error.
//
// A nil f is reported as "not a function": reflect.TypeOf returns a nil
// Type for a nil interface value, and calling Kind on it would panic.
func checkNativeFunc(name string, f interface{}) error {
	if KeywordToken(name) != ILLEGAL {
		return newError("can't use keyword %q as native function name", name)
	}

	typ := reflect.TypeOf(f)
	if typ == nil || typ.Kind() != reflect.Func {
		return newError("native function %q is not a function", name)
	}

	for i := 0; i < typ.NumIn(); i++ {
		param := typ.In(i)
		if typ.IsVariadic() && i == typ.NumIn()-1 {
			// The variadic parameter is declared as a slice; it is the
			// element type that each argument must convert to.
			param = param.Elem()
		}
		if !validNativeType(param) {
			return newError("native function %q param %d is not int or string", name, i)
		}
	}

	switch typ.NumOut() {
	case 0:
		// No return value is fine
	case 1:
		// Single scalar return value is fine
		if !validNativeType(typ.Out(0)) {
			return newError("native function %q return value is not int or string", name)
		}
	case 2:
		// Returning (scalar, error) is handled too
		if !validNativeType(typ.Out(0)) {
			return newError("native function %q first return value is not int or string", name)
		}
		if typ.Out(1) != errorType {
			return newError("native function %q second return value is not an error", name)
		}
	default:
		return newError("native function %q returns more than two values", name)
	}
	return nil
}
// validNativeType reports whether typ may be used as a parameter or
// return type of a native function: bool, any sized or unsized signed
// or unsigned integer, float32/float64, string, or []byte.
func validNativeType(typ reflect.Type) bool {
	switch typ.Kind() {
	case reflect.Bool, reflect.String,
		reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
		reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Float32, reflect.Float64:
		return true
	case reflect.Slice:
		// Only allow []byte (convert to string in AWK)
		return typ.Elem().Kind() == reflect.Uint8
	}
	return false
}
// split implements the split() function: it breaks s into pieces
// according to the separator fs, replaces the array identified by scope
// and index with those pieces keyed "1", "2", ..., and returns the
// number of pieces. An error is returned only when fs has to be
// compiled as a regex and compilation fails.
func (p *interp) split(s string, scope VarScope, index int, fs string) (int, error) {
	var fields []string
	switch {
	case fs == " ":
		// A single space means split on runs of whitespace.
		fields = strings.Fields(s)
	case s == "":
		// Empty input yields no pieces at all.
	case utf8.RuneCountInString(fs) <= 1:
		// Zero- or one-character separators are split literally.
		fields = strings.Split(s, fs)
	default:
		// Anything longer is treated as a regex.
		re, err := p.compileRegex(fs)
		if err != nil {
			return 0, err
		}
		fields = re.Split(s, -1)
	}

	elems := make(map[string]value, len(fields))
	for i := range fields {
		elems[strconv.Itoa(i+1)] = numStr(fields[i])
	}
	p.arrays[p.getArrayIndex(scope, index)] = elems
	return len(elems), nil
}
// Guts of the sub() and gsub() functions.
//
// Replaces matches of regex in "in" with repl: only the first match
// when global is false, every match when it is true. Returns the
// resulting string and the number of replacements made, or an error if
// the regex fails to compile.
func (p *interp) sub(regex, repl, in string, global bool) (out string, num int, err error) {
re, err := p.compileRegex(regex)
if err != nil {
return "", 0, err
}
count := 0
out = re.ReplaceAllStringFunc(in, func(s string) string {
// Only do the first replacement for sub(), or all for gsub()
if !global && count > 0 {
return s
}
count++
// Handle & (ampersand) properly in replacement string
r := make([]byte, 0, 64) // Up to 64 byte replacement won't require heap allocation
for i := 0; i < len(repl); i++ {
switch repl[i] {
case '&':
// Unescaped & stands for the matched text
r = append(r, s...)
case '\\':
// Look at the character after the backslash: \& and \\
// produce a literal & and \; any other escape is kept
// as written, as is a trailing lone backslash
i++
if i < len(repl) {
switch repl[i] {
case '&':
r = append(r, '&')
case '\\':
r = append(r, '\\')
default:
r = append(r, '\\', repl[i])
}
} else {
r = append(r, '\\')
}
default:
r = append(r, repl[i])
}
}
return string(r)
})
return out, count, nil
}
// cachedFormat is one parseFmtTypes result held in the format cache:
// the converted Go format string and the per-argument type specifiers.
type cachedFormat struct {
format string
types []byte
}
// Parse given sprintf format string into Go format string, along with
// type conversion specifiers. Output is memoized in a simple cache
// for performance.
//
// types holds one byte per argument the format consumes: 's' (string),
// 'd' (integer, also used for each '*' width/precision), 'f' (float),
// 'u' (unsigned) or 'c' (character). An error is returned for a
// trailing '%' or an unsupported conversion character.
func (p *interp) parseFmtTypes(s string) (format string, types []byte, err error) {
if item, ok := p.formatCache[s]; ok {
return item.format, item.types, nil
}
// out starts as a copy of s; verbs Go doesn't support are rewritten
// in place
out := []byte(s)
for i := 0; i < len(s); i++ {
if s[i] == '%' {
i++
if i >= len(s) {
return "", nil, errors.New("expected type specifier after %")
}
// "%%" is a literal percent sign and consumes no argument
if s[i] == '%' {
continue
}
// Skip flags, width and precision; each '*' consumes an
// integer argument of its own
for i < len(s) && bytes.IndexByte([]byte(" .-+*#0123456789"), s[i]) >= 0 {
if s[i] == '*' {
types = append(types, 'd')
}
i++
}
if i >= len(s) {
return "", nil, errors.New("expected type specifier after %")
}
var t byte
switch s[i] {
case 's':
t = 's'
case 'd', 'i', 'o', 'x', 'X':
t = 'd'
case 'f', 'e', 'E', 'g', 'G':
t = 'f'
case 'u':
// Emitted as %d in the Go format string
t = 'u'
out[i] = 'd'
case 'c':
// Emitted as %s in the Go format string
t = 'c'
out[i] = 's'
default:
return "", nil, fmt.Errorf("invalid format type %q", s[i])
}
types = append(types, t)
}
}
// Dumb, non-LRU cache: just cache the first N formats
format = string(out)
if len(p.formatCache) < maxCachedFormats {
p.formatCache[s] = cachedFormat{format, types}
}
return format, types, nil
}
// Guts of sprintf() function (also used by "printf" statement).
//
// The format is translated by parseFmtTypes, each argument is converted
// to the Go type its conversion expects, and the result is produced by
// fmt.Sprintf. Returns an error if the format is invalid or needs more
// arguments than were given; surplus arguments are ignored.
func (p *interp) sprintf(format string, args []value) (string, error) {
format, types, err := p.parseFmtTypes(format)
if err != nil {
return "", newError("format error: %s", err)
}
if len(types) > len(args) {
return "", newError("format error: got %d args, expected %d", len(args), len(types))
}
converted := make([]interface{}, len(types))
for i, t := range types {
a := args[i]
var v interface{}
switch t {
case 's':
v = p.toString(a)
case 'd':
v = int(a.num())
case 'f':
v = a.num()
case 'u':
v = uint32(a.num())
case 'c':
// %c yields a single byte: the first byte of a string argument
// (or a zero byte if the string is empty), or the numeric
// argument converted to a byte
var c []byte
n, isStr := a.isTrueStr()
if isStr {
s := p.toString(a)
if len(s) > 0 {
c = []byte{s[0]}
} else {
c = []byte{0}
}
} else {
// Follow the behaviour of awk and mawk, where %c
// operates on bytes (0-255), not Unicode codepoints
c = []byte{byte(n)}
}
v = c
}
converted[i] = v
}
return fmt.Sprintf(format, converted...), nil
}

1369
play/vendor/github.com/benhoyt/goawk/interp/interp.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

561
play/vendor/github.com/benhoyt/goawk/interp/io.go generated vendored Normal file
View File

@ -0,0 +1,561 @@
// Input/output handling for GoAWK interpreter
package interp
import (
"bufio"
"bytes"
"fmt"
"io"
"io/ioutil"
"os"
"regexp"
"strconv"
"strings"
"unicode/utf8"
. "github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
// printLine writes line to writer and then the output record separator
// (p.outputRecordSep), stopping at the first write error.
func (p *interp) printLine(writer io.Writer, line string) error {
	if err := writeOutput(writer, line); err != nil {
		return err
	}
	return writeOutput(writer, p.outputRecordSep)
}
// bufferedWriteCloser is a buffered io.WriteCloser, used so that output is
// buffered when redirecting to a file or pipe (eg: print >"out"). Writes go
// through the embedded bufio.Writer; Close flushes that buffer and then
// closes the embedded Closer.
type bufferedWriteCloser struct {
	*bufio.Writer
	io.Closer
}

// newBufferedWriteCloser wraps w in a bufferedWriteCloser whose buffer is
// outputBufSize bytes.
func newBufferedWriteCloser(w io.WriteCloser) *bufferedWriteCloser {
	writer := bufio.NewWriterSize(w, outputBufSize)
	return &bufferedWriteCloser{writer, w}
}

// Close flushes any buffered data and closes the underlying stream.
//
// The underlying stream is closed even when the flush fails, so a failed
// write can't leak the file descriptor or leave a pipe's read end waiting
// for EOF. The flush error takes precedence over the close error, as it is
// the one that indicates lost output.
func (wc *bufferedWriteCloser) Close() error {
	flushErr := wc.Writer.Flush()
	closeErr := wc.Closer.Close()
	if flushErr != nil {
		return flushErr
	}
	return closeErr
}
// getOutputStream determines the output stream for the given redirect
// token and destination expression (file or pipe name).
//
// A redirect of ILLEGAL means "no redirection" and yields p.output. For
// other redirects, dest is evaluated to a name; a stream already opened
// under that name in p.outputStreams is reused, and a name that is
// currently open as an input stream is rejected. Newly opened files and
// pipes are wrapped in a bufferedWriteCloser and recorded in
// p.outputStreams (pipes also record their command in p.commands).
//
// Errors are returned when dest fails to evaluate, when the name is an
// input stream, when file writes or command execution are disabled
// (p.noFileWrites / p.noExec), or when the file or pipe can't be opened.
// A command that fails to start is reported via printErrorf and its output
// is discarded rather than treated as an error.
func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) {
if redirect == ILLEGAL {
// Token "ILLEGAL" means send to standard output
return p.output, nil
}
destValue, err := p.eval(dest)
if err != nil {
return nil, err
}
name := p.toString(destValue)
// The same name can't be open for both reading and writing
if _, ok := p.inputStreams[name]; ok {
return nil, newError("can't write to reader stream")
}
// Reuse the stream if this name has been opened for output before
if w, ok := p.outputStreams[name]; ok {
return w, nil
}
switch redirect {
case GREATER, APPEND:
if name == "-" {
// filename of "-" means write to stdout, eg: print "x" >"-"
return p.output, nil
}
// Write or append to file
if p.noFileWrites {
return nil, newError("can't write to file due to NoFileWrites")
}
p.flushOutputAndError() // ensure synchronization
// ">" truncates an existing file, ">>" appends to it
flags := os.O_CREATE | os.O_WRONLY
if redirect == GREATER {
flags |= os.O_TRUNC
} else {
flags |= os.O_APPEND
}
w, err := os.OpenFile(name, flags, 0644)
if err != nil {
return nil, newError("output redirection error: %s", err)
}
buffered := newBufferedWriteCloser(w)
p.outputStreams[name] = buffered
return buffered, nil
case PIPE:
// Pipe to command
if p.noExec {
return nil, newError("can't write to pipe due to NoExec")
}
cmd := p.execShell(name)
w, err := cmd.StdinPipe()
if err != nil {
return nil, newError("error connecting to stdin pipe: %v", err)
}
// The command's own output goes to the interpreter's output streams
cmd.Stdout = p.output
cmd.Stderr = p.errorOutput
p.flushOutputAndError() // ensure synchronization
err = cmd.Start()
if err != nil {
// Failure to start is reported but not fatal: output is discarded
p.printErrorf("%s\n", err)
return ioutil.Discard, nil
}
p.commands[name] = cmd
buffered := newBufferedWriteCloser(w)
p.outputStreams[name] = buffered
return buffered, nil
default:
// Should never happen
panic(fmt.Sprintf("unexpected redirect type %s", redirect))
}
}
// getInputScannerFile returns the Scanner that "getline <file" should read
// from for the given file name, opening the file on first use and reusing
// the same Scanner afterwards.
//
// A name of "-" reads from p.stdin. Names that are currently open as
// output streams are rejected, and regular files are refused when
// p.noFileReads is set. Errors from os.Open are returned unwrapped.
func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
	if _, isOutput := p.outputStreams[name]; isOutput {
		return nil, newError("can't read from writer stream")
	}
	if _, isOpen := p.inputStreams[name]; isOpen {
		return p.scanners[name], nil
	}

	if name == "-" {
		// filename of "-" means read from stdin, eg: getline <"-"
		stdinScanner, ok := p.scanners["-"]
		if !ok {
			stdinScanner = p.newScanner(p.stdin)
			p.scanners[name] = stdinScanner
		}
		return stdinScanner, nil
	}

	if p.noFileReads {
		return nil, newError("can't read from file due to NoFileReads")
	}
	file, err := os.Open(name)
	if err != nil {
		return nil, err // *os.PathError is handled by caller (getline returns -1)
	}
	fileScanner := p.newScanner(file)
	p.inputStreams[name] = file
	p.scanners[name] = fileScanner
	return fileScanner, nil
}
// getInputScannerPipe returns the Scanner that "cmd | getline" should read
// from for the given command string, starting the command on first use and
// reusing the same Scanner afterwards.
//
// Names that are currently open as output streams are rejected, and no
// command is run when p.noExec is set. A command that fails to start is
// reported via printErrorf and yields a Scanner over empty input rather
// than an error.
func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
	if _, isOutput := p.outputStreams[name]; isOutput {
		return nil, newError("can't read from writer stream")
	}
	if _, isOpen := p.inputStreams[name]; isOpen {
		return p.scanners[name], nil
	}
	if p.noExec {
		return nil, newError("can't read from pipe due to NoExec")
	}

	// The command shares the interpreter's stdin and error output; its
	// stdout is what getline reads
	cmd := p.execShell(name)
	cmd.Stdin = p.stdin
	cmd.Stderr = p.errorOutput
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, newError("error connecting to stdout pipe: %v", err)
	}

	p.flushOutputAndError() // ensure synchronization
	if err := cmd.Start(); err != nil {
		p.printErrorf("%s\n", err)
		return bufio.NewScanner(strings.NewReader("")), nil
	}

	pipeScanner := p.newScanner(stdout)
	p.commands[name] = cmd
	p.inputStreams[name] = stdout
	p.scanners[name] = pipeScanner
	return pipeScanner, nil
}
// newScanner creates a buffered Scanner that reads records from input,
// choosing the split function from the current record separator (RS):
//
//   - "\n": the Scanner's default line splitting
//   - "": blank-line (paragraph) splitting
//   - any other single byte: split on that byte
//   - anything else: split on the compiled RS regex
//
// The Scanner starts with an inputBufSize buffer and accepts records up to
// maxRecordLength.
func (p *interp) newScanner(input io.Reader) *bufio.Scanner {
	scanner := bufio.NewScanner(input)

	var split bufio.SplitFunc
	switch rs := p.recordSep; {
	case rs == "\n":
		// Scanner default is to split on newlines
	case rs == "":
		// Empty string for RS means split on \n\n (blank lines)
		split = blankLineSplitter{&p.recordTerminator}.scan
	case len(rs) == 1:
		split = byteSplitter{rs[0]}.scan
	case utf8.RuneCountInString(rs) >= 1:
		// Multi-byte and single char but multi-byte RS use regex
		split = regexSplitter{p.recordSepRegex, &p.recordTerminator}.scan
	}
	if split != nil {
		scanner.Split(split)
	}

	scanner.Buffer(make([]byte, inputBufSize), maxRecordLength)
	return scanner
}
// dropCR returns data without a single trailing '\r', if it has one.
//
// Copied from bufio/scan.go in the stdlib: I guess it's a bit more
// efficient than bytes.TrimSuffix(data, []byte("\r"))
func dropCR(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\r' {
		return data[:len(data)-1]
	}
	return data
}

// dropLF returns data without a single trailing '\n', if it has one.
func dropLF(data []byte) []byte {
	if len(data) > 0 && data[len(data)-1] == '\n' {
		return data[:len(data)-1]
	}
	return data
}

// blankLineSplitter splits input into records separated by one or more
// blank lines (used when RS is the empty string). The text that terminated
// each record is stored through terminator.
type blankLineSplitter struct {
	terminator *string
}

// scan is a bufio.SplitFunc. Leading newlines are skipped, then the record
// runs up to the first "\n\n" (or "\n\r\n"); that sequence and any newlines
// directly after it form the terminator. At EOF the remaining data is
// returned as a final record, minus one trailing newline ("\n" or "\r\n"),
// which becomes the terminator.
func (s blankLineSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}

	// Skip newlines at beginning of data
	i := 0
	for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
		i++
	}
	if i >= len(data) {
		// At end of data after newlines, skip entire data block
		return i, nil, nil
	}
	start := i

	// Try to find two consecutive newlines (or \n\r\n for Windows)
	for ; i < len(data); i++ {
		if data[i] != '\n' {
			continue
		}
		end := i

		// Length of the blank-line sequence starting at end, 0 if none
		sepLen := 0
		switch {
		case i+1 < len(data) && data[i+1] == '\n':
			sepLen = 2
		case i+2 < len(data) && data[i+1] == '\r' && data[i+2] == '\n':
			sepLen = 3
		}
		if sepLen == 0 {
			continue
		}

		i += sepLen
		for i < len(data) && (data[i] == '\n' || data[i] == '\r') {
			i++ // Skip newlines at end of record
		}
		*s.terminator = string(data[end:i])
		return i, dropCR(data[start:end]), nil
	}

	// If we're at EOF, we have one final record; return it
	if atEOF {
		token = dropCR(dropLF(data[start:]))
		// token begins at start, not at 0, so the terminator is whatever
		// follows it from there (skipped leading newlines don't count)
		*s.terminator = string(data[start+len(token):])
		return len(data), token, nil
	}

	// Request more data
	return 0, nil, nil
}
// byteSplitter splits input into records terminated by a single separator
// byte.
type byteSplitter struct {
	sep byte
}

// scan is a bufio.SplitFunc returning the data before the first separator
// byte and consuming the separator. At EOF, any remaining data is returned
// as a final, unterminated record.
func (s byteSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	switch idx := bytes.IndexByte(data, s.sep); {
	case atEOF && len(data) == 0:
		// Nothing left at all
		return 0, nil, nil
	case idx >= 0:
		// A complete sep-terminated record is available
		return idx + 1, data[:idx], nil
	case atEOF:
		// Last record has no terminator; hand over what's left
		return len(data), data, nil
	default:
		// Need more data before a record can be produced
		return 0, nil, nil
	}
}
// regexSplitter splits input into records separated by matches of a
// regular expression. The text of each match is stored through terminator.
type regexSplitter struct {
	re         *regexp.Regexp
	terminator *string
}

// scan is a bufio.SplitFunc returning the data before the first non-empty
// match of s.re and consuming the match. At EOF, any remaining data is
// returned as a final record with an empty terminator.
func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 && atEOF {
		return 0, nil, nil
	}

	// Note: for a regex such as "()", the match is empty. Gawk behavior for
	// this case is to match the entire input, so empty matches are ignored.
	if m := s.re.FindIndex(data); m != nil && m[0] != m[1] {
		begin, end := m[0], m[1]
		*s.terminator = string(data[begin:end]) // set RT special variable
		return end, data[:begin], nil
	}

	if !atEOF {
		// Request more data
		return 0, nil, nil
	}

	// At EOF we have a final, non-terminated record; return it
	*s.terminator = ""
	return len(data), data, nil
}
// setFile records filename as the current input file (an empty string is
// used for stdin) and restarts the per-file line counter.
func (p *interp) setFile(filename string) {
	p.fileLineNum = 0
	p.filename = numStr(filename)
}
// setLine installs line as the current record. Field splitting is
// deferred: the cached fields are only marked stale here and are rebuilt
// by ensureFields when they are next needed.
func (p *interp) setLine(line string, isTrueStr bool) {
	p.haveFields = false
	p.lineIsTrueStr = isTrueStr
	p.line = line
}
// ensureFields makes sure the current line has been split into fields,
// doing the split now if it hasn't happened since the line was set.
//
// How the line is split depends on the field separator (FS):
//
//   - a single space: split on runs of whitespace
//   - at most one character: plain string split
//   - anything else: split on the compiled FS regex
//
// An empty line has no fields (except under the single-space rule, which
// yields none anyway).
func (p *interp) ensureFields() {
	if p.haveFields {
		return
	}
	p.haveFields = true

	sepRunes := utf8.RuneCountInString(p.fieldSep)
	switch {
	case p.fieldSep == " ":
		// FS space (default) means split fields on any whitespace
		p.fields = strings.Fields(p.line)
	case p.line == "":
		p.fields = nil
	case sepRunes <= 1:
		// 1-char FS is handled as plain split (not regex)
		p.fields = strings.Split(p.line, p.fieldSep)
	default:
		// Split on FS as a regex
		p.fields = p.fieldSepRegex.Split(p.line, -1)
	}

	// Special case for when RS=="" and FS is single character,
	// split on newline in addition to FS. See more here:
	// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
	if p.recordSep == "" && sepRunes == 1 {
		split := make([]string, 0, len(p.fields))
		for _, field := range p.fields {
			for _, part := range strings.Split(field, "\n") {
				split = append(split, strings.TrimSuffix(part, "\r"))
			}
		}
		p.fields = split
	}

	p.numFields = len(p.fields)
	p.fieldsIsTrueStr = make([]bool, len(p.fields))
}
// nextLine fetches the next line (record) of input from the current input
// file, opening the next input file if done with the previous one.
//
// Input files come from the ARGV array, walked via p.filenameIndex. Each
// ARGV entry is one of: a var=value assignment (applied, then skipped), an
// empty string (skipped), "-" (stdin), or a file name to open. If ARGV is
// exhausted without any file having been seen, stdin is read instead.
//
// Returns io.EOF once all input is consumed. Other errors come from
// setVarByName, from opening a file (including when p.noFileReads forbids
// it), or from the scanner.
func (p *interp) nextLine() (string, error) {
for {
// A nil scanner means there is no current input: move to the next one
if p.scanner == nil {
if prevInput, ok := p.input.(io.Closer); ok && p.input != p.stdin {
// Previous input is file, close it
_ = prevInput.Close()
}
if p.filenameIndex >= p.argc && !p.hadFiles {
// Moved past number of ARGV args and haven't seen
// any files yet, use stdin
p.input = p.stdin
p.setFile("")
p.hadFiles = true
} else {
if p.filenameIndex >= p.argc {
// Done with ARGV args, all done with input
return "", io.EOF
}
// Fetch next filename from ARGV. Can't use
// getArrayValue() here as it would set the value if
// not present
index := strconv.Itoa(p.filenameIndex)
argvIndex := p.program.Arrays["ARGV"]
argvArray := p.arrays[p.getArrayIndex(ScopeGlobal, argvIndex)]
filename := p.toString(argvArray[index])
p.filenameIndex++
// Is it actually a var=value assignment?
matches := varRegex.FindStringSubmatch(filename)
if len(matches) >= 3 {
// Yep, set variable to value and keep going
err := p.setVarByName(matches[1], matches[2])
if err != nil {
return "", err
}
continue
} else if filename == "" {
// ARGV arg is empty string, skip
p.input = nil
continue
} else if filename == "-" {
// ARGV arg is "-" meaning stdin
p.input = p.stdin
p.setFile("")
} else {
// A regular file name, open it
if p.noFileReads {
return "", newError("can't read from file due to NoFileReads")
}
input, err := os.Open(filename)
if err != nil {
return "", err
}
p.input = input
p.setFile(filename)
p.hadFiles = true
}
}
p.scanner = p.newScanner(p.input)
}
p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars
if p.scanner.Scan() {
// We scanned some input, break and return it
break
}
// Scan returned false: either a read error or end of this input
err := p.scanner.Err()
if err != nil {
return "", fmt.Errorf("error reading from input: %s", err)
}
// Signal loop to move onto next file
p.scanner = nil
}
// Got a line (record) of input, return it
p.lineNum++
p.fileLineNum++
return p.scanner.Text(), nil
}
// writeOutput writes s to w. When crlfNewline is set (Windows), every
// newline in s is emitted as CR LF.
func writeOutput(w io.Writer, s string) error {
	if crlfNewline {
		// Normalize existing "\r\n" to "\n" first so they aren't doubled
		// up, then expand every newline to "\r\n". NOTE: creating two new
		// strings is almost certainly slow; would be better to create a
		// custom Writer.
		s = strings.ReplaceAll(strings.ReplaceAll(s, "\r\n", "\n"), "\n", "\r\n")
	}
	_, err := io.WriteString(w, s)
	return err
}
// closeAll releases everything opened during program execution: the
// current input, all getline input streams, all redirected output streams,
// and all started commands (which are waited for). Standard output and
// error output are flushed last. Cleanup is best-effort, so errors are
// deliberately ignored.
//
// NOTE(review): unlike nextLine, the current input is closed here even
// when it is p.stdin -- confirm that closing a caller-supplied stdin is
// intended.
func (p *interp) closeAll() {
	if in, ok := p.input.(io.Closer); ok {
		_ = in.Close()
	}
	for _, stream := range p.inputStreams {
		_ = stream.Close()
	}
	for _, stream := range p.outputStreams {
		_ = stream.Close()
	}
	for _, command := range p.commands {
		_ = command.Wait()
	}
	p.flushOutputAndError()
}
// flushAll flushes all redirected output streams as well as standard
// output, and reports whether every one of them was flushed successfully
// (flushWriter logs an error for each one that wasn't).
//
// Every stream is flushed regardless of earlier failures. The result of
// each flush is checked in its own statement rather than being folded in
// with "allGood && ...", because && short-circuits and would skip the
// flush call entirely once allGood had become false.
func (p *interp) flushAll() bool {
	allGood := true
	for name, writer := range p.outputStreams {
		if !p.flushWriter(name, writer) {
			allGood = false
		}
	}
	if _, ok := p.output.(flusher); ok {
		// User-provided output may or may not be flushable
		if !p.flushWriter("stdout", p.output) {
			allGood = false
		}
	}
	return allGood
}
// flushStream flushes the output stream opened under name and reports
// whether that succeeded. An error is logged, and false returned, if name
// isn't an open output file or pipe or if the flush itself fails.
func (p *interp) flushStream(name string) bool {
	if writer := p.outputStreams[name]; writer != nil {
		return p.flushWriter(name, writer)
	}
	p.printErrorf("error flushing %q: not an output file or pipe\n", name)
	return false
}
// flusher is implemented by writers that buffer output and can be asked to
// write it out. Writers are probed for it with a type assertion, so ones
// that don't implement it are simply not flushed.
type flusher interface {
Flush() error
}
// flushWriter flushes writer if it supports flushing and reports whether
// that succeeded. A writer that can't be flushed counts as success; a
// failed flush is logged under the given name and reported as false.
func (p *interp) flushWriter(name string, writer io.Writer) bool {
	f, canFlush := writer.(flusher)
	if !canFlush {
		return true // not a flusher, don't error
	}
	if err := f.Flush(); err != nil {
		p.printErrorf("error flushing %q: %v\n", name, err)
		return false
	}
	return true
}
// flushOutputAndError flushes standard output and then error output, for
// whichever of them support flushing. Flush errors are ignored.
func (p *interp) flushOutputAndError() {
	for _, w := range []io.Writer{p.output, p.errorOutput} {
		if f, ok := w.(flusher); ok {
			_ = f.Flush()
		}
	}
}
// printErrorf formats a message to the error output stream. Standard
// output is flushed beforehand so the message lands after any output
// already produced, and the error stream is flushed afterwards. Flush
// errors are ignored.
func (p *interp) printErrorf(format string, args ...interface{}) {
	if out, ok := p.output.(flusher); ok {
		_ = out.Flush() // ensure synchronization
	}
	fmt.Fprintf(p.errorOutput, format, args...)
	if errOut, ok := p.errorOutput.(flusher); ok {
		_ = errOut.Flush()
	}
}

178
play/vendor/github.com/benhoyt/goawk/interp/value.go generated vendored Normal file
View File

@ -0,0 +1,178 @@
// GoAWK interpreter value type (not exported).
package interp
import (
"fmt"
"math"
"strconv"
"strings"
)
// valueType identifies which kind of AWK value a value struct holds.
type valueType uint8
const (
// typeNull is the zero valueType, so a zero value struct is a null value
typeNull valueType = iota
// typeStr is a string value
typeStr
// typeNum is a number value
typeNum
// typeNumStr is a "numeric string" from an input field
typeNumStr
)
// An AWK value (these are passed around by value). Which of the s and n
// fields is meaningful depends on typ; the zero value is a null value.
type value struct {
typ valueType // Type of value
s string // String value (for typeStr and typeNumStr)
n float64 // Numeric value (for typeNum)
}
// null returns a null value, which is simply the zero value struct.
func null() value {
	var v value
	return v
}
// num returns a number value holding n.
func num(n float64) value {
	v := value{n: n}
	v.typ = typeNum
	return v
}
// str returns a string value holding s.
func str(s string) value {
	v := value{s: s}
	v.typ = typeStr
	return v
}
// numStr returns a "numeric string" value holding s, the kind of value
// used for text that comes from an input field.
func numStr(s string) value {
	v := value{s: s}
	v.typ = typeNumStr
	return v
}
// boolean converts a Go bool to an AWK number value: 1 for true and 0 for
// false.
func boolean(b bool) value {
	n := 0.0
	if b {
		n = 1
	}
	return num(n)
}
// isTrueStr reports whether the value is a "true string": either a string,
// or a "numeric string" from an input field whose text (with surrounding
// whitespace trimmed) doesn't parse as a number. When it is not a true
// string, the (possibly converted) number is returned as well; otherwise
// the number is 0.
func (v value) isTrueStr() (float64, bool) {
	if v.typ == typeStr {
		return 0, true
	}
	if v.typ != typeNumStr {
		// typeNum, typeNull
		return v.n, false
	}
	f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64)
	if err != nil {
		return 0, true
	}
	return f, false
}
// boolean returns the truthiness of the AWK value as a Go bool. Numbers,
// and numeric strings that parse as numbers, are true when non-zero.
// Strings, and numeric strings that don't parse, are true when non-empty.
func (v value) boolean() bool {
	if v.typ == typeNumStr {
		if f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64); err == nil {
			return f != 0
		}
		return v.s != ""
	}
	if v.typ == typeStr {
		return v.s != ""
	}
	// typeNum, typeNull
	return v.n != 0
}
// str returns the string form of the value. Strings and numeric strings
// are returned as they are, and null gives "". Numbers are formatted with
// floatFormat, with three exceptions that bypass it: NaN gives "nan",
// infinities give "inf" or "-inf", and integral values are printed as
// plain integers.
func (v value) str(floatFormat string) string {
	if v.typ != typeNum {
		// For typeStr and typeNumStr we already have the string, for
		// typeNull v.s == "".
		return v.s
	}

	n := v.n
	if math.IsNaN(n) {
		return "nan"
	}
	if math.IsInf(n, -1) {
		return "-inf"
	}
	if math.IsInf(n, 1) {
		return "inf"
	}
	if i := int(n); n == float64(i) {
		return strconv.Itoa(i)
	}
	return fmt.Sprintf(floatFormat, n)
}
// num returns the numeric form of the value. Strings and numeric strings
// are converted from their leading numeric prefix; numbers and null return
// the stored number.
func (v value) num() float64 {
	if v.typ == typeStr || v.typ == typeNumStr {
		// Ensure string starts with a float and convert it
		return parseFloatPrefix(v.s)
	}
	// typeNum, typeNull
	return v.n
}
// asciiSpace is a lookup table with a non-zero entry for each ASCII
// whitespace byte.
var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}

// parseFloatPrefix is like strconv.ParseFloat, but it parses only the
// number at the start of s, so trailing text is allowed ("1.5foo" gives
// 1.5). Leading ASCII whitespace is skipped. If s doesn't start with a
// number the result is 0.
func parseFloatPrefix(s string) float64 {
	n := len(s)

	// skipDigits returns the index of the first non-digit at or after i
	skipDigits := func(i int) int {
		for i < n && s[i] >= '0' && s[i] <= '9' {
			i++
		}
		return i
	}

	// Skip whitespace at start
	pos := 0
	for pos < n && asciiSpace[s[pos]] != 0 {
		pos++
	}
	start := pos

	// Mantissa: optional sign, digits, optional '.', more digits. At least
	// one digit is required on one side of the '.'.
	if pos < n && (s[pos] == '+' || s[pos] == '-') {
		pos++
	}
	intStart := pos
	pos = skipDigits(pos)
	sawDigit := pos > intStart
	if pos < n && s[pos] == '.' {
		pos++
	}
	fracStart := pos
	pos = skipDigits(pos)
	if pos > fracStart {
		sawDigit = true
	}
	if !sawDigit {
		return 0
	}

	// Exponent: only part of the number when it has at least one digit
	// ("1e" and similar are allowed, but ParseFloat rejects them, so the
	// number then ends before the 'e')
	end := pos
	if pos < n && (s[pos] == 'e' || s[pos] == 'E') {
		pos++
		if pos < n && (s[pos] == '+' || s[pos] == '-') {
			pos++
		}
		expStart := pos
		pos = skipDigits(pos)
		if pos > expStart {
			end = pos
		}
	}

	f, _ := strconv.ParseFloat(s[start:end], 64)
	return f // Returns infinity in case of "value out of range" error
}

461
play/vendor/github.com/benhoyt/goawk/lexer/lexer.go generated vendored Normal file
View File

@ -0,0 +1,461 @@
// Package lexer is an AWK lexer (tokenizer).
//
// The lexer turns a string of AWK source code into a stream of
// tokens for parsing.
//
// To tokenize some source, create a new lexer with NewLexer(src) and
// then call Scan() until the token type is EOF or ILLEGAL.
//
package lexer
import (
"fmt"
)
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
// actually create a lexer, and Scan() or ScanRegex() to get tokens.
type Lexer struct {
// src is the source code being tokenized
src []byte
// offset is the index in src of the next byte to load; at end of
// input it is moved one past len(src)
offset int
// ch is the most recently loaded byte (the lookahead), or 0 at end
// of input
ch byte
// pos is the line/column of ch
pos Position
// nextPos is the line/column of the byte at offset
nextPos Position
// hadSpace records whether whitespace preceded the last scanned token
hadSpace bool
// lastTok is the token most recently returned by Scan or ScanRegex
lastTok Token
}
// Position stores the source line and column where a token starts. Both
// are 1-based.
type Position struct {
// Line number of the token (starts at 1).
Line int
// Column on the line (starts at 1). Note that this is the byte
// offset into the line, not rune offset.
Column int
}
// NewLexer creates a new lexer that will tokenize the given source
// code, positioned at line 1, column 1 with the first byte already
// loaded. See the module-level example for a working example.
func NewLexer(src []byte) *Lexer {
	l := &Lexer{
		src:     src,
		nextPos: Position{Line: 1, Column: 1},
	}
	l.next()
	return l
}
// HadSpace reports whether whitespace came directly before the
// previously-scanned token. The parser needs this because the grammar
// doesn't allow a space between a user-defined function's name and the
// left parenthesis of a call to it.
func (l *Lexer) HadSpace() bool {
	spaced := l.hadSpace
	return spaced
}
// Scan scans the next token and returns its position (line/column), the
// token (one of the uppercase token constants), and the token's string
// value. The string value is empty for most tokens; for NAME, NUMBER,
// STRING, and REGEX tokens it's the token's value, and for an ILLEGAL
// token it's the error message.
func (l *Lexer) Scan() (Position, Token, string) {
	position, token, value := l.scan()
	l.lastTok = token // remembered so ScanRegex can see what preceded it
	return position, token, value
}
// scan does the real work of scanning. Scan() wraps this to more easily
// set lastTok.
//
// It skips whitespace, line continuations and a comment, then recognizes
// one token starting at the current byte: a name or keyword, a number, a
// quoted string (with escape sequences decoded), or an operator or
// punctuation symbol. Errors are reported as an ILLEGAL token whose value
// is the error message.
func (l *Lexer) scan() (Position, Token, string) {
// Skip whitespace (except newline, which is a token)
l.hadSpace = false
for l.ch == ' ' || l.ch == '\t' || l.ch == '\r' || l.ch == '\\' {
l.hadSpace = true
if l.ch == '\\' {
// Line continuation: backslash must be followed by \n or \r\n
l.next()
if l.ch == '\r' {
l.next()
}
if l.ch != '\n' {
return l.pos, ILLEGAL, "expected \\n after \\ line continuation"
}
}
l.next()
}
if l.ch == '#' {
// Skip comment till end of line
l.next()
for l.ch != '\n' && l.ch != 0 {
l.next()
}
}
if l.ch == 0 {
// l.next() reached end of input
return l.pos, EOF, ""
}
pos := l.pos
tok := ILLEGAL
val := ""
// ch is the first byte of the token; l.ch becomes the byte after it
ch := l.ch
l.next()
// Names: keywords and functions
if isNameStart(ch) {
// l.offset is now 2 past the index of ch, and l.offset-1 is always
// the index of the lookahead l.ch
start := l.offset - 2
for isNameStart(l.ch) || isDigit(l.ch) {
l.next()
}
name := string(l.src[start : l.offset-1])
tok := KeywordToken(name)
if tok == ILLEGAL {
// Not a keyword, so it's a plain name
tok = NAME
val = name
}
return pos, tok, val
}
// These are ordered by my guess at frequency of use. Should run
// through a corpus of real AWK programs to determine actual
// frequency.
switch ch {
case '$':
tok = DOLLAR
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
// Avoid make/append and use l.offset directly for performance
start := l.offset - 2
gotDigit := false
if ch != '.' {
gotDigit = true
for isDigit(l.ch) {
l.next()
}
if l.ch == '.' {
l.next()
}
}
for isDigit(l.ch) {
gotDigit = true
l.next()
}
if !gotDigit {
return l.pos, ILLEGAL, "expected digits"
}
if l.ch == 'e' || l.ch == 'E' {
l.next()
gotSign := false
if l.ch == '+' || l.ch == '-' {
gotSign = true
l.next()
}
gotDigit = false
for isDigit(l.ch) {
l.next()
gotDigit = true
}
// Per awk/gawk, "1e" is allowed and parsed as "1 e" (with "e"
// considered a variable). "1e+" is parsed as "1e + ...".
if !gotDigit {
if gotSign {
l.unread() // unread the '+' or '-'
}
l.unread() // unread the 'e' or 'E'
}
}
tok = NUMBER
val = string(l.src[start : l.offset-1])
case '{':
tok = LBRACE
case '}':
tok = RBRACE
case '=':
tok = l.choice('=', ASSIGN, EQUALS)
case '<':
tok = l.choice('=', LESS, LTE)
case '>':
switch l.ch {
case '=':
l.next()
tok = GTE
case '>':
l.next()
tok = APPEND
default:
tok = GREATER
}
case '"', '\'':
// Note: POSIX awk spec doesn't allow single-quoted strings,
// but this helps without quoting, especially on Windows
// where the shell quote character is " (double quote).
chars := make([]byte, 0, 32) // most won't require heap allocation
// Read until the matching quote character
for l.ch != ch {
c := l.ch
if c == 0 {
return l.pos, ILLEGAL, "didn't find end quote in string"
}
if c == '\r' || c == '\n' {
return l.pos, ILLEGAL, "can't have newline in string"
}
if c != '\\' {
// Normal, non-escaped character
chars = append(chars, c)
l.next()
continue
}
// Escape sequence, skip over \ and process
l.next()
switch l.ch {
case 'n':
c = '\n'
l.next()
case 't':
c = '\t'
l.next()
case 'r':
c = '\r'
l.next()
case 'a':
c = '\a'
l.next()
case 'b':
c = '\b'
l.next()
case 'f':
c = '\f'
l.next()
case 'v':
c = '\v'
l.next()
case 'x':
// Hex byte of one or two hex digits
l.next()
digit := hexDigit(l.ch)
if digit < 0 {
return l.pos, ILLEGAL, "1 or 2 hex digits expected"
}
c = byte(digit)
l.next()
digit = hexDigit(l.ch)
if digit >= 0 {
c = c*16 + byte(digit)
l.next()
}
case '0', '1', '2', '3', '4', '5', '6', '7':
// Octal byte of 1-3 octal digits
c = l.ch - '0'
l.next()
for i := 0; i < 2 && l.ch >= '0' && l.ch <= '7'; i++ {
c = c*8 + l.ch - '0'
l.next()
}
default:
// Any other escape character is just the char
// itself, eg: "\z" is just "z"
c = l.ch
l.next()
}
chars = append(chars, c)
}
// Skip over the closing quote
l.next()
tok = STRING
val = string(chars)
case '(':
tok = LPAREN
case ')':
tok = RPAREN
case ',':
tok = COMMA
case ';':
tok = SEMICOLON
case '+':
switch l.ch {
case '+':
l.next()
tok = INCR
case '=':
l.next()
tok = ADD_ASSIGN
default:
tok = ADD
}
case '-':
switch l.ch {
case '-':
l.next()
tok = DECR
case '=':
l.next()
tok = SUB_ASSIGN
default:
tok = SUB
}
case '*':
switch l.ch {
case '*':
// "**" is power, "**=" is power-assign
l.next()
tok = l.choice('=', POW, POW_ASSIGN)
case '=':
l.next()
tok = MUL_ASSIGN
default:
tok = MUL
}
case '/':
tok = l.choice('=', DIV, DIV_ASSIGN)
case '%':
tok = l.choice('=', MOD, MOD_ASSIGN)
case '[':
tok = LBRACKET
case ']':
tok = RBRACKET
case '\n':
tok = NEWLINE
case '^':
tok = l.choice('=', POW, POW_ASSIGN)
case '!':
switch l.ch {
case '=':
l.next()
tok = NOT_EQUALS
case '~':
l.next()
tok = NOT_MATCH
default:
tok = NOT
}
case '~':
tok = MATCH
case '?':
tok = QUESTION
case ':':
tok = COLON
case '&':
// Only "&&" is valid; a lone '&' is an error
tok = l.choice('&', ILLEGAL, AND)
if tok == ILLEGAL {
return l.pos, ILLEGAL, "unexpected char after '&'"
}
case '|':
tok = l.choice('|', PIPE, OR)
default:
tok = ILLEGAL
val = "unexpected char"
}
return pos, tok, val
}
// ScanRegex scans an AWK regular expression written in /slash/ syntax.
// Regex tokens get somewhat special handling in the AWK grammar, so the
// parser may only call this right after a DIV or DIV_ASSIGN token has
// been scanned.
func (l *Lexer) ScanRegex() (Position, Token, string) {
	position, token, value := l.scanRegex()
	l.lastTok = token
	return position, token, value
}
// scanRegex does the real work of scanning a regex. ScanRegex() wraps
// this to more easily set lastTok.
//
// The returned position is that of the opening slash, which was already
// consumed as part of the preceding DIV or DIV_ASSIGN token. An escaped
// slash ("\/") is stored as a plain slash; any other backslash escape is
// kept as written. Errors are reported as an ILLEGAL token whose value is
// the error message.
func (l *Lexer) scanRegex() (Position, Token, string) {
	pos := l.pos
	chars := make([]byte, 0, 32) // most won't require heap allocation

	switch l.lastTok {
	case DIV:
		// Regex after '/' (the usual case)
		pos.Column -= 1
	case DIV_ASSIGN:
		// Regex after '/=' (happens when regex starts with '=')
		pos.Column -= 2
		chars = append(chars, '=')
	default:
		return l.pos, ILLEGAL, fmt.Sprintf("unexpected %s preceding regex", l.lastTok)
	}

	for l.ch != '/' {
		switch c := l.ch; c {
		case 0:
			return l.pos, ILLEGAL, "didn't find end slash in regex"
		case '\r', '\n':
			return l.pos, ILLEGAL, "can't have newline in regex"
		case '\\':
			// Keep the backslash unless it is only there to escape a slash
			l.next()
			if l.ch != '/' {
				chars = append(chars, '\\')
			}
			chars = append(chars, l.ch)
		default:
			chars = append(chars, c)
		}
		l.next()
	}

	// Skip over the closing slash
	l.next()
	return pos, REGEX, string(chars)
}
// next loads the next byte of source into l.ch (0 at end of input) and
// updates the line and column positions: l.pos becomes the position of
// the newly loaded byte and l.nextPos the position of the one after it.
func (l *Lexer) next() {
	l.pos = l.nextPos

	if l.offset >= len(l.src) {
		if l.ch == 0 {
			// Already at end of input, nothing more to do
			return
		}
		// For last character, move offset 1 past the end as it
		// simplifies offset calculations in NAME and NUMBER
		l.ch = 0
		l.offset++
		l.nextPos.Column++
		return
	}

	ch := l.src[l.offset]
	switch ch {
	case '\n':
		l.nextPos.Line++
		l.nextPos.Column = 1
	case '\r':
		// Carriage returns don't take up a column
	default:
		l.nextPos.Column++
	}
	l.ch = ch
	l.offset++
}
// unread steps the lexer back by one byte, so that the byte before the
// current one becomes the current one again. Only columns are adjusted,
// so it must not be used to step back across a line boundary.
func (l *Lexer) unread() {
	l.offset--
	l.ch = l.src[l.offset-1]
	l.pos.Column--
	l.nextPos.Column--
}
// isNameStart reports whether ch can start a name: an underscore or an
// ASCII letter.
func isNameStart(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
// isDigit reports whether ch is an ASCII decimal digit.
func isDigit(ch byte) bool {
	// Bytes below '0' wrap around to large values, so one comparison
	// covers both ends of the range
	return ch-'0' <= 9
}
// hexDigit returns the value (0-15) of the hexadecimal digit given as an
// ASCII byte, in either case, or -1 if ch isn't a hex digit.
func hexDigit(ch byte) int {
	if '0' <= ch && ch <= '9' {
		return int(ch - '0')
	}
	if 'a' <= ch && ch <= 'f' {
		return int(ch-'a') + 10
	}
	if 'A' <= ch && ch <= 'F' {
		return int(ch-'A') + 10
	}
	return -1
}
// choice picks between two tokens based on the current byte: if it is ch,
// that byte is consumed and two is returned; otherwise nothing is
// consumed and one is returned.
func (l *Lexer) choice(ch byte, one, two Token) Token {
	if l.ch != ch {
		return one
	}
	l.next()
	return two
}
// PeekByte returns the next byte that hasn't been scanned yet, without
// consuming it, or 0 at end of input. It is used when parsing
// "getline lvalue" expressions.
func (l *Lexer) PeekByte() byte {
	upcoming := l.ch
	return upcoming
}

261
play/vendor/github.com/benhoyt/goawk/lexer/token.go generated vendored Normal file
View File

@ -0,0 +1,261 @@
// Lexer tokens
package lexer
// Token is the type of a single token. The zero value is ILLEGAL.
type Token int
const (
// ILLEGAL is also what KeywordToken returns for a name that isn't a
// keyword
ILLEGAL Token = iota
EOF
NEWLINE
CONCAT // Not really a token, but used as an operator
// Symbols
ADD
ADD_ASSIGN
AND
APPEND
ASSIGN
COLON
COMMA
DECR
DIV
DIV_ASSIGN
DOLLAR
EQUALS
GTE
GREATER
INCR
LBRACE
LBRACKET
LESS
LPAREN
LTE
MATCH
MOD
MOD_ASSIGN
MUL
MUL_ASSIGN
NOT_MATCH
NOT
NOT_EQUALS
OR
PIPE
POW
POW_ASSIGN
QUESTION
RBRACE
RBRACKET
RPAREN
SEMICOLON
SUB
SUB_ASSIGN
// Keywords
BEGIN
BREAK
CONTINUE
DELETE
DO
ELSE
END
EXIT
FOR
FUNCTION
GETLINE
IF
IN
NEXT
PRINT
PRINTF
RETURN
WHILE
// Built-in functions
F_ATAN2
F_CLOSE
F_COS
F_EXP
F_FFLUSH
F_GSUB
F_INDEX
F_INT
F_LENGTH
F_LOG
F_MATCH
F_RAND
F_SIN
F_SPLIT
F_SPRINTF
F_SQRT
F_SRAND
F_SUB
F_SUBSTR
F_SYSTEM
F_TOLOWER
F_TOUPPER
// Literals and names (variables and arrays)
NAME
NUMBER
STRING
REGEX
// Range markers: LAST is the highest-valued token, and FIRST_FUNC to
// LAST_FUNC bracket the built-in function tokens declared above
LAST = REGEX
FIRST_FUNC = F_ATAN2
LAST_FUNC = F_TOUPPER
)
// keywordTokens maps the source spelling of each keyword and built-in
// function name to its token. It is the lookup table behind KeywordToken.
var keywordTokens = map[string]Token{
"BEGIN": BEGIN,
"break": BREAK,
"continue": CONTINUE,
"delete": DELETE,
"do": DO,
"else": ELSE,
"END": END,
"exit": EXIT,
"for": FOR,
"function": FUNCTION,
"getline": GETLINE,
"if": IF,
"in": IN,
"next": NEXT,
"print": PRINT,
"printf": PRINTF,
"return": RETURN,
"while": WHILE,
"atan2": F_ATAN2,
"close": F_CLOSE,
"cos": F_COS,
"exp": F_EXP,
"fflush": F_FFLUSH,
"gsub": F_GSUB,
"index": F_INDEX,
"int": F_INT,
"length": F_LENGTH,
"log": F_LOG,
"match": F_MATCH,
"rand": F_RAND,
"sin": F_SIN,
"split": F_SPLIT,
"sprintf": F_SPRINTF,
"sqrt": F_SQRT,
"srand": F_SRAND,
"sub": F_SUB,
"substr": F_SUBSTR,
"system": F_SYSTEM,
"tolower": F_TOLOWER,
"toupper": F_TOUPPER,
}
// KeywordToken returns the token for the given keyword or built-in
// function name, or ILLEGAL if name is not a keyword.
func KeywordToken(name string) Token {
	if tok, found := keywordTokens[name]; found {
		return tok
	}
	return ILLEGAL
}
// tokenNames maps each token to the display name returned by
// Token.String: the source spelling for symbols, keywords and built-in
// functions, and a descriptive name for the other tokens.
var tokenNames = map[Token]string{
ILLEGAL: "<illegal>",
EOF: "EOF",
NEWLINE: "<newline>",
CONCAT: "<concat>",
ADD: "+",
ADD_ASSIGN: "+=",
AND: "&&",
APPEND: ">>",
ASSIGN: "=",
COLON: ":",
COMMA: ",",
DECR: "--",
DIV: "/",
DIV_ASSIGN: "/=",
DOLLAR: "$",
EQUALS: "==",
GTE: ">=",
GREATER: ">",
INCR: "++",
LBRACE: "{",
LBRACKET: "[",
LESS: "<",
LPAREN: "(",
LTE: "<=",
MATCH: "~",
MOD: "%",
MOD_ASSIGN: "%=",
MUL: "*",
MUL_ASSIGN: "*=",
NOT_MATCH: "!~",
NOT: "!",
NOT_EQUALS: "!=",
OR: "||",
PIPE: "|",
POW: "^",
POW_ASSIGN: "^=",
QUESTION: "?",
RBRACE: "}",
RBRACKET: "]",
RPAREN: ")",
SEMICOLON: ";",
SUB: "-",
SUB_ASSIGN: "-=",
BEGIN: "BEGIN",
BREAK: "break",
CONTINUE: "continue",
DELETE: "delete",
DO: "do",
ELSE: "else",
END: "END",
EXIT: "exit",
FOR: "for",
FUNCTION: "function",
GETLINE: "getline",
IF: "if",
IN: "in",
NEXT: "next",
PRINT: "print",
PRINTF: "printf",
RETURN: "return",
WHILE: "while",
F_ATAN2: "atan2",
F_CLOSE: "close",
F_COS: "cos",
F_EXP: "exp",
F_FFLUSH: "fflush",
F_GSUB: "gsub",
F_INDEX: "index",
F_INT: "int",
F_LENGTH: "length",
F_LOG: "log",
F_MATCH: "match",
F_RAND: "rand",
F_SIN: "sin",
F_SPLIT: "split",
F_SPRINTF: "sprintf",
F_SQRT: "sqrt",
F_SRAND: "srand",
F_SUB: "sub",
F_SUBSTR: "substr",
F_SYSTEM: "system",
F_TOLOWER: "tolower",
F_TOUPPER: "toupper",
NAME: "name",
NUMBER: "number",
STRING: "string",
REGEX: "regex",
}
// String returns the display name of this token, or the empty string for
// a value that isn't a known token.
func (t Token) String() string {
	if name, known := tokenNames[t]; known {
		return name
	}
	return ""
}

1041
play/vendor/github.com/benhoyt/goawk/parser/parser.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

468
play/vendor/github.com/benhoyt/goawk/parser/resolve.go generated vendored Normal file
View File

@ -0,0 +1,468 @@
// Resolve function calls and variable types
package parser
import (
"fmt"
"reflect"
"sort"
. "github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
// varType is the kind of a variable as determined by the resolver.
type varType int

const (
	typeUnknown varType = iota
	typeScalar
	typeArray
)

// String returns "Scalar" or "Array" for those types, and "Unknown" for
// typeUnknown or any other value.
func (t varType) String() string {
	if t == typeScalar {
		return "Scalar"
	}
	if t == typeArray {
		return "Array"
	}
	return "Unknown"
}
// typeInfo records type information for a single variable
type typeInfo struct {
// typ is the variable's type: unknown, scalar, or array
typ varType
// ref is a variable reference expression associated with this entry
// NOTE(review): which reference is kept isn't visible here -- confirm
ref *VarExpr
// scope and index locate the variable; scope prints as Global, Local,
// or Special in String()
scope VarScope
index int
// NOTE(review): callName and argIndex presumably identify a function
// call and argument position the variable is passed at -- confirm
// against the code that fills them in
callName string
argIndex int
}
// String formats all the fields of t on one line, with the scope shown as
// "Global", "Local", or "Special". Used by printVarTypes when debugTypes
// is turned on.
func (t typeInfo) String() string {
	scope := "Special"
	switch t.scope {
	case ScopeGlobal:
		scope = "Global"
	case ScopeLocal:
		scope = "Local"
	}
	return fmt.Sprintf("typ=%s ref=%p scope=%s index=%d callName=%q argIndex=%d",
		t.typ, t.ref, scope, t.index, t.callName, t.argIndex)
}
// A single variable reference (normally scalar)
type varRef struct {
// NOTE(review): funcName is presumably the function the reference
// occurs in, empty at global level (the globals in varTypes are keyed
// by "") -- confirm
funcName string
// ref is the variable expression node for this reference
ref *VarExpr
// NOTE(review): isArg presumably marks a reference passed as a
// function call argument -- confirm
isArg bool
// pos is a source position recorded with the reference
pos Position
}
// A single array reference
type arrayRef struct {
// NOTE(review): funcName is presumably the function the reference
// occurs in, empty at global level -- confirm
funcName string
// ref is the array expression node for this reference
ref *ArrayExpr
// pos is a source position recorded with the reference
pos Position
}
// initResolve sets up the resolver's bookkeeping maps and pre-registers
// the ARGV and ENVIRON arrays.
func (p *parser) initResolve() {
	p.functions = make(map[string]int)
	p.multiExprs = make(map[*MultiExpr]Position, 3)
	p.varTypes = map[string]map[string]typeInfo{
		"": {}, // the empty function name holds the globals
	}

	// The interpreter relies on ARGV and ENVIRON being present
	start := Position{1, 1}
	p.arrayRef("ARGV", start)
	p.arrayRef("ENVIRON", start)
}
// startFunction marks the parser as being inside the function called
// name and gives that function a fresh, empty variable-type table.
// The params argument is not used here.
func (p *parser) startFunction(name string, params []string) {
	p.varTypes[name] = map[string]typeInfo{}
	p.funcName = name
}
// stopFunction signals the end of a function definition by clearing
// the current function name; getScope then reports "" for locals'
// function name and recordUserCall records call sites with inFunc "".
func (p *parser) stopFunction() {
p.funcName = ""
}
// addFunction registers an AWK-defined function under name with the
// given index. The resolver later uses that index to look the function
// up in prog.Functions. A second registration under the same name
// overwrites the first.
func (p *parser) addFunction(name string, index int) {
p.functions[name] = index
}
// userCall records a call to a user function so that its index (and
// argument types) can be resolved after parsing.
type userCall struct {
// The call expression; resolveUserCalls sets its Index and Native
call *UserCallExpr
// Source position of the call, used in error messages
pos Position
// Value of p.funcName when the call was recorded ("" outside any
// function)
inFunc string
}
// recordUserCall remembers a call site, together with the function it
// appears in, so it can be resolved once parsing has finished.
func (p *parser) recordUserCall(call *UserCallExpr, pos Position) {
	site := userCall{call: call, pos: pos, inFunc: p.funcName}
	p.userCalls = append(p.userCalls, site)
}
// resolveUserCalls runs after parsing and fills in the index of every
// recorded user call. It panics with a positioned error if a called
// function is defined neither in AWK nor as a native Go function, or
// if a call passes more arguments than the function declares.
func (p *parser) resolveUserCalls(prog *Program) {
	// Native funcs are numbered in name order so indexes are consistent
	sortedNatives := make([]string, 0, len(p.nativeFuncs))
	for nativeName := range p.nativeFuncs {
		sortedNatives = append(sortedNatives, nativeName)
	}
	sort.Strings(sortedNatives)
	nativeIndexes := make(map[string]int, len(sortedNatives))
	for idx, nativeName := range sortedNatives {
		nativeIndexes[nativeName] = idx
	}

	for _, site := range p.userCalls {
		call := site.call

		// AWK-defined functions take precedence over native Go funcs
		if index, defined := p.functions[call.Name]; defined {
			if len(call.Args) > len(prog.Functions[index].Params) {
				panic(p.posErrorf(site.pos, "%q called with more arguments than declared", call.Name))
			}
			call.Index = index
			continue
		}

		nativeFunc, haveNative := p.nativeFuncs[call.Name]
		if !haveNative {
			panic(p.posErrorf(site.pos, "undefined function %q", call.Name))
		}
		// Variadic native funcs accept any number of arguments
		funcType := reflect.TypeOf(nativeFunc)
		if !funcType.IsVariadic() && len(call.Args) > funcType.NumIn() {
			panic(p.posErrorf(site.pos, "%q called with more arguments than declared", call.Name))
		}
		call.Native = true
		call.Index = nativeIndexes[call.Name]
	}
}
// processUserCallArg handles one argument of a call to funcName. The
// type of a plain variable argument can't be known from context, so if
// this is the variable's first reference its type is reset to unknown
// and the call name and argument index are stored for later inference.
// Non-variable arguments are ignored.
func (p *parser) processUserCallArg(funcName string, arg Expr, index int) {
	varExpr, isVar := arg.(*VarExpr)
	if !isVar {
		return
	}

	scope, owner := p.getScope(varExpr.Name)
	if first := p.varTypes[owner][varExpr.Name].ref; first == varExpr {
		// First reference to this variable; for later references the
		// type is already known
		p.varTypes[owner][varExpr.Name] = typeInfo{
			typ:      typeUnknown,
			ref:      first,
			scope:    scope,
			index:    0,
			callName: funcName,
			argIndex: index,
		}
	}

	// The most recently recorded varRef belongs to this argument; flag
	// it as a call argument for later error handling
	last := len(p.varRefs) - 1
	p.varRefs[last].isArg = true
}
// getScope reports the scope of the variable called name. For a local
// it also returns the current function's name; for special and global
// variables the function name is the empty string. Locals are checked
// first, so a local takes precedence over a special variable of the
// same name.
func (p *parser) getScope(name string) (VarScope, string) {
	if p.locals[name] {
		return ScopeLocal, p.funcName
	}
	if SpecialVarIndex(name) > 0 {
		return ScopeSpecial, ""
	}
	return ScopeGlobal, ""
}
// varRef records a scalar-style reference to the variable called name
// and returns the new *VarExpr. Its Index stays 0 until resolveVars
// patches it. If the variable's type is still unknown it is marked as
// a scalar, keeping any call name recorded for it.
func (p *parser) varRef(name string, pos Position) *VarExpr {
	scope, owner := p.getScope(name)
	ref := &VarExpr{scope, 0, name}
	p.varRefs = append(p.varRefs, varRef{funcName: owner, ref: ref, isArg: false, pos: pos})

	if known := p.varTypes[owner][name]; known.typ == typeUnknown {
		p.varTypes[owner][name] = typeInfo{
			typ:      typeScalar,
			ref:      ref,
			scope:    scope,
			callName: known.callName,
		}
	}
	return ref
}
// arrayRef records an array reference to the variable called name and
// returns the new *ArrayExpr. Its Index stays 0 until resolveVars
// patches it. Special variables can't be used as arrays, so that case
// panics. If the variable's type is still unknown it is marked as an
// array, keeping any call name recorded for it.
func (p *parser) arrayRef(name string, pos Position) *ArrayExpr {
	scope, owner := p.getScope(name)
	if scope == ScopeSpecial {
		panic(p.errorf("can't use scalar %q as array", name))
	}

	ref := &ArrayExpr{scope, 0, name}
	p.arrayRefs = append(p.arrayRefs, arrayRef{funcName: owner, ref: ref, pos: pos})

	if known := p.varTypes[owner][name]; known.typ == typeUnknown {
		p.varTypes[owner][name] = typeInfo{
			typ:      typeArray,
			scope:    scope,
			callName: known.callName,
		}
	}
	return ref
}
// printVarTypes writes the resolved scalar and array maps, followed by
// the type info of every variable grouped by function, to
// p.debugWriter. Function and variable names are sorted so the output
// is stable. Intended for debugging.
func (p *parser) printVarTypes(prog *Program) {
	out := p.debugWriter
	fmt.Fprintf(out, "scalars: %v\n", prog.Scalars)
	fmt.Fprintf(out, "arrays: %v\n", prog.Arrays)

	scopes := make([]string, 0, len(p.varTypes))
	for scopeName := range p.varTypes {
		scopes = append(scopes, scopeName)
	}
	sort.Strings(scopes)

	for _, scopeName := range scopes {
		// The empty function name is the table of globals
		if scopeName == "" {
			fmt.Fprintf(out, "globals\n")
		} else {
			fmt.Fprintf(out, "function %s\n", scopeName)
		}

		infos := p.varTypes[scopeName]
		vars := make([]string, 0, len(infos))
		for varName := range infos {
			vars = append(vars, varName)
		}
		sort.Strings(vars)

		for _, varName := range vars {
			fmt.Fprintf(out, " %s: %s\n", varName, infos[varName])
		}
	}
}
// maxResolveIterations caps the number of type-inference passes in
// resolveVars: if the passes are still making progress after this many
// iterations, resolveVars gives up and panics.
const maxResolveIterations = 10000
// resolveVars resolves variable types that are still unknown and
// generates the variable indexes and name-to-index mappings the
// interpreter uses. In order, it:
//
//  1. infers unknown types from the parameter types of the functions
//     the variables were passed to,
//  2. numbers global scalars and arrays into prog.Scalars/prog.Arrays,
//  3. marks still-unknown parameters as arrays where a call passes one,
//  4. numbers function parameters and sets each Function.Arrays,
//  5. checks argument types at every recorded user call, and
//  6. writes the resolved index into every recorded VarExpr/ArrayExpr.
//
// Type errors, a global sharing its name with a function, and failure
// to converge are reported by panicking with the value returned by
// p.errorf or p.posErrorf.
func (p *parser) resolveVars(prog *Program) {
// First go through all unknown types and try to determine the
// type from the parameter type in that function definition. May
// need multiple passes depending on the order of functions. This
// is not particularly efficient, but on realistic programs it's
// not an issue.
for i := 0; ; i++ {
progressed := false
for funcName, infos := range p.varTypes {
for name, info := range infos {
if info.scope == ScopeSpecial || info.typ != typeUnknown {
// It's a special var or type is already known
continue
}
funcIndex, ok := p.functions[info.callName]
if !ok {
// Function being called is a native function
continue
}
// Determine var type based on type of this parameter
// in the called function (if we know that)
paramName := prog.Functions[funcIndex].Params[info.argIndex]
typ := p.varTypes[info.callName][paramName].typ
if typ != typeUnknown {
if p.debugTypes {
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
funcName, name, typ)
}
// info is a copy of the map value, so store it back
info.typ = typ
p.varTypes[funcName][name] = info
progressed = true
}
}
}
if !progressed {
// If we didn't progress we're done (or trying again is
// not going to help)
break
}
if i >= maxResolveIterations {
panic(p.errorf("too many iterations trying to resolve variable types"))
}
}
// Resolve global variables (iteration order is undefined, so
// assign indexes basically randomly)
prog.Scalars = make(map[string]int)
prog.Arrays = make(map[string]int)
for name, info := range p.varTypes[""] {
_, isFunc := p.functions[name]
if isFunc {
// Global var can't also be the name of a function
panic(p.errorf("global var %q can't also be a function", name))
}
var index int
if info.scope == ScopeSpecial {
// Special variables get their index from SpecialVarIndex and
// are not added to prog.Scalars or prog.Arrays
index = SpecialVarIndex(name)
} else if info.typ == typeArray {
index = len(prog.Arrays)
prog.Arrays[name] = index
} else {
// typeScalar or still typeUnknown: treated as a scalar
index = len(prog.Scalars)
prog.Scalars[name] = index
}
info.index = index
p.varTypes[""][name] = info
}
// Fill in unknown parameter types that are being called with arrays,
// for example, as in the following code:
//
// BEGIN { arr[0]; f(arr) }
// function f(a) { }
for _, c := range p.userCalls {
if c.call.Native {
continue
}
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
if !ok {
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
argType := p.varTypes[funcName][varExpr.Name]
paramType := p.varTypes[function.Name][function.Params[i]]
if argType.typ == typeArray && paramType.typ == typeUnknown {
paramType.typ = argType.typ
p.varTypes[function.Name][function.Params[i]] = paramType
}
}
}
// Resolve local variables (assign indexes in order of params).
// Also patch up Function.Arrays (tells interpreter which args
// are arrays).
for funcName, infos := range p.varTypes {
if funcName == "" {
// Globals were handled above
continue
}
// Scalar and array parameters are numbered independently
scalarIndex := 0
arrayIndex := 0
functionIndex := p.functions[funcName]
function := prog.Functions[functionIndex]
arrays := make([]bool, len(function.Params))
for i, name := range function.Params {
info := infos[name]
var index int
if info.typ == typeArray {
index = arrayIndex
arrayIndex++
arrays[i] = true
} else {
// typeScalar or typeUnknown: variables may still be
// of unknown type if they've never been referenced --
// default to scalar in that case
index = scalarIndex
scalarIndex++
}
info.index = index
p.varTypes[funcName][name] = info
}
// function is a copy, so assign through the slice element
prog.Functions[functionIndex].Arrays = arrays
}
// Check that variables passed to functions are the correct type
for _, c := range p.userCalls {
// Check native function calls
if c.call.Native {
for _, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
if !ok {
// Non-variable expression, must be scalar
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
info := p.varTypes[funcName][varExpr.Name]
if info.typ == typeArray {
panic(p.posErrorf(c.pos, "can't pass array %q to native function", varExpr.Name))
}
}
continue
}
// Check AWK function calls
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
if !ok {
// Non-variable expression: only valid for a scalar param
if function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
}
continue
}
funcName := p.getVarFuncName(prog, varExpr.Name, c.inFunc)
info := p.varTypes[funcName][varExpr.Name]
if info.typ == typeArray && !function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass array %q as scalar param", varExpr.Name))
}
if info.typ != typeArray && function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass scalar %q as array param", varExpr.Name))
}
}
}
if p.debugTypes {
p.printVarTypes(prog)
}
// Patch up variable indexes (interpreter uses an index instead
// of name for more efficient lookups)
for _, varRef := range p.varRefs {
info := p.varTypes[varRef.funcName][varRef.ref.Name]
// An array may appear as a VarExpr only when it is a call argument
if info.typ == typeArray && !varRef.isArg {
panic(p.posErrorf(varRef.pos, "can't use array %q as scalar", varRef.ref.Name))
}
varRef.ref.Index = info.index
}
for _, arrayRef := range p.arrayRefs {
info := p.varTypes[arrayRef.funcName][arrayRef.ref.Name]
if info.typ == typeScalar {
panic(p.posErrorf(arrayRef.pos, "can't use scalar %q as array", arrayRef.ref.Name))
}
arrayRef.ref.Index = info.index
}
}
// getVarFuncName returns inFunc when name is one of that function's
// parameters (i.e. a local), and "" (meaning global) otherwise. Code
// outside any function (inFunc == "") only sees globals.
func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
	if inFunc != "" {
		params := prog.Functions[p.functions[inFunc]].Params
		for i := range params {
			if params[i] == name {
				return inFunc
			}
		}
	}
	return ""
}
// multiExpr wraps exprs in a "multi expression" (the comma-separated
// pseudo-expression that allows commas around print/printf arguments)
// and remembers its position until useMultiExpr marks it as used.
func (p *parser) multiExpr(exprs []Expr, pos Position) Expr {
	multi := &MultiExpr{exprs}
	p.multiExprs[multi] = pos
	return multi
}
// useMultiExpr marks the multi expression as used (by a print/printf
// statement) by removing it from the set of pending multi expressions,
// so checkMultiExprs will not report it.
func (p *parser) useMultiExpr(expr *MultiExpr) {
delete(p.multiExprs, expr)
}
// checkMultiExprs verifies that no multi expression is left unused; a
// leftover one is a syntax error, reported by panicking with an error
// positioned at the earliest such expression.
func (p *parser) checkMultiExprs() {
	if len(p.multiExprs) == 0 {
		return
	}

	// Map iteration order is undefined, so scan for the smallest
	// (line, column) pair, starting from a very large sentinel
	first := Position{Line: 1000000000, Column: 1000000000}
	for _, pos := range p.multiExprs {
		switch {
		case pos.Line < first.Line:
			first = pos
		case pos.Line == first.Line && pos.Column < first.Column:
			first = pos
		}
	}
	panic(p.posErrorf(first, "unexpected comma-separated expression"))
}

6
play/vendor/modules.txt vendored Normal file
View File

@ -0,0 +1,6 @@
# github.com/benhoyt/goawk v1.13.0
## explicit; go 1.13
github.com/benhoyt/goawk/internal/ast
github.com/benhoyt/goawk/interp
github.com/benhoyt/goawk/lexer
github.com/benhoyt/goawk/parser