grawkit/play/vendor/github.com/benhoyt/goawk/interp/interp.go

1370 lines
33 KiB
Go

// Package interp is the GoAWK interpreter (a simple tree-walker).
//
// For basic usage, use the Exec function. For more complicated use
// cases and configuration options, first use the parser package to
// parse the AWK source, and then use ExecProgram to execute it with
// a specific configuration.
//
package interp
import (
"bufio"
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"unicode/utf8"
. "github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
. "github.com/benhoyt/goawk/parser"
)
// Package-level sentinels and helpers. The err* values are not real
// failures: execute returns them for the corresponding AWK statements
// and callers further up compare against them to implement control flow.
var (
// Returned for an "exit" statement; ExecProgram treats it as a normal stop.
errExit = errors.New("exit")
// Returned for a "break" statement; the enclosing loop in execute stops.
errBreak = errors.New("break")
// Returned for a "continue" statement; the enclosing loop in execute
// moves on to its next iteration.
errContinue = errors.New("continue")
// Returned for a "next" statement; execActions skips to the next line.
errNext = errors.New("next")
// True when runtime.GOOS is "windows".
// NOTE(review): presumably selects CRLF line endings on output -- its
// use is outside this part of the file; confirm.
crlfNewline = runtime.GOOS == "windows"
// Matches "name=value" where name is an identifier; group 1 is the name
// and group 2 is the value.
// NOTE(review): presumably used for var=value command-line arguments --
// its use is outside this part of the file; confirm.
varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`)
)
// Error is the error type reported for interpreter-level failures, for
// example a negative field index. The Exec and Eval functions return it
// as a pointer (*Error).
type Error struct {
	message string
}

// Error implements the error interface by returning the stored message.
func (e *Error) Error() string {
	return e.message
}

// newError builds an *Error whose message is produced by formatting
// args according to format, using fmt.Sprintf semantics.
func newError(format string, args ...interface{}) error {
	msg := fmt.Sprintf(format, args...)
	return &Error{message: msg}
}
// returnValue wraps the value of a "return" statement. It implements
// the error interface so that execute can hand it back through the
// ordinary error return path.
type returnValue struct {
	Value value
}

// Error renders the wrapped value as "<return VALUE>", converting
// numbers to text with the "%.6g" format.
func (r returnValue) Error() string {
	text := r.Value.str("%.6g")
	return "<return " + text + ">"
}
// interp holds all state for one execution of a parsed AWK program.
// ExecProgram creates it, fills in defaults and configuration, and then
// runs the BEGIN blocks, pattern-action blocks, and END blocks against it.
type interp struct {
// Input/output
// Destination for normal output; ExecProgram defaults it to a buffered
// os.Stdout writer.
output io.Writer
// Destination for non-fatal error messages; defaults to os.Stderr.
errorOutput io.Writer
scanner *bufio.Scanner
scanners map[string]*bufio.Scanner
// Standard input reader; defaults to os.Stdin.
stdin io.Reader
// ExecProgram initializes this to 1.
// NOTE(review): presumably the ARGV index of the next input file; confirm.
filenameIndex int
hadFiles bool
input io.Reader
inputStreams map[string]io.ReadCloser
outputStreams map[string]io.WriteCloser
commands map[string]*exec.Cmd
// Copies of the Config.NoExec, NoFileWrites and NoFileReads flags.
noExec bool
noFileWrites bool
noFileReads bool
// Program and leading arguments used to run the system shell; defaults
// to {"/bin/sh", "-c"} ({"sh", "-c"} on Windows).
shellCommand []string
// Scalars, arrays, and function state
// Global scalar values, one slot per entry in program.Scalars.
globals []value
stack []value
// Local scalar values read and written for ScopeLocal variables.
frame []value
// All arrays; the first len(program.Arrays) entries are the globals.
arrays []map[string]value
// The last element maps a local array index to its position in arrays
// (see getArrayIndex).
localArrays [][]int
callDepth int
nativeFuncs []nativeFunc
// File, line, and field handling
// Value of the FILENAME special variable.
filename value
// Current record ($0).
line string
// When true, $0 is returned as a plain string rather than a numeric
// string (see getField).
lineIsTrueStr bool
// Value of the NR special variable.
lineNum int
// Value of the FNR special variable.
fileLineNum int
// Fields $1..$NF of the current record; fieldsIsTrueStr runs parallel to
// it and plays the same role as lineIsTrueStr for each field.
fields []string
fieldsIsTrueStr []bool
// Value of the NF special variable.
numFields int
haveFields bool
// Built-in variables
argc int
// CONVFMT and OFMT respectively; both default to "%.6g".
convertFormat string
outputFormat string
// FS, plus its compiled form when FS is longer than one character.
fieldSep string
fieldSepRegex *regexp.Regexp
// RS, plus its compiled form when RS is longer than one byte.
recordSep string
recordSepRegex *regexp.Regexp
// RT special variable.
recordTerminator string
// OFS, ORS and SUBSEP respectively.
outputFieldSep string
outputRecordSep string
subscriptSep string
// RLENGTH and RSTART respectively.
matchLength int
matchStart int
// Misc pieces of state
program *Program
random *rand.Rand
randSeed float64
// Status code returned by ExecProgram; set by "exit" with an argument.
exitStatus int
// Compiled regexes keyed by source string (see compileRegex).
regexCache map[string]*regexp.Regexp
formatCache map[string]cachedFormat
// Copy of Config.Bytes.
bytes bool
}
// Various const configuration. Could make these part of Config if
// we wanted to, but no need for now.
const (
// Maximum number of compiled regexes kept in interp.regexCache (see
// compileRegex).
maxCachedRegexes = 100
// NOTE(review): presumably the matching limit for interp.formatCache;
// its use is outside this part of the file.
maxCachedFormats = 100
maxRecordLength = 10 * 1024 * 1024 // 10MB seems like plenty
// Largest value accepted when assigning NF or a field index (see setVar
// and setField).
maxFieldIndex = 1000000
// NOTE(review): presumably the limit on nested user-function calls; its
// use is outside this part of the file.
maxCallDepth = 1000
// Initial capacity of the value stack, and extra capacity reserved in
// the arrays slice, as allocated by ExecProgram.
initialStackSize = 100
// Buffer size of the default buffered os.Stdout writer.
outputBufSize = 64 * 1024
// NOTE(review): presumably the buffer size for input scanners; its use
// is outside this part of the file.
inputBufSize = 64 * 1024
)
// Config defines the interpreter configuration for ExecProgram.
type Config struct {
// Standard input reader (defaults to os.Stdin)
Stdin io.Reader
// Writer for normal output (defaults to a buffered version of
// os.Stdout)
Output io.Writer
// Writer for non-fatal error messages (defaults to os.Stderr)
Error io.Writer
// The name of the executable (accessible via ARGV[0])
Argv0 string
// Input arguments (usually filenames): empty slice means read
// only from Stdin, and a filename of "-" means read from Stdin
// instead of a real file.
Args []string
// List of name-value pairs for variables to set before executing
// the program (useful for setting FS and other built-in
// variables, for example []string{"FS", ",", "OFS", ","}).
// The length must be even, otherwise ExecProgram returns an error.
// Names that are neither special variables nor globals used by the
// program are silently ignored.
Vars []string
// Map of named Go functions to allow calling from AWK. You need
// to pass this same map to the parser.ParseProgram config.
//
// Functions can have any number of parameters, and variadic
// functions are supported. Functions can have no return values,
// one return value, or two return values (result, error). In the
// two-value case, if the function returns a non-nil error,
// program execution will stop and ExecProgram will return that
// error.
//
// Apart from the error return value, the types supported are
// bool, integer and floating point types (excluding complex),
// and string types (string or []byte).
//
// It's not an error to call a Go function from AWK with fewer
// arguments than it has parameters in Go. In this case, the zero
// value will be used for any additional parameters. However, it
// is a parse error to call a non-variadic function from AWK with
// more arguments than it has parameters in Go.
//
// Functions defined with the "function" keyword in AWK code
// take precedence over functions in Funcs.
Funcs map[string]interface{}
// Set one or more of these to true to prevent unsafe behaviours,
// useful when executing untrusted scripts:
//
// * NoExec prevents system calls via system() or pipe operator
// * NoFileWrites prevents writing to files via '>' or '>>'
// * NoFileReads prevents reading from files via getline or the
// filenames in Args
NoExec bool
NoFileWrites bool
NoFileReads bool
// Exec args used to run system shell. Typically, this will
// be {"/bin/sh", "-c"}. If empty, ExecProgram uses {"/bin/sh", "-c"},
// or {"sh", "-c"} on Windows.
ShellCommand []string
// List of name-value pairs to be assigned to the ENVIRON special
// array, for example []string{"USER", "bob", "HOME", "/home/bob"}.
// If nil (the default), values from os.Environ() are used.
// The length must be even, otherwise ExecProgram returns an error.
Environ []string
// Set to true to use byte indexes instead of character indexes for
// the index, length, match, and substr functions. Note: the default
// was changed from bytes to characters in GoAWK version 1.11.
Bytes bool
}
// ExecProgram executes the parsed program using the given interpreter
// config, returning the exit status code of the program. Error is nil
// on successful execution of the program, even if the program returns
// a non-zero status code.
//
// A nil config is treated as an empty Config, so every option takes its
// default. An error is returned (with status 0) if config.Vars or
// config.Environ has an odd length, if the native functions in
// config.Funcs cannot be initialized, if a variable in config.Vars cannot
// be set, or if the program itself fails at run time.
func ExecProgram(program *Program, config *Config) (int, error) {
	if config == nil {
		// Fall back to defaults rather than panicking on a nil pointer
		// dereference below.
		config = &Config{}
	}
	if len(config.Vars)%2 != 0 {
		return 0, newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars))
	}
	if len(config.Environ)%2 != 0 {
		return 0, newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ))
	}

	p := &interp{program: program}

	// Allocate memory for variables
	p.globals = make([]value, len(program.Scalars))
	p.stack = make([]value, 0, initialStackSize)
	p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize)
	for i := 0; i < len(program.Arrays); i++ {
		p.arrays[i] = make(map[string]value)
	}

	// Initialize defaults
	p.regexCache = make(map[string]*regexp.Regexp, 10)
	p.formatCache = make(map[string]cachedFormat, 10)
	p.randSeed = 1.0
	seed := math.Float64bits(p.randSeed)
	p.random = rand.New(rand.NewSource(int64(seed)))
	p.convertFormat = "%.6g"
	p.outputFormat = "%.6g"
	p.fieldSep = " "
	p.recordSep = "\n"
	p.outputFieldSep = " "
	p.outputRecordSep = "\n"
	p.subscriptSep = "\x1c"
	p.noExec = config.NoExec
	p.noFileWrites = config.NoFileWrites
	p.noFileReads = config.NoFileReads
	p.bytes = config.Bytes
	err := p.initNativeFuncs(config.Funcs)
	if err != nil {
		return 0, err
	}

	// Setup ARGV and other variables from config
	argvIndex := program.Arrays["ARGV"]
	p.setArrayValue(ScopeGlobal, argvIndex, "0", str(config.Argv0))
	p.argc = len(config.Args) + 1
	for i, arg := range config.Args {
		p.setArrayValue(ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg))
	}
	p.filenameIndex = 1
	p.hadFiles = false
	for i := 0; i < len(config.Vars); i += 2 {
		err := p.setVarByName(config.Vars[i], config.Vars[i+1])
		if err != nil {
			return 0, err
		}
	}

	// Setup ENVIRON from config or environment variables
	environIndex := program.Arrays["ENVIRON"]
	if config.Environ != nil {
		for i := 0; i < len(config.Environ); i += 2 {
			p.setArrayValue(ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
		}
	} else {
		for _, kv := range os.Environ() {
			eq := strings.IndexByte(kv, '=')
			if eq >= 0 {
				p.setArrayValue(ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:]))
			}
		}
	}

	// Setup system shell command
	if len(config.ShellCommand) != 0 {
		p.shellCommand = config.ShellCommand
	} else {
		executable := "/bin/sh"
		if runtime.GOOS == "windows" {
			executable = "sh"
		}
		p.shellCommand = []string{executable, "-c"}
	}

	// Setup I/O structures
	p.stdin = config.Stdin
	if p.stdin == nil {
		p.stdin = os.Stdin
	}
	p.output = config.Output
	if p.output == nil {
		p.output = bufio.NewWriterSize(os.Stdout, outputBufSize)
	}
	p.errorOutput = config.Error
	if p.errorOutput == nil {
		p.errorOutput = os.Stderr
	}
	p.inputStreams = make(map[string]io.ReadCloser)
	p.outputStreams = make(map[string]io.WriteCloser)
	p.commands = make(map[string]*exec.Cmd)
	p.scanners = make(map[string]*bufio.Scanner)
	defer p.closeAll()

	// Execute the program! BEGIN, then pattern/actions, then END
	err = p.execBeginEnd(program.Begin)
	if err != nil && err != errExit {
		return 0, err
	}
	if program.Actions == nil && program.End == nil {
		// Nothing consumes input, so don't read any.
		return p.exitStatus, nil
	}
	if err != errExit {
		// "exit" in BEGIN skips the pattern-action blocks but still runs END.
		err = p.execActions(program.Actions)
		if err != nil && err != errExit {
			return 0, err
		}
	}
	err = p.execBeginEnd(program.End)
	if err != nil && err != errExit {
		return 0, err
	}
	return p.exitStatus, nil
}
// Exec is a convenience wrapper that parses the AWK source and runs it
// with FS set to fieldSep. Input is read from the input reader (nil
// means use os.Stdin) and output is written to the output writer (nil
// means use a buffered version of os.Stdout). Non-fatal error messages
// are discarded. A parse or execution error is returned; the program's
// exit status is not.
func Exec(source, fieldSep string, input io.Reader, output io.Writer) error {
	program, parseErr := ParseProgram([]byte(source), nil)
	if parseErr != nil {
		return parseErr
	}
	_, execErr := ExecProgram(program, &Config{
		Stdin:  input,
		Output: output,
		Error:  ioutil.Discard,
		Vars:   []string{"FS", fieldSep},
	})
	return execErr
}
// execBeginEnd runs each BEGIN (or END) block in order, stopping at and
// returning the first non-nil error, including control-flow sentinels
// such as errExit.
func (p *interp) execBeginEnd(beginEnd []Stmts) error {
	for i := range beginEnd {
		if err := p.executes(beginEnd[i]); err != nil {
			return err
		}
	}
	return nil
}
// execActions reads input line by line and, for every line, runs each
// pattern-action block whose pattern matches. It returns nil at end of
// input, or the first error from reading, evaluating a pattern, printing,
// or executing a body (errNext is handled here and is not returned).
func (p *interp) execActions(actions []Action) error {
	// rangeActive[i] records whether the range pattern of actions[i] is
	// currently between its start and stop lines.
	rangeActive := make([]bool, len(actions))

records:
	for {
		// Fetch the next record and make it the current $0
		record, err := p.nextLine()
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
		p.setLine(record, false)

		for idx, act := range actions {
			// Decide whether this action fires for the current record
			var fire bool
			switch pattern := act.Pattern; len(pattern) {
			case 0:
				// A missing pattern always matches
				fire = true
			case 1:
				// Plain boolean pattern
				result, err := p.eval(pattern[0])
				if err != nil {
					return err
				}
				fire = result.boolean()
			case 2:
				// Range pattern: the start is tested only while outside the
				// range; the stop is tested on every line inside it,
				// including the line that opened it.
				if !rangeActive[idx] {
					start, err := p.eval(pattern[0])
					if err != nil {
						return err
					}
					rangeActive[idx] = start.boolean()
				}
				fire = rangeActive[idx]
				if fire {
					stop, err := p.eval(pattern[1])
					if err != nil {
						return err
					}
					rangeActive[idx] = !stop.boolean()
				}
			}
			if !fire {
				continue
			}

			// A missing body means { print $0 }
			if act.Stmts == nil {
				if err := p.printLine(p.output, p.line); err != nil {
					return err
				}
				continue
			}

			// Run the body; "next" abandons the remaining actions for
			// this record.
			switch err := p.executes(act.Stmts); {
			case err == errNext:
				continue records
			case err != nil:
				return err
			}
		}
	}
}
// executes runs the statements in order and returns the first non-nil
// error (which may be a control-flow sentinel), or nil if all succeed.
func (p *interp) executes(stmts Stmts) error {
	for _, stmt := range stmts {
		if err := p.execute(stmt); err != nil {
			return err
		}
	}
	return nil
}
// execute runs a single statement.
//
// Besides genuine errors, the returned error may be one of the
// control-flow sentinels errBreak, errContinue, errNext or errExit, or a
// returnValue carrying the result of a "return" statement. Loop
// statements consume errBreak and errContinue themselves; everything
// else propagates to the caller. It panics on an unknown statement type.
func (p *interp) execute(stmt Stmt) error {
	switch s := stmt.(type) {
	case *ExprStmt:
		// Expression statement: simply throw away the expression value
		_, err := p.eval(s.Expr)
		return err

	case *PrintStmt:
		// Print OFS-separated args followed by ORS (usually newline)
		var line string
		if len(s.Args) > 0 {
			strs := make([]string, len(s.Args))
			for i, a := range s.Args {
				v, err := p.eval(a)
				if err != nil {
					return err
				}
				strs[i] = v.str(p.outputFormat)
			}
			line = strings.Join(strs, p.outputFieldSep)
		} else {
			// "print" with no args is equivalent to "print $0"
			line = p.line
		}
		output, err := p.getOutputStream(s.Redirect, s.Dest)
		if err != nil {
			return err
		}
		return p.printLine(output, line)

	case *PrintfStmt:
		// printf(fmt, arg1, arg2, ...): uses our version of sprintf
		// to build the formatted string and then print that
		formatValue, err := p.eval(s.Args[0])
		if err != nil {
			return err
		}
		format := p.toString(formatValue)
		args := make([]value, len(s.Args)-1)
		for i, a := range s.Args[1:] {
			args[i], err = p.eval(a)
			if err != nil {
				return err
			}
		}
		output, err := p.getOutputStream(s.Redirect, s.Dest)
		if err != nil {
			return err
		}
		str, err := p.sprintf(format, args)
		if err != nil {
			return err
		}
		err = writeOutput(output, str)
		if err != nil {
			return err
		}

	case *IfStmt:
		v, err := p.eval(s.Cond)
		if err != nil {
			return err
		}
		if v.boolean() {
			return p.executes(s.Body)
		}
		// Doesn't do anything if s.Else is nil
		return p.executes(s.Else)

	case *ForStmt:
		// C-like for loop with pre-statement, cond, and post-statement
		if s.Pre != nil {
			err := p.execute(s.Pre)
			if err != nil {
				return err
			}
		}
		for {
			if s.Cond != nil {
				v, err := p.eval(s.Cond)
				if err != nil {
					return err
				}
				if !v.boolean() {
					break
				}
			}
			err := p.executes(s.Body)
			if err == errBreak {
				break
			}
			// "continue" falls through to the post-statement
			if err != nil && err != errContinue {
				return err
			}
			if s.Post != nil {
				err := p.execute(s.Post)
				if err != nil {
					return err
				}
			}
		}

	case *ForInStmt:
		// Foreach-style "for (key in array)" loop
		array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)]
		for index := range array {
			err := p.setVar(s.Var.Scope, s.Var.Index, str(index))
			if err != nil {
				return err
			}
			err = p.executes(s.Body)
			if err == errBreak {
				break
			}
			if err == errContinue {
				continue
			}
			if err != nil {
				return err
			}
		}

	case *ReturnStmt:
		// Return statement uses special error value which is "caught"
		// by the callUser function
		var v value
		if s.Value != nil {
			var err error
			v, err = p.eval(s.Value)
			if err != nil {
				return err
			}
		}
		return returnValue{v}

	case *WhileStmt:
		// Simple "while (cond)" loop
		for {
			v, err := p.eval(s.Cond)
			if err != nil {
				return err
			}
			if !v.boolean() {
				break
			}
			err = p.executes(s.Body)
			if err == errBreak {
				break
			}
			if err == errContinue {
				continue
			}
			if err != nil {
				return err
			}
		}

	case *DoWhileStmt:
		// Do-while loop (tests condition after executing body)
		for {
			err := p.executes(s.Body)
			if err == errBreak {
				break
			}
			// "continue" must still evaluate the loop condition, so it
			// falls through to the test below instead of restarting the
			// body. Otherwise "do { continue } while (0)" never ends.
			if err != nil && err != errContinue {
				return err
			}
			v, err := p.eval(s.Cond)
			if err != nil {
				return err
			}
			if !v.boolean() {
				break
			}
		}

	// Break, continue, next, and exit statements
	case *BreakStmt:
		return errBreak
	case *ContinueStmt:
		return errContinue
	case *NextStmt:
		return errNext
	case *ExitStmt:
		if s.Status != nil {
			status, err := p.eval(s.Status)
			if err != nil {
				return err
			}
			p.exitStatus = int(status.num())
		}
		// Return special errExit value "caught" by top-level executor
		return errExit

	case *DeleteStmt:
		if len(s.Index) > 0 {
			// Delete single key from array
			index, err := p.evalIndex(s.Index)
			if err != nil {
				return err
			}
			array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)]
			delete(array, index) // Does nothing if key isn't present
		} else {
			// Delete entire array
			array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)]
			for k := range array {
				delete(array, k)
			}
		}

	case *BlockStmt:
		// Nested block (just syntax, doesn't do anything)
		return p.executes(s.Body)

	default:
		// Should never happen
		panic(fmt.Sprintf("unexpected stmt type: %T", stmt))
	}
	return nil
}
// eval evaluates one expression node and returns its value. A non-nil
// error means evaluation failed and the returned value should be ignored.
// It panics on an unknown expression type.
func (p *interp) eval(expr Expr) (value, error) {
	switch e := expr.(type) {
	case *NumExpr:
		// Numeric literal
		return num(e.Value), nil

	case *StrExpr:
		// String literal
		return str(e.Value), nil

	case *FieldExpr:
		// $n: evaluate n, then fetch that field
		indexValue, err := p.eval(e.Index)
		if err != nil {
			return null(), err
		}
		return p.getField(int(indexValue.num()))

	case *VarExpr:
		// Read of a global, local, or special variable
		return p.getVar(e.Scope, e.Index), nil

	case *RegExpr:
		// A bare /regex/ means: $0 ~ /regex/
		re, err := p.compileRegex(e.Regex)
		if err != nil {
			return null(), err
		}
		return boolean(re.MatchString(p.line)), nil

	case *BinaryExpr:
		// The left operand is always evaluated. For && and || the right
		// operand is evaluated only when the left does not already
		// decide the result.
		lhs, err := p.eval(e.Left)
		if err != nil {
			return null(), err
		}
		if e.Op == AND && !lhs.boolean() {
			return num(0), nil
		}
		if e.Op == OR && lhs.boolean() {
			return num(1), nil
		}
		rhs, err := p.eval(e.Right)
		if err != nil {
			return null(), err
		}
		if e.Op == AND || e.Op == OR {
			return boolean(rhs.boolean()), nil
		}
		return p.evalBinary(e.Op, lhs, rhs)

	case *IncrExpr:
		// ++x, x++, --x, x--. The operand is evaluated once, keeping the
		// array key or field number so the store below reuses it.
		before, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Expr)
		if err != nil {
			return null(), err
		}
		oldNum := before.num()
		newNum := oldNum - 1
		if e.Op == INCR {
			newNum = oldNum + 1
		}
		updated := num(newNum)
		if err := p.assignAug(e.Expr, arrayIndex, fieldIndex, updated); err != nil {
			return null(), err
		}
		// Prefix forms yield the new value, postfix forms the old one
		if e.Pre {
			return updated, nil
		}
		return num(oldNum), nil

	case *AssignExpr:
		// Plain assignment; the expression's value is the right-hand side
		rhs, err := p.eval(e.Right)
		if err != nil {
			return null(), err
		}
		if err := p.assign(e.Left, rhs); err != nil {
			return null(), err
		}
		return rhs, nil

	case *AugAssignExpr:
		// Augmented assignment such as +=. The right side is evaluated
		// before the left, and the expression's value is the stored result.
		rhs, err := p.eval(e.Right)
		if err != nil {
			return null(), err
		}
		current, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Left)
		if err != nil {
			return null(), err
		}
		result, err := p.evalBinary(e.Op, current, rhs)
		if err != nil {
			return null(), err
		}
		if err := p.assignAug(e.Left, arrayIndex, fieldIndex, result); err != nil {
			return null(), err
		}
		return result, nil

	case *CondExpr:
		// cond ? a : b -- only the selected branch is evaluated
		cond, err := p.eval(e.Cond)
		if err != nil {
			return null(), err
		}
		if cond.boolean() {
			return p.eval(e.True)
		}
		return p.eval(e.False)

	case *IndexExpr:
		// array[key] read
		key, err := p.evalIndex(e.Index)
		if err != nil {
			return null(), err
		}
		return p.getArrayValue(e.Array.Scope, e.Array.Index, key), nil

	case *CallExpr:
		// Builtin function call
		return p.callBuiltin(e.Func, e.Args)

	case *UnaryExpr:
		// Unary !, + or -
		operand, err := p.eval(e.Value)
		if err != nil {
			return null(), err
		}
		return p.evalUnary(e.Op, operand), nil

	case *InExpr:
		// "key in array" membership test (does not create the element)
		key, err := p.evalIndex(e.Index)
		if err != nil {
			return null(), err
		}
		arr := p.arrays[p.getArrayIndex(e.Array.Scope, e.Array.Index)]
		_, present := arr[key]
		return boolean(present), nil

	case *UserCallExpr:
		// Call to a native Go function or a user-defined AWK function
		if e.Native {
			return p.callNative(e.Index, e.Args)
		}
		return p.callUser(e.Index, e.Args)

	case *GetlineExpr:
		// getline yields 1 when a record was read, 0 at end of input,
		// and -1 on a read error.
		var record string
		switch {
		case e.Command != nil:
			// "cmd" | getline
			cmdValue, err := p.eval(e.Command)
			if err != nil {
				return null(), err
			}
			scanner, err := p.getInputScannerPipe(p.toString(cmdValue))
			if err != nil {
				return null(), err
			}
			if !scanner.Scan() {
				if scanner.Err() != nil {
					return num(-1), nil
				}
				return num(0), nil
			}
			record = scanner.Text()

		case e.File != nil:
			// getline < "file"
			fileValue, err := p.eval(e.File)
			if err != nil {
				return null(), err
			}
			scanner, err := p.getInputScannerFile(p.toString(fileValue))
			if err != nil {
				if _, isPathErr := err.(*os.PathError); isPathErr {
					// File not found is not a hard error, getline just returns -1.
					// See: https://github.com/benhoyt/goawk/issues/41
					return num(-1), nil
				}
				return null(), err
			}
			if !scanner.Scan() {
				if scanner.Err() != nil {
					return num(-1), nil
				}
				return num(0), nil
			}
			record = scanner.Text()

		default:
			// Plain getline from the main input. Flush first in case the
			// program has just written a prompt.
			p.flushOutputAndError()
			var err error
			record, err = p.nextLine()
			if err == io.EOF {
				return num(0), nil
			}
			if err != nil {
				return num(-1), nil
			}
		}

		// Store into the target variable if given, otherwise into $0
		if e.Target == nil {
			p.setLine(record, false)
			return num(1), nil
		}
		if err := p.assign(e.Target, numStr(record)); err != nil {
			return null(), err
		}
		return num(1), nil

	default:
		// Should never happen
		panic(fmt.Sprintf("unexpected expr type: %T", expr))
	}
}
// evalForAugAssign reads the current value of an assignable expression
// (variable, array element, or field) for use by ++/-- and augmented
// assignment. Alongside the value it returns the evaluated array key (for
// an IndexExpr) or field number (for a FieldExpr) so that assignAug can
// store the result without evaluating the index expression again. For
// any other expression type the zero values are returned.
func (p *interp) evalForAugAssign(expr Expr) (v value, arrayIndex string, fieldIndex int, err error) {
	switch target := expr.(type) {
	case *VarExpr:
		v = p.getVar(target.Scope, target.Index)

	case *IndexExpr:
		if arrayIndex, err = p.evalIndex(target.Index); err != nil {
			return null(), "", 0, err
		}
		v = p.getArrayValue(target.Array.Scope, target.Array.Index, arrayIndex)

	case *FieldExpr:
		indexValue, evalErr := p.eval(target.Index)
		if evalErr != nil {
			return null(), "", 0, evalErr
		}
		fieldIndex = int(indexValue.num())
		if v, err = p.getField(fieldIndex); err != nil {
			return null(), "", 0, err
		}
	}
	return v, arrayIndex, fieldIndex, nil
}
// assignAug stores v into the target of a ++/-- or augmented assignment,
// using the array key or field number previously produced by
// evalForAugAssign. Any expression that is neither a VarExpr nor an
// IndexExpr is treated as a field assignment.
func (p *interp) assignAug(expr Expr, arrayIndex string, fieldIndex int, v value) error {
	switch target := expr.(type) {
	case *VarExpr:
		return p.setVar(target.Scope, target.Index, v)
	case *IndexExpr:
		p.setArrayValue(target.Array.Scope, target.Array.Index, arrayIndex, v)
		return nil
	default: // *FieldExpr
		return p.setField(fieldIndex, p.toString(v))
	}
}
// getVar returns the value of the variable at index within the given
// scope. Globals and locals are read from their slices; any other scope
// is treated as ScopeSpecial and mapped to the matching interpreter
// field. It panics on an unknown special variable index.
func (p *interp) getVar(scope VarScope, index int) value {
	if scope == ScopeGlobal {
		return p.globals[index]
	}
	if scope == ScopeLocal {
		return p.frame[index]
	}

	// ScopeSpecial
	switch index {
	case V_NF:
		// NF needs the current record to have been split into fields
		p.ensureFields()
		return num(float64(p.numFields))
	case V_NR:
		return num(float64(p.lineNum))
	case V_RLENGTH:
		return num(float64(p.matchLength))
	case V_RSTART:
		return num(float64(p.matchStart))
	case V_FNR:
		return num(float64(p.fileLineNum))
	case V_ARGC:
		return num(float64(p.argc))
	case V_CONVFMT:
		return str(p.convertFormat)
	case V_FILENAME:
		return p.filename
	case V_FS:
		return str(p.fieldSep)
	case V_OFMT:
		return str(p.outputFormat)
	case V_OFS:
		return str(p.outputFieldSep)
	case V_ORS:
		return str(p.outputRecordSep)
	case V_RS:
		return str(p.recordSep)
	case V_RT:
		return str(p.recordTerminator)
	case V_SUBSEP:
		return str(p.subscriptSep)
	default:
		panic(fmt.Sprintf("unexpected special variable index: %d", index))
	}
}
// setVarByName assigns value (as a numeric string) to the special or
// global scalar called name. Special variables take precedence. A name
// that is neither special nor a global used by the program is ignored
// and nil is returned.
func (p *interp) setVarByName(name, value string) error {
	if special := SpecialVarIndex(name); special > 0 {
		return p.setVar(ScopeSpecial, special, numStr(value))
	}
	if global, ok := p.program.Scalars[name]; ok {
		return p.setVar(ScopeGlobal, global, numStr(value))
	}
	// Not referenced by the program: nothing to set
	return nil
}
// setVar stores v into the variable at index within the given scope.
// Globals and locals are written straight to their slices; any other
// scope is treated as ScopeSpecial and updates the matching interpreter
// field, with extra work for NF (resizes the fields and rebuilds $0) and
// for FS and RS (compile a regex when needed). It returns an error for
// an out-of-range NF or an invalid FS/RS regex, and panics on an unknown
// special variable index.
func (p *interp) setVar(scope VarScope, index int, v value) error {
	if scope == ScopeGlobal {
		p.globals[index] = v
		return nil
	}
	if scope == ScopeLocal {
		p.frame[index] = v
		return nil
	}

	// ScopeSpecial
	switch index {
	case V_NF:
		n := int(v.num())
		switch {
		case n < 0:
			return newError("NF set to negative value: %d", n)
		case n > maxFieldIndex:
			return newError("NF set too large: %d", n)
		}
		p.ensureFields()
		p.numFields = n
		// Drop surplus fields, or pad with empty ones, to reach n
		if n < len(p.fields) {
			p.fields = p.fields[:n]
			p.fieldsIsTrueStr = p.fieldsIsTrueStr[:n]
		}
		for len(p.fields) < n {
			p.fields = append(p.fields, "")
			p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
		}
		// Changing NF rebuilds $0 from the fields, joined by OFS
		p.line = strings.Join(p.fields, p.outputFieldSep)
		p.lineIsTrueStr = true

	case V_NR:
		p.lineNum = int(v.num())
	case V_RLENGTH:
		p.matchLength = int(v.num())
	case V_RSTART:
		p.matchStart = int(v.num())
	case V_FNR:
		p.fileLineNum = int(v.num())
	case V_ARGC:
		p.argc = int(v.num())
	case V_CONVFMT:
		p.convertFormat = p.toString(v)
	case V_FILENAME:
		p.filename = v

	case V_FS:
		sep := p.toString(v)
		p.fieldSep = sep
		// A separator longer than one character is a regex
		// (compare to interp.ensureFields)
		if utf8.RuneCountInString(sep) > 1 {
			re, err := regexp.Compile(sep)
			if err != nil {
				return newError("invalid regex %q: %s", sep, err)
			}
			p.fieldSepRegex = re
		}

	case V_OFMT:
		p.outputFormat = p.toString(v)
	case V_OFS:
		p.outputFieldSep = p.toString(v)
	case V_ORS:
		p.outputRecordSep = p.toString(v)

	case V_RS:
		sep := p.toString(v)
		p.recordSep = sep
		// Empty and single-byte separators use specialized splitters, not
		// a regex (compare to interp.newScanner)
		if len(sep) > 1 {
			if utf8.RuneCountInString(sep) == 1 {
				// A single multi-byte character is matched literally via
				// the regex splitter. Quoting is not strictly necessary as
				// no multi-byte chars are regex meta chars.
				p.recordSepRegex = regexp.MustCompile(regexp.QuoteMeta(sep))
			} else {
				re, err := regexp.Compile(sep)
				if err != nil {
					return newError("invalid regex %q: %s", sep, err)
				}
				p.recordSepRegex = re
			}
		}

	case V_RT:
		p.recordTerminator = p.toString(v)
	case V_SUBSEP:
		p.subscriptSep = p.toString(v)
	default:
		panic(fmt.Sprintf("unexpected special variable index: %d", index))
	}
	return nil
}
// getArrayIndex translates a (scope, index) array reference into a
// position in p.arrays. A global array's index is already its position;
// any other scope is resolved through the last entry of p.localArrays.
func (p *interp) getArrayIndex(scope VarScope, index int) int {
	if scope != ScopeGlobal {
		locals := p.localArrays[len(p.localArrays)-1]
		return locals[index]
	}
	return index
}
// getArrayValue returns the element stored under key index in the given
// array. Reading a missing element creates it with the zero value, since
// per the POSIX spec "Any other reference to a nonexistent array element
// [apart from "in" expressions] shall automatically create it."
func (p *interp) getArrayValue(scope VarScope, arrayIndex int, index string) value {
	arr := p.arrays[p.getArrayIndex(scope, arrayIndex)]
	if existing, found := arr[index]; found {
		return existing
	}
	var empty value
	arr[index] = empty
	return empty
}
// setArrayValue stores v under key index in the given array, replacing
// any existing element.
func (p *interp) setArrayValue(scope VarScope, arrayIndex int, index string, v value) {
	p.arrays[p.getArrayIndex(scope, arrayIndex)][index] = v
}
// getField returns the value of field number index, i.e. "$index".
// Index 0 is the whole record, an index beyond the last field yields the
// empty string, and a negative index is an error. A field (or $0) that
// is flagged as a true string is returned as a plain string; otherwise
// it is returned as a numeric string.
func (p *interp) getField(index int) (value, error) {
	switch {
	case index < 0:
		return null(), newError("field index negative: %d", index)
	case index == 0:
		if p.lineIsTrueStr {
			return str(p.line), nil
		}
		return numStr(p.line), nil
	}

	p.ensureFields()
	if index > len(p.fields) {
		return str(""), nil
	}
	pos := index - 1
	if p.fieldsIsTrueStr[pos] {
		return str(p.fields[pos]), nil
	}
	return numStr(p.fields[pos]), nil
}
// setField assigns value to field number index, i.e. "$index = value".
// Index 0 replaces the whole record. For other indexes the field list is
// padded with empty fields as needed, NF is updated, and $0 is rebuilt by
// joining the fields with OFS. A negative index, or one greater than
// maxFieldIndex, is an error.
func (p *interp) setField(index int, value string) error {
	switch {
	case index == 0:
		p.setLine(value, true)
		return nil
	case index < 0:
		return newError("field index negative: %d", index)
	case index > maxFieldIndex:
		return newError("field index too large: %d", index)
	}

	// Grow the field list with empty strings up to the requested index
	p.ensureFields()
	for len(p.fields) < index {
		p.fields = append(p.fields, "")
		p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true)
	}

	pos := index - 1
	p.fields[pos] = value
	p.fieldsIsTrueStr[pos] = true
	p.numFields = len(p.fields)

	// Assigning a field rebuilds $0
	p.line = strings.Join(p.fields, p.outputFieldSep)
	p.lineIsTrueStr = true
	return nil
}
// toString converts v to its string form, formatting numbers with the
// current CONVFMT.
func (p *interp) toString(v value) string {
	format := p.convertFormat
	return v.str(format)
}
// compileRegex returns the compiled form of regex, taking it from the
// cache when present. A pattern that fails to compile yields an *Error.
// The cache is a simple non-LRU one: only the first maxCachedRegexes
// distinct patterns are stored, and later ones are compiled on each call.
func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) {
	cached, hit := p.regexCache[regex]
	if hit {
		return cached, nil
	}
	compiled, err := regexp.Compile(regex)
	if err != nil {
		return nil, newError("invalid regex %q: %s", regex, err)
	}
	if len(p.regexCache) < maxCachedRegexes {
		p.regexCache[regex] = compiled
	}
	return compiled, nil
}
// evalBinary applies the binary operator op to the already-evaluated
// operands l and r (the short-circuit operators && and || are handled by
// eval, not here).
//
// Comparisons are done on strings (converted with CONVFMT) if either
// operand is a true string, and on numbers otherwise. Division or modulo
// by zero and an invalid regex on the right of ~ or !~ return an error.
// It panics on an unknown operator.
func (p *interp) evalBinary(op Token, l, r value) (value, error) {
	// Note: cases are ordered (very roughly) in order of frequency
	// of occurrence for performance reasons. Benchmark on common code
	// before changing the order.
	switch op {
	case ADD:
		return num(l.num() + r.num()), nil
	case SUB:
		return num(l.num() - r.num()), nil
	case EQUALS:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) == p.toString(r)), nil
		}
		return boolean(lNum == rNum), nil
	case LESS:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) < p.toString(r)), nil
		}
		return boolean(lNum < rNum), nil
	case LTE:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) <= p.toString(r)), nil
		}
		return boolean(lNum <= rNum), nil
	case CONCAT:
		return str(p.toString(l) + p.toString(r)), nil
	case MUL:
		return num(l.num() * r.num()), nil
	case DIV:
		divisor := r.num()
		if divisor == 0.0 {
			return null(), newError("division by zero")
		}
		return num(l.num() / divisor), nil
	case GREATER:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) > p.toString(r)), nil
		}
		return boolean(lNum > rNum), nil
	case GTE:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) >= p.toString(r)), nil
		}
		return boolean(lNum >= rNum), nil
	case NOT_EQUALS:
		lNum, lStr := l.isTrueStr()
		rNum, rStr := r.isTrueStr()
		if lStr || rStr {
			return boolean(p.toString(l) != p.toString(r)), nil
		}
		return boolean(lNum != rNum), nil
	case MATCH:
		// l ~ r: r is the regex, l the subject
		re, err := p.compileRegex(p.toString(r))
		if err != nil {
			return null(), err
		}
		return boolean(re.MatchString(p.toString(l))), nil
	case NOT_MATCH:
		// l !~ r
		re, err := p.compileRegex(p.toString(r))
		if err != nil {
			return null(), err
		}
		return boolean(!re.MatchString(p.toString(l))), nil
	case POW:
		return num(math.Pow(l.num(), r.num())), nil
	case MOD:
		divisor := r.num()
		if divisor == 0.0 {
			return null(), newError("division by zero in mod")
		}
		return num(math.Mod(l.num(), divisor)), nil
	default:
		panic(fmt.Sprintf("unexpected binary operation: %s", op))
	}
}
// evalUnary applies the unary operator op to v: numeric negation (-),
// logical not (!), or conversion to number (+). It panics on any other
// operator.
func (p *interp) evalUnary(op Token, v value) value {
	if op == SUB {
		return num(-v.num())
	}
	if op == NOT {
		return boolean(!v.boolean())
	}
	if op == ADD {
		return num(v.num())
	}
	panic(fmt.Sprintf("unexpected unary operation: %s", op))
}
// assign stores right into the lvalue left, which must be a variable, an
// array element, or a field. Index expressions on the left are evaluated
// here. It panics on any other expression type.
func (p *interp) assign(left Expr, right value) error {
	switch target := left.(type) {
	case *VarExpr:
		return p.setVar(target.Scope, target.Index, right)

	case *IndexExpr:
		key, err := p.evalIndex(target.Index)
		if err != nil {
			return err
		}
		p.setArrayValue(target.Array.Scope, target.Array.Index, key, right)
		return nil

	case *FieldExpr:
		indexValue, err := p.eval(target.Index)
		if err != nil {
			return err
		}
		return p.setField(int(indexValue.num()), p.toString(right))

	default:
		// Shouldn't happen
		panic(fmt.Sprintf("unexpected lvalue type: %T", left))
	}
}
// evalIndex evaluates the subscript expressions of an array reference
// and returns the resulting key string. Each subscript is converted to a
// string with CONVFMT; multiple subscripts are joined with SUBSEP.
func (p *interp) evalIndex(indexExprs []Expr) (string, error) {
	// Fast path: a single subscript needs no joining
	if len(indexExprs) == 1 {
		only, err := p.eval(indexExprs[0])
		if err != nil {
			return "", err
		}
		return p.toString(only), nil
	}

	// Capacity 3 so that up to 3-dimensional indices won't require heap
	// allocation
	parts := make([]string, 0, 3)
	for i := range indexExprs {
		sub, err := p.eval(indexExprs[i])
		if err != nil {
			return "", err
		}
		parts = append(parts, p.toString(sub))
	}
	return strings.Join(parts, p.subscriptSep), nil
}