mirror of https://github.com/deuill/grawkit.git
1104 lines
30 KiB
Go
1104 lines
30 KiB
Go
// Package interp is the GoAWK interpreter.
|
|
//
|
|
// For basic usage, use the Exec function. For more complicated use
|
|
// cases and configuration options, first use the parser package to
|
|
// parse the AWK source, and then use ExecProgram to execute it with
|
|
// a specific configuration.
|
|
//
|
|
// If you need to re-run the same parsed program repeatedly on different
|
|
// inputs or with different variables, use New to instantiate an Interpreter
|
|
// and then call the Interpreter.Execute method as many times as you need.
|
|
package interp
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"math"
|
|
"math/rand"
|
|
"os"
|
|
"os/exec"
|
|
"regexp"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"unicode/utf8"
|
|
|
|
"github.com/benhoyt/goawk/internal/ast"
|
|
"github.com/benhoyt/goawk/internal/compiler"
|
|
"github.com/benhoyt/goawk/parser"
|
|
)
|
|
|
|
var (
|
|
errExit = errors.New("exit")
|
|
errBreak = errors.New("break")
|
|
errNext = errors.New("next")
|
|
|
|
errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter")
|
|
|
|
crlfNewline = runtime.GOOS == "windows"
|
|
varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`)
|
|
|
|
defaultShellCommand = getDefaultShellCommand()
|
|
)
|
|
|
|
// Error is the error type produced by the interpreter itself; the Exec and
// Eval functions return it (as *Error) when interpretation fails, for
// instance when FS is assigned an invalid regular expression.
type Error struct {
	message string
}

// Error implements the error interface and returns the stored message
// unchanged.
func (e *Error) Error() string { return e.message }
|
|
|
|
func newError(format string, args ...interface{}) error {
|
|
return &Error{fmt.Sprintf(format, args...)}
|
|
}
|
|
|
|
type returnValue struct {
|
|
Value value
|
|
}
|
|
|
|
func (r returnValue) Error() string {
|
|
return "<return " + r.Value.str("%.6g") + ">"
|
|
}
|
|
|
|
type interp struct {
|
|
// Input/output
|
|
output io.Writer
|
|
errorOutput io.Writer
|
|
scanner *bufio.Scanner
|
|
scanners map[string]*bufio.Scanner
|
|
stdin io.Reader
|
|
filenameIndex int
|
|
hadFiles bool
|
|
input io.Reader
|
|
inputBuffer []byte
|
|
inputStreams map[string]io.ReadCloser
|
|
outputStreams map[string]io.WriteCloser
|
|
commands map[string]*exec.Cmd
|
|
noExec bool
|
|
noFileWrites bool
|
|
noFileReads bool
|
|
shellCommand []string
|
|
csvOutput *bufio.Writer
|
|
noArgVars bool
|
|
|
|
// Scalars, arrays, and function state
|
|
globals []value
|
|
stack []value
|
|
sp int
|
|
frame []value
|
|
arrays []map[string]value
|
|
localArrays [][]int
|
|
callDepth int
|
|
nativeFuncs []nativeFunc
|
|
|
|
// File, line, and field handling
|
|
filename value
|
|
line string
|
|
lineIsTrueStr bool
|
|
lineNum int
|
|
fileLineNum int
|
|
fields []string
|
|
fieldsIsTrueStr []bool
|
|
numFields int
|
|
haveFields bool
|
|
fieldNames []string
|
|
fieldIndexes map[string]int
|
|
reparseCSV bool
|
|
|
|
// Built-in variables
|
|
argc int
|
|
convertFormat string
|
|
outputFormat string
|
|
fieldSep string
|
|
fieldSepRegex *regexp.Regexp
|
|
recordSep string
|
|
recordSepRegex *regexp.Regexp
|
|
recordTerminator string
|
|
outputFieldSep string
|
|
outputRecordSep string
|
|
subscriptSep string
|
|
matchLength int
|
|
matchStart int
|
|
inputMode IOMode
|
|
csvInputConfig CSVInputConfig
|
|
outputMode IOMode
|
|
csvOutputConfig CSVOutputConfig
|
|
|
|
// Parsed program, compiled functions and constants
|
|
program *parser.Program
|
|
functions []compiler.Function
|
|
nums []float64
|
|
strs []string
|
|
regexes []*regexp.Regexp
|
|
|
|
// Context support (for Interpreter.ExecuteContext)
|
|
checkCtx bool
|
|
ctx context.Context
|
|
ctxDone <-chan struct{}
|
|
ctxOps int
|
|
|
|
// Misc pieces of state
|
|
random *rand.Rand
|
|
randSeed float64
|
|
exitStatus int
|
|
regexCache map[string]*regexp.Regexp
|
|
formatCache map[string]cachedFormat
|
|
csvJoinFieldsBuf bytes.Buffer
|
|
}
|
|
|
|
// Fixed interpreter limits and buffer sizes. These could become Config
// fields if that is ever needed, but for now they are compile-time
// constants.
const (
	maxCachedRegexes = 100      // upper bound on entries kept by compileRegex
	maxCachedFormats = 100      // NOTE(review): presumably the same bound for formatCache
	maxRecordLength  = 10 << 20 // 10MB seems like plenty
	maxFieldIndex    = 1000000  // largest NF / field index accepted
	maxCallDepth     = 1000
	initialStackSize = 100 // initial length of the VM stack
	outputBufSize    = 64 << 10
	inputBufSize     = 64 << 10
)
|
|
|
|
// Config defines the interpreter configuration for ExecProgram.
|
|
type Config struct {
|
|
// Standard input reader (defaults to os.Stdin)
|
|
Stdin io.Reader
|
|
|
|
// Writer for normal output (defaults to a buffered version of os.Stdout).
|
|
// If you need to write to stdout but want control over the buffer size or
|
|
// allocation, wrap os.Stdout yourself and set Output to that.
|
|
Output io.Writer
|
|
|
|
// Writer for non-fatal error messages (defaults to os.Stderr)
|
|
Error io.Writer
|
|
|
|
// The name of the executable (accessible via ARGV[0])
|
|
Argv0 string
|
|
|
|
// Input arguments (usually filenames): empty slice means read
|
|
// only from Stdin, and a filename of "-" means read from Stdin
|
|
// instead of a real file.
|
|
//
|
|
// Arguments of the form "var=value" are treated as variable
|
|
// assignments.
|
|
Args []string
|
|
|
|
// Set to true to disable "var=value" assignments in Args.
|
|
NoArgVars bool
|
|
|
|
// List of name-value pairs for variables to set before executing
|
|
// the program (useful for setting FS and other built-in
|
|
// variables, for example []string{"FS", ",", "OFS", ","}).
|
|
Vars []string
|
|
|
|
// Map of named Go functions to allow calling from AWK. You need
|
|
// to pass this same map to the parser.ParseProgram config.
|
|
//
|
|
// Functions can have any number of parameters, and variadic
|
|
// functions are supported. Functions can have no return values,
|
|
// one return value, or two return values (result, error). In the
|
|
// two-value case, if the function returns a non-nil error,
|
|
// program execution will stop and ExecProgram will return that
|
|
// error.
|
|
//
|
|
// Apart from the error return value, the types supported are
|
|
// bool, integer and floating point types (excluding complex),
|
|
// and string types (string or []byte).
|
|
//
|
|
// It's not an error to call a Go function from AWK with fewer
|
|
// arguments than it has parameters in Go. In this case, the zero
|
|
// value will be used for any additional parameters. However, it
|
|
// is a parse error to call a non-variadic function from AWK with
|
|
// more arguments than it has parameters in Go.
|
|
//
|
|
// Functions defined with the "function" keyword in AWK code
|
|
// take precedence over functions in Funcs.
|
|
Funcs map[string]interface{}
|
|
|
|
// Set one or more of these to true to prevent unsafe behaviours,
|
|
// useful when executing untrusted scripts:
|
|
//
|
|
// * NoExec prevents system calls via system() or pipe operator
|
|
// * NoFileWrites prevents writing to files via '>' or '>>'
|
|
// * NoFileReads prevents reading from files via getline or the
|
|
// filenames in Args
|
|
NoExec bool
|
|
NoFileWrites bool
|
|
NoFileReads bool
|
|
|
|
// Exec args used to run system shell. Typically, this will
|
|
// be {"/bin/sh", "-c"}
|
|
ShellCommand []string
|
|
|
|
// List of name-value pairs to be assigned to the ENVIRON special
|
|
// array, for example []string{"USER", "bob", "HOME", "/home/bob"}.
|
|
// If nil (the default), values from os.Environ() are used.
|
|
//
|
|
// If the script doesn't need environment variables, set Environ to a
|
|
// non-nil empty slice, []string{}.
|
|
Environ []string
|
|
|
|
// Mode for parsing input fields and record: default is to use normal FS
|
|
// and RS behaviour. If set to CSVMode or TSVMode, FS and RS are ignored,
|
|
// and input records are parsed as comma-separated values or tab-separated
|
|
// values, respectively. Parsing is done as per RFC 4180 and the
|
|
// "encoding/csv" package, but FieldsPerRecord is not supported,
|
|
// LazyQuotes is always on, and TrimLeadingSpace is always off.
|
|
//
|
|
// You can also enable CSV or TSV input mode by setting INPUTMODE to "csv"
|
|
// or "tsv" in Vars or in the BEGIN block (those override this setting).
|
|
//
|
|
// For further documentation about GoAWK's CSV support, see the full docs:
|
|
// https://github.com/benhoyt/goawk/blob/master/csv.md
|
|
InputMode IOMode
|
|
|
|
// Additional options if InputMode is CSVMode or TSVMode. The zero value
|
|
// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
|
|
//
|
|
// You can also specify these options by setting INPUTMODE in the BEGIN
|
|
// block, for example, to use '|' as the field separator, '#' as the
|
|
// comment character, and enable header row parsing:
|
|
//
|
|
// BEGIN { INPUTMODE="csv separator=| comment=# header" }
|
|
CSVInput CSVInputConfig
|
|
|
|
// Mode for print output: default is to use normal OFS and ORS
|
|
// behaviour. If set to CSVMode or TSVMode, the "print" statement with one
|
|
// or more arguments outputs fields using CSV or TSV formatting,
|
|
// respectively. Output is written as per RFC 4180 and the "encoding/csv"
|
|
// package.
|
|
//
|
|
// You can also enable CSV or TSV output mode by setting OUTPUTMODE to
|
|
// "csv" or "tsv" in Vars or in the BEGIN block (those override this
|
|
// setting).
|
|
OutputMode IOMode
|
|
|
|
// Additional options if OutputMode is CSVMode or TSVMode. The zero value
|
|
// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
|
|
//
|
|
// You can also specify these options by setting OUTPUTMODE in the BEGIN
|
|
// block, for example, to use '|' as the output field separator:
|
|
//
|
|
// BEGIN { OUTPUTMODE="csv separator=|" }
|
|
CSVOutput CSVOutputConfig
|
|
}
|
|
|
|
// IOMode selects how input is parsed or how print output is formatted.
type IOMode int

const (
	// DefaultMode (0) is ordinary AWK behaviour: FS and RS separate
	// input, OFS and ORS separate print output.
	DefaultMode IOMode = iota

	// CSVMode (1) reads or writes comma-separated values.
	CSVMode

	// TSVMode (2) reads or writes tab-separated values.
	TSVMode
)
|
|
|
|
// CSVInputConfig carries the extra input options that apply when InputMode
// is CSVMode or TSVMode.
type CSVInputConfig struct {
	// Separator is the character between input fields. Zero selects the
	// default: ',' when InputMode is CSVMode, '\t' when it is TSVMode.
	Separator rune

	// Comment, if nonzero, marks comment lines: a line that starts with
	// this character (with no leading whitespace) is ignored.
	Comment rune

	// Header, if true, treats the first row of each input file as a list
	// of field names. This enables the @"field" syntax for fetching a
	// field by name, and the FIELDS special array.
	Header bool
}
|
|
|
|
// CSVOutputConfig carries the extra output options that apply when
// OutputMode is CSVMode or TSVMode.
type CSVOutputConfig struct {
	// Separator is the character written between output fields. Zero
	// selects the default: ',' when OutputMode is CSVMode, '\t' when it
	// is TSVMode.
	Separator rune
}
|
|
|
|
// ExecProgram executes the parsed program using the given interpreter
|
|
// config, returning the exit status code of the program. Error is nil
|
|
// on successful execution of the program, even if the program returns
|
|
// a non-zero status code.
|
|
//
|
|
// As of GoAWK version v1.16.0, a nil config is valid and will use the
|
|
// defaults (zero values). However, it may be simpler to use Exec in that
|
|
// case.
|
|
func ExecProgram(program *parser.Program, config *Config) (int, error) {
|
|
p := newInterp(program)
|
|
err := p.setExecuteConfig(config)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return p.executeAll()
|
|
}
|
|
|
|
func newInterp(program *parser.Program) *interp {
|
|
p := &interp{
|
|
program: program,
|
|
functions: program.Compiled.Functions,
|
|
nums: program.Compiled.Nums,
|
|
strs: program.Compiled.Strs,
|
|
regexes: program.Compiled.Regexes,
|
|
}
|
|
|
|
// Allocate memory for variables and virtual machine stack
|
|
p.globals = make([]value, len(program.Scalars))
|
|
p.stack = make([]value, initialStackSize)
|
|
p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize)
|
|
for i := 0; i < len(program.Arrays); i++ {
|
|
p.arrays[i] = make(map[string]value)
|
|
}
|
|
|
|
// Initialize defaults
|
|
p.regexCache = make(map[string]*regexp.Regexp, 10)
|
|
p.formatCache = make(map[string]cachedFormat, 10)
|
|
p.randSeed = 1.0
|
|
seed := math.Float64bits(p.randSeed)
|
|
p.random = rand.New(rand.NewSource(int64(seed)))
|
|
p.convertFormat = "%.6g"
|
|
p.outputFormat = "%.6g"
|
|
p.fieldSep = " "
|
|
p.recordSep = "\n"
|
|
p.outputFieldSep = " "
|
|
p.outputRecordSep = "\n"
|
|
p.subscriptSep = "\x1c"
|
|
|
|
p.inputStreams = make(map[string]io.ReadCloser)
|
|
p.outputStreams = make(map[string]io.WriteCloser)
|
|
p.commands = make(map[string]*exec.Cmd)
|
|
p.scanners = make(map[string]*bufio.Scanner)
|
|
|
|
return p
|
|
}
|
|
|
|
func (p *interp) setExecuteConfig(config *Config) error {
|
|
if config == nil {
|
|
config = &Config{}
|
|
}
|
|
if len(config.Vars)%2 != 0 {
|
|
return newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars))
|
|
}
|
|
if len(config.Environ)%2 != 0 {
|
|
return newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ))
|
|
}
|
|
|
|
// Set up I/O mode config (Vars will override)
|
|
p.inputMode = config.InputMode
|
|
p.csvInputConfig = config.CSVInput
|
|
switch p.inputMode {
|
|
case CSVMode:
|
|
if p.csvInputConfig.Separator == 0 {
|
|
p.csvInputConfig.Separator = ','
|
|
}
|
|
case TSVMode:
|
|
if p.csvInputConfig.Separator == 0 {
|
|
p.csvInputConfig.Separator = '\t'
|
|
}
|
|
case DefaultMode:
|
|
if p.csvInputConfig != (CSVInputConfig{}) {
|
|
return newError("input mode configuration not valid in default input mode")
|
|
}
|
|
}
|
|
p.outputMode = config.OutputMode
|
|
p.csvOutputConfig = config.CSVOutput
|
|
switch p.outputMode {
|
|
case CSVMode:
|
|
if p.csvOutputConfig.Separator == 0 {
|
|
p.csvOutputConfig.Separator = ','
|
|
}
|
|
case TSVMode:
|
|
if p.csvOutputConfig.Separator == 0 {
|
|
p.csvOutputConfig.Separator = '\t'
|
|
}
|
|
case DefaultMode:
|
|
if p.csvOutputConfig != (CSVOutputConfig{}) {
|
|
return newError("output mode configuration not valid in default output mode")
|
|
}
|
|
}
|
|
|
|
// Set up ARGV and other variables from config
|
|
argvIndex := p.program.Arrays["ARGV"]
|
|
p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0))
|
|
p.argc = len(config.Args) + 1
|
|
for i, arg := range config.Args {
|
|
p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg))
|
|
}
|
|
p.noArgVars = config.NoArgVars
|
|
p.filenameIndex = 1
|
|
p.hadFiles = false
|
|
for i := 0; i < len(config.Vars); i += 2 {
|
|
err := p.setVarByName(config.Vars[i], config.Vars[i+1])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// After Vars has been handled, validate CSV configuration.
|
|
err := validateCSVInputConfig(p.inputMode, p.csvInputConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Set up ENVIRON from config or environment variables
|
|
environIndex := p.program.Arrays["ENVIRON"]
|
|
if config.Environ != nil {
|
|
for i := 0; i < len(config.Environ); i += 2 {
|
|
p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
|
|
}
|
|
} else {
|
|
for _, kv := range os.Environ() {
|
|
eq := strings.IndexByte(kv, '=')
|
|
if eq >= 0 {
|
|
p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:]))
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set up system shell command
|
|
if len(config.ShellCommand) != 0 {
|
|
p.shellCommand = config.ShellCommand
|
|
} else {
|
|
p.shellCommand = defaultShellCommand
|
|
}
|
|
|
|
// Set up I/O structures
|
|
p.noExec = config.NoExec
|
|
p.noFileWrites = config.NoFileWrites
|
|
p.noFileReads = config.NoFileReads
|
|
p.stdin = config.Stdin
|
|
if p.stdin == nil {
|
|
p.stdin = os.Stdin
|
|
}
|
|
p.output = config.Output
|
|
if p.output == nil {
|
|
p.output = bufio.NewWriterSize(os.Stdout, outputBufSize)
|
|
}
|
|
p.errorOutput = config.Error
|
|
if p.errorOutput == nil {
|
|
p.errorOutput = os.Stderr
|
|
}
|
|
|
|
// Initialize native Go functions
|
|
if p.nativeFuncs == nil {
|
|
err := p.initNativeFuncs(config.Funcs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func validateCSVInputConfig(mode IOMode, config CSVInputConfig) error {
|
|
if mode != CSVMode && mode != TSVMode {
|
|
return nil
|
|
}
|
|
if config.Separator == config.Comment || !validCSVSeparator(config.Separator) ||
|
|
config.Comment != 0 && !validCSVSeparator(config.Comment) {
|
|
return errCSVSeparator
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateCSVOutputConfig(mode IOMode, config CSVOutputConfig) error {
|
|
if mode != CSVMode && mode != TSVMode {
|
|
return nil
|
|
}
|
|
if !validCSVSeparator(config.Separator) {
|
|
return errCSVSeparator
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validCSVSeparator reports whether r may be used as a CSV/TSV separator
// or comment character. NUL, the double quote, carriage return, newline,
// the Unicode replacement character, and any rune that is not valid
// Unicode are all rejected.
func validCSVSeparator(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
|
|
|
|
func (p *interp) executeAll() (int, error) {
|
|
defer p.closeAll()
|
|
|
|
// Execute the program: BEGIN, then pattern/actions, then END
|
|
err := p.execute(p.program.Compiled.Begin)
|
|
if err != nil && err != errExit {
|
|
if p.checkCtx {
|
|
ctxErr := p.checkContextNow()
|
|
if ctxErr != nil {
|
|
return 0, ctxErr
|
|
}
|
|
}
|
|
return 0, err
|
|
}
|
|
if p.program.Actions == nil && p.program.End == nil {
|
|
return p.exitStatus, nil // only BEGIN specified, don't process input
|
|
}
|
|
if err != errExit {
|
|
err = p.execActions(p.program.Compiled.Actions)
|
|
if err != nil && err != errExit {
|
|
if p.checkCtx {
|
|
ctxErr := p.checkContextNow()
|
|
if ctxErr != nil {
|
|
return 0, ctxErr
|
|
}
|
|
}
|
|
return 0, err
|
|
}
|
|
}
|
|
err = p.execute(p.program.Compiled.End)
|
|
if err != nil && err != errExit {
|
|
if p.checkCtx {
|
|
ctxErr := p.checkContextNow()
|
|
if ctxErr != nil {
|
|
return 0, ctxErr
|
|
}
|
|
}
|
|
return 0, err
|
|
}
|
|
return p.exitStatus, nil
|
|
}
|
|
|
|
// Exec provides a simple way to parse and execute an AWK program
|
|
// with the given field separator. Exec reads input from the given
|
|
// reader (nil means use os.Stdin) and writes output to stdout (nil
|
|
// means use a buffered version of os.Stdout).
|
|
func Exec(source, fieldSep string, input io.Reader, output io.Writer) error {
|
|
prog, err := parser.ParseProgram([]byte(source), nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
config := &Config{
|
|
Stdin: input,
|
|
Output: output,
|
|
Error: ioutil.Discard,
|
|
Vars: []string{"FS", fieldSep},
|
|
}
|
|
_, err = ExecProgram(prog, config)
|
|
return err
|
|
}
|
|
|
|
// Execute pattern-action blocks (may be multiple)
|
|
func (p *interp) execActions(actions []compiler.Action) error {
|
|
var inRange []bool
|
|
lineLoop:
|
|
for {
|
|
// Read and setup next line of input
|
|
line, err := p.nextLine()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
p.setLine(line, false)
|
|
p.reparseCSV = false
|
|
|
|
// Execute all the pattern-action blocks for each line
|
|
for i, action := range actions {
|
|
// First determine whether the pattern matches
|
|
matched := false
|
|
switch len(action.Pattern) {
|
|
case 0:
|
|
// No pattern is equivalent to pattern evaluating to true
|
|
matched = true
|
|
case 1:
|
|
// Single boolean pattern
|
|
err := p.execute(action.Pattern[0])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
matched = p.pop().boolean()
|
|
case 2:
|
|
// Range pattern (matches between start and stop lines)
|
|
if inRange == nil {
|
|
inRange = make([]bool, len(actions))
|
|
}
|
|
if !inRange[i] {
|
|
err := p.execute(action.Pattern[0])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
inRange[i] = p.pop().boolean()
|
|
}
|
|
matched = inRange[i]
|
|
if inRange[i] {
|
|
err := p.execute(action.Pattern[1])
|
|
if err != nil {
|
|
return err
|
|
}
|
|
inRange[i] = !p.pop().boolean()
|
|
}
|
|
}
|
|
if !matched {
|
|
continue
|
|
}
|
|
|
|
// No action is equivalent to { print $0 }
|
|
if len(action.Body) == 0 {
|
|
err := p.printLine(p.output, p.line)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Execute the body statements
|
|
err := p.execute(action.Body)
|
|
if err == errNext {
|
|
// "next" statement skips straight to next line
|
|
continue lineLoop
|
|
}
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Get a special variable by index
|
|
func (p *interp) getSpecial(index int) value {
|
|
switch index {
|
|
case ast.V_NF:
|
|
p.ensureFields()
|
|
return num(float64(p.numFields))
|
|
case ast.V_NR:
|
|
return num(float64(p.lineNum))
|
|
case ast.V_RLENGTH:
|
|
return num(float64(p.matchLength))
|
|
case ast.V_RSTART:
|
|
return num(float64(p.matchStart))
|
|
case ast.V_FNR:
|
|
return num(float64(p.fileLineNum))
|
|
case ast.V_ARGC:
|
|
return num(float64(p.argc))
|
|
case ast.V_CONVFMT:
|
|
return str(p.convertFormat)
|
|
case ast.V_FILENAME:
|
|
return p.filename
|
|
case ast.V_FS:
|
|
return str(p.fieldSep)
|
|
case ast.V_OFMT:
|
|
return str(p.outputFormat)
|
|
case ast.V_OFS:
|
|
return str(p.outputFieldSep)
|
|
case ast.V_ORS:
|
|
return str(p.outputRecordSep)
|
|
case ast.V_RS:
|
|
return str(p.recordSep)
|
|
case ast.V_RT:
|
|
return str(p.recordTerminator)
|
|
case ast.V_SUBSEP:
|
|
return str(p.subscriptSep)
|
|
case ast.V_INPUTMODE:
|
|
return str(inputModeString(p.inputMode, p.csvInputConfig))
|
|
case ast.V_OUTPUTMODE:
|
|
return str(outputModeString(p.outputMode, p.csvOutputConfig))
|
|
default:
|
|
panic(fmt.Sprintf("unexpected special variable index: %d", index))
|
|
}
|
|
}
|
|
|
|
// Set a variable by name (specials and globals only)
|
|
func (p *interp) setVarByName(name, value string) error {
|
|
index := ast.SpecialVarIndex(name)
|
|
if index > 0 {
|
|
return p.setSpecial(index, numStr(value))
|
|
}
|
|
index, ok := p.program.Scalars[name]
|
|
if ok {
|
|
p.globals[index] = numStr(value)
|
|
return nil
|
|
}
|
|
// Ignore variables that aren't defined in program
|
|
return nil
|
|
}
|
|
|
|
// Set special variable by index to given value
|
|
func (p *interp) setSpecial(index int, v value) error {
|
|
switch index {
|
|
case ast.V_NF:
|
|
numFields := int(v.num())
|
|
if numFields < 0 {
|
|
return newError("NF set to negative value: %d", numFields)
|
|
}
|
|
if numFields > maxFieldIndex {
|
|
return newError("NF set too large: %d", numFields)
|
|
}
|
|
p.ensureFields()
|
|
p.numFields = numFields
|
|
if p.numFields < len(p.fields) {
|
|
p.fields = p.fields[:p.numFields]
|
|
p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields]
|
|
}
|
|
for i := len(p.fields); i < p.numFields; i++ {
|
|
p.fields = append(p.fields, "")
|
|
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
|
|
}
|
|
p.line = p.joinFields(p.fields)
|
|
p.lineIsTrueStr = true
|
|
case ast.V_NR:
|
|
p.lineNum = int(v.num())
|
|
case ast.V_RLENGTH:
|
|
p.matchLength = int(v.num())
|
|
case ast.V_RSTART:
|
|
p.matchStart = int(v.num())
|
|
case ast.V_FNR:
|
|
p.fileLineNum = int(v.num())
|
|
case ast.V_ARGC:
|
|
p.argc = int(v.num())
|
|
case ast.V_CONVFMT:
|
|
p.convertFormat = p.toString(v)
|
|
case ast.V_FILENAME:
|
|
p.filename = v
|
|
case ast.V_FS:
|
|
p.fieldSep = p.toString(v)
|
|
if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields
|
|
re, err := regexp.Compile(compiler.AddRegexFlags(p.fieldSep))
|
|
if err != nil {
|
|
return newError("invalid regex %q: %s", p.fieldSep, err)
|
|
}
|
|
p.fieldSepRegex = re
|
|
}
|
|
case ast.V_OFMT:
|
|
p.outputFormat = p.toString(v)
|
|
case ast.V_OFS:
|
|
p.outputFieldSep = p.toString(v)
|
|
case ast.V_ORS:
|
|
p.outputRecordSep = p.toString(v)
|
|
case ast.V_RS:
|
|
p.recordSep = p.toString(v)
|
|
switch { // compare to interp.newScanner
|
|
case len(p.recordSep) <= 1:
|
|
// Simple cases use specialized splitters, not regex
|
|
case utf8.RuneCountInString(p.recordSep) == 1:
|
|
// Multi-byte unicode char falls back to regex splitter
|
|
sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars
|
|
p.recordSepRegex = regexp.MustCompile(sep)
|
|
default:
|
|
re, err := regexp.Compile(compiler.AddRegexFlags(p.recordSep))
|
|
if err != nil {
|
|
return newError("invalid regex %q: %s", p.recordSep, err)
|
|
}
|
|
p.recordSepRegex = re
|
|
}
|
|
case ast.V_RT:
|
|
p.recordTerminator = p.toString(v)
|
|
case ast.V_SUBSEP:
|
|
p.subscriptSep = p.toString(v)
|
|
case ast.V_INPUTMODE:
|
|
var err error
|
|
p.inputMode, p.csvInputConfig, err = parseInputMode(p.toString(v))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = validateCSVInputConfig(p.inputMode, p.csvInputConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
case ast.V_OUTPUTMODE:
|
|
var err error
|
|
p.outputMode, p.csvOutputConfig, err = parseOutputMode(p.toString(v))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
default:
|
|
panic(fmt.Sprintf("unexpected special variable index: %d", index))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Determine the index of given array into the p.arrays slice. Global
|
|
// arrays are just at p.arrays[index], local arrays have to be looked
|
|
// up indirectly.
|
|
func (p *interp) arrayIndex(scope ast.VarScope, index int) int {
|
|
if scope == ast.ScopeGlobal {
|
|
return index
|
|
} else {
|
|
return p.localArrays[len(p.localArrays)-1][index]
|
|
}
|
|
}
|
|
|
|
// Return array with given scope and index.
|
|
func (p *interp) array(scope ast.VarScope, index int) map[string]value {
|
|
return p.arrays[p.arrayIndex(scope, index)]
|
|
}
|
|
|
|
// Return local array with given index.
|
|
func (p *interp) localArray(index int) map[string]value {
|
|
return p.arrays[p.localArrays[len(p.localArrays)-1][index]]
|
|
}
|
|
|
|
// Set a value in given array by key (index)
|
|
func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) {
|
|
array := p.array(scope, arrayIndex)
|
|
array[index] = v
|
|
}
|
|
|
|
// Get the value of given numbered field, equivalent to "$index"
|
|
func (p *interp) getField(index int) value {
|
|
if index == 0 {
|
|
if p.lineIsTrueStr {
|
|
return str(p.line)
|
|
} else {
|
|
return numStr(p.line)
|
|
}
|
|
}
|
|
p.ensureFields()
|
|
if index < 1 {
|
|
index = len(p.fields) + 1 + index
|
|
if index < 1 {
|
|
return str("")
|
|
}
|
|
}
|
|
if index > len(p.fields) {
|
|
return str("")
|
|
}
|
|
if p.fieldsIsTrueStr[index-1] {
|
|
return str(p.fields[index-1])
|
|
} else {
|
|
return numStr(p.fields[index-1])
|
|
}
|
|
}
|
|
|
|
// Get the value of a field by name (for CSV/TSV mode), as in @"name".
|
|
func (p *interp) getFieldByName(name string) (value, error) {
|
|
if p.fieldIndexes == nil {
|
|
// Lazily create map of field names to indexes.
|
|
if p.fieldNames == nil {
|
|
return null(), newError(`@ only supported if header parsing enabled; use -H or add "header" to INPUTMODE`)
|
|
}
|
|
p.fieldIndexes = make(map[string]int, len(p.fieldNames))
|
|
for i, n := range p.fieldNames {
|
|
p.fieldIndexes[n] = i + 1
|
|
}
|
|
}
|
|
index := p.fieldIndexes[name]
|
|
if index == 0 {
|
|
return str(""), nil
|
|
}
|
|
return p.getField(index), nil
|
|
}
|
|
|
|
// Sets a single field, equivalent to "$index = value"
|
|
func (p *interp) setField(index int, value string) error {
|
|
if index == 0 {
|
|
p.setLine(value, true)
|
|
return nil
|
|
}
|
|
if index > maxFieldIndex {
|
|
return newError("field index too large: %d", index)
|
|
}
|
|
// If there aren't enough fields, add empty string fields in between
|
|
p.ensureFields()
|
|
if index < 1 {
|
|
index = len(p.fields) + 1 + index
|
|
if index < 1 {
|
|
return nil
|
|
}
|
|
}
|
|
for i := len(p.fields); i < index; i++ {
|
|
p.fields = append(p.fields, "")
|
|
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true)
|
|
}
|
|
p.fields[index-1] = value
|
|
p.fieldsIsTrueStr[index-1] = true
|
|
p.numFields = len(p.fields)
|
|
p.line = p.joinFields(p.fields)
|
|
p.lineIsTrueStr = true
|
|
return nil
|
|
}
|
|
|
|
func (p *interp) joinFields(fields []string) string {
|
|
switch p.outputMode {
|
|
case CSVMode, TSVMode:
|
|
p.csvJoinFieldsBuf.Reset()
|
|
_ = p.writeCSV(&p.csvJoinFieldsBuf, fields)
|
|
line := p.csvJoinFieldsBuf.Bytes()
|
|
line = line[:len(line)-lenNewline(line)]
|
|
return string(line)
|
|
default:
|
|
return strings.Join(fields, p.outputFieldSep)
|
|
}
|
|
}
|
|
|
|
// Convert value to string using current CONVFMT
|
|
func (p *interp) toString(v value) string {
|
|
return v.str(p.convertFormat)
|
|
}
|
|
|
|
// Compile regex string (or fetch from regex cache)
|
|
func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) {
|
|
if re, ok := p.regexCache[regex]; ok {
|
|
return re, nil
|
|
}
|
|
re, err := regexp.Compile(compiler.AddRegexFlags(regex))
|
|
if err != nil {
|
|
return nil, newError("invalid regex %q: %s", regex, err)
|
|
}
|
|
// Dumb, non-LRU cache: just cache the first N regexes
|
|
if len(p.regexCache) < maxCachedRegexes {
|
|
p.regexCache[regex] = re
|
|
}
|
|
return re, nil
|
|
}
|
|
|
|
// getDefaultShellCommand returns the argument list used to launch the
// system shell: "sh -c" on Windows (resolved by name) and "/bin/sh -c"
// everywhere else. Each call returns a new slice.
func getDefaultShellCommand() []string {
	if runtime.GOOS == "windows" {
		return []string{"sh", "-c"}
	}
	return []string{"/bin/sh", "-c"}
}
|
|
|
|
func inputModeString(mode IOMode, csvConfig CSVInputConfig) string {
|
|
var s string
|
|
var defaultSep rune
|
|
switch mode {
|
|
case CSVMode:
|
|
s = "csv"
|
|
defaultSep = ','
|
|
case TSVMode:
|
|
s = "tsv"
|
|
defaultSep = '\t'
|
|
case DefaultMode:
|
|
return ""
|
|
}
|
|
if csvConfig.Separator != defaultSep {
|
|
s += " separator=" + string([]rune{csvConfig.Separator})
|
|
}
|
|
if csvConfig.Comment != 0 {
|
|
s += " comment=" + string([]rune{csvConfig.Comment})
|
|
}
|
|
if csvConfig.Header {
|
|
s += " header"
|
|
}
|
|
return s
|
|
}
|
|
|
|
func parseInputMode(s string) (mode IOMode, csvConfig CSVInputConfig, err error) {
|
|
fields := strings.Fields(s)
|
|
if len(fields) == 0 {
|
|
return DefaultMode, CSVInputConfig{}, nil
|
|
}
|
|
switch fields[0] {
|
|
case "csv":
|
|
mode = CSVMode
|
|
csvConfig.Separator = ','
|
|
case "tsv":
|
|
mode = TSVMode
|
|
csvConfig.Separator = '\t'
|
|
default:
|
|
return DefaultMode, CSVInputConfig{}, newError("invalid input mode %q", fields[0])
|
|
}
|
|
for _, field := range fields[1:] {
|
|
key := field
|
|
val := ""
|
|
equals := strings.IndexByte(field, '=')
|
|
if equals >= 0 {
|
|
key = field[:equals]
|
|
val = field[equals+1:]
|
|
}
|
|
switch key {
|
|
case "separator":
|
|
r, n := utf8.DecodeRuneInString(val)
|
|
if n == 0 || n < len(val) {
|
|
return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV separator %q", val)
|
|
}
|
|
csvConfig.Separator = r
|
|
case "comment":
|
|
r, n := utf8.DecodeRuneInString(val)
|
|
if n == 0 || n < len(val) {
|
|
return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV comment character %q", val)
|
|
}
|
|
csvConfig.Comment = r
|
|
case "header":
|
|
if val != "" && val != "true" && val != "false" {
|
|
return DefaultMode, CSVInputConfig{}, newError("invalid header value %q", val)
|
|
}
|
|
csvConfig.Header = val == "" || val == "true"
|
|
default:
|
|
return DefaultMode, CSVInputConfig{}, newError("invalid input mode key %q", key)
|
|
}
|
|
}
|
|
return mode, csvConfig, nil
|
|
}
|
|
|
|
func outputModeString(mode IOMode, csvConfig CSVOutputConfig) string {
|
|
var s string
|
|
var defaultSep rune
|
|
switch mode {
|
|
case CSVMode:
|
|
s = "csv"
|
|
defaultSep = ','
|
|
case TSVMode:
|
|
s = "tsv"
|
|
defaultSep = '\t'
|
|
case DefaultMode:
|
|
return ""
|
|
}
|
|
if csvConfig.Separator != defaultSep {
|
|
s += " separator=" + string([]rune{csvConfig.Separator})
|
|
}
|
|
return s
|
|
}
|
|
|
|
func parseOutputMode(s string) (mode IOMode, csvConfig CSVOutputConfig, err error) {
|
|
fields := strings.Fields(s)
|
|
if len(fields) == 0 {
|
|
return DefaultMode, CSVOutputConfig{}, nil
|
|
}
|
|
switch fields[0] {
|
|
case "csv":
|
|
mode = CSVMode
|
|
csvConfig.Separator = ','
|
|
case "tsv":
|
|
mode = TSVMode
|
|
csvConfig.Separator = '\t'
|
|
default:
|
|
return DefaultMode, CSVOutputConfig{}, newError("invalid output mode %q", fields[0])
|
|
}
|
|
for _, field := range fields[1:] {
|
|
key := field
|
|
val := ""
|
|
equals := strings.IndexByte(field, '=')
|
|
if equals >= 0 {
|
|
key = field[:equals]
|
|
val = field[equals+1:]
|
|
}
|
|
switch key {
|
|
case "separator":
|
|
r, n := utf8.DecodeRuneInString(val)
|
|
if n == 0 || n < len(val) {
|
|
return DefaultMode, CSVOutputConfig{}, newError("invalid CSV/TSV separator %q", val)
|
|
}
|
|
csvConfig.Separator = r
|
|
default:
|
|
return DefaultMode, CSVOutputConfig{}, newError("invalid output mode key %q", key)
|
|
}
|
|
}
|
|
return mode, csvConfig, nil
|
|
}
|