// Source: grawkit/play/vendor/github.com/benhoyt/goawk/interp/interp.go
// (1104 lines, 30 KiB, Go)

// Package interp is the GoAWK interpreter.
//
// For basic usage, use the Exec function. For more complicated use
// cases and configuration options, first use the parser package to
// parse the AWK source, and then use ExecProgram to execute it with
// a specific configuration.
//
// If you need to re-run the same parsed program repeatedly on different
// inputs or with different variables, use New to instantiate an Interpreter
// and then call the Interpreter.Execute method as many times as you need.
package interp
import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"math"
"math/rand"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"unicode/utf8"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
"github.com/benhoyt/goawk/parser"
)
var (
// Sentinel errors used internally for control flow. executeAll and
// execActions compare against errExit and errNext rather than reporting
// them; errBreak is presumably handled the same way elsewhere in the file.
errExit = errors.New("exit")
errBreak = errors.New("break")
errNext = errors.New("next")
// Returned by the CSV config validators when the separator or comment
// character is not acceptable.
errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter")
// True when running on Windows.
crlfNewline = runtime.GOOS == "windows"
// Matches a "name=value" string: an identifier, then '=', then the value.
varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`)
// Shell invocation used when Config.ShellCommand is empty.
defaultShellCommand = getDefaultShellCommand()
)
// Error is the error type reported on interpreter failure, for example
// when FS is set to an invalid regex. Exec and Eval functions return it
// as a *Error.
type Error struct {
	message string
}

// Error implements the error interface and returns the stored message.
func (e *Error) Error() string {
	return e.message
}

// newError builds a *Error whose message is format expanded with args
// in the manner of fmt.Sprintf.
func newError(format string, args ...interface{}) error {
	msg := fmt.Sprintf(format, args...)
	return &Error{message: msg}
}
// returnValue wraps a single value and implements the error interface.
// NOTE(review): presumably this lets a function's return value travel up
// through error returns; the call sites are outside this view.
type returnValue struct {
	Value value
}

// Error renders the wrapped value (formatted with "%.6g") inside a
// "<return ...>" marker.
func (r returnValue) Error() string {
	rendered := r.Value.str("%.6g")
	return "<return " + rendered + ">"
}
// interp holds the complete state of one interpreter instance: its I/O
// streams, variable storage, the current record and its fields, the values
// of the built-in variables, the parsed/compiled program, optional context
// support, and assorted caches.
type interp struct {
// Input/output
output io.Writer
errorOutput io.Writer
scanner *bufio.Scanner
scanners map[string]*bufio.Scanner
stdin io.Reader
filenameIndex int
hadFiles bool
input io.Reader
inputBuffer []byte
inputStreams map[string]io.ReadCloser
outputStreams map[string]io.WriteCloser
commands map[string]*exec.Cmd
// Safety switches and shell settings copied from Config by setExecuteConfig
noExec bool
noFileWrites bool
noFileReads bool
shellCommand []string
csvOutput *bufio.Writer
noArgVars bool
// Scalars, arrays, and function state
globals []value
stack []value
sp int
frame []value
// Global arrays occupy the first len(program.Arrays) slots (see newInterp
// and arrayIndex); local arrays are reached indirectly via localArrays.
arrays []map[string]value
localArrays [][]int
callDepth int
nativeFuncs []nativeFunc
// File, line, and field handling
filename value
line string
lineIsTrueStr bool
lineNum int
fileLineNum int
fields []string
fieldsIsTrueStr []bool
numFields int
haveFields bool
fieldNames []string
// Built lazily by getFieldByName: maps a field name to its 1-based index.
fieldIndexes map[string]int
reparseCSV bool
// Built-in variables
argc int
convertFormat string
outputFormat string
fieldSep string
fieldSepRegex *regexp.Regexp
recordSep string
recordSepRegex *regexp.Regexp
recordTerminator string
outputFieldSep string
outputRecordSep string
subscriptSep string
matchLength int
matchStart int
inputMode IOMode
csvInputConfig CSVInputConfig
outputMode IOMode
csvOutputConfig CSVOutputConfig
// Parsed program, compiled functions and constants
program *parser.Program
functions []compiler.Function
nums []float64
strs []string
regexes []*regexp.Regexp
// Context support (for Interpreter.ExecuteContext)
checkCtx bool
ctx context.Context
ctxDone <-chan struct{}
ctxOps int
// Misc pieces of state
random *rand.Rand
randSeed float64
exitStatus int
// Compiled regexes keyed by their source string (see compileRegex).
regexCache map[string]*regexp.Regexp
formatCache map[string]cachedFormat
// Scratch buffer reused by joinFields in CSV/TSV output mode.
csvJoinFieldsBuf bytes.Buffer
}
// Various const configuration. Could make these part of Config if
// we wanted to, but no need for now.
const (
// Upper bound on the number of entries compileRegex keeps in regexCache.
maxCachedRegexes = 100
maxCachedFormats = 100
maxRecordLength = 10 * 1024 * 1024 // 10MB seems like plenty
// Largest value accepted for NF and for a field index being assigned
// (see setSpecial and setField).
maxFieldIndex = 1000000
maxCallDepth = 1000
// Initial length of the value stack, and spare capacity reserved in the
// arrays slice (see newInterp).
initialStackSize = 100
// Buffer size for the default buffered os.Stdout writer (see setExecuteConfig).
outputBufSize = 64 * 1024
inputBufSize = 64 * 1024
)
// Config defines the interpreter configuration for ExecProgram.
type Config struct {
// Standard input reader (defaults to os.Stdin)
Stdin io.Reader
// Writer for normal output (defaults to a buffered version of os.Stdout).
// If you need to write to stdout but want control over the buffer size or
// allocation, wrap os.Stdout yourself and set Output to that.
Output io.Writer
// Writer for non-fatal error messages (defaults to os.Stderr)
Error io.Writer
// The name of the executable (accessible via ARGV[0])
Argv0 string
// Input arguments (usually filenames): empty slice means read
// only from Stdin, and a filename of "-" means read from Stdin
// instead of a real file.
//
// Arguments of the form "var=value" are treated as variable
// assignments.
Args []string
// Set to true to disable "var=value" assignments in Args.
NoArgVars bool
// List of name-value pairs for variables to set before executing
// the program (useful for setting FS and other built-in
// variables, for example []string{"FS", ",", "OFS", ","}).
//
// The length must be even, otherwise ExecProgram returns an error.
// Names that are neither built-in variables nor used by the program
// are silently ignored.
Vars []string
// Map of named Go functions to allow calling from AWK. You need
// to pass this same map to the parser.ParseProgram config.
//
// Functions can have any number of parameters, and variadic
// functions are supported. Functions can have no return values,
// one return value, or two return values (result, error). In the
// two-value case, if the function returns a non-nil error,
// program execution will stop and ExecProgram will return that
// error.
//
// Apart from the error return value, the types supported are
// bool, integer and floating point types (excluding complex),
// and string types (string or []byte).
//
// It's not an error to call a Go function from AWK with fewer
// arguments than it has parameters in Go. In this case, the zero
// value will be used for any additional parameters. However, it
// is a parse error to call a non-variadic function from AWK with
// more arguments than it has parameters in Go.
//
// Functions defined with the "function" keyword in AWK code
// take precedence over functions in Funcs.
Funcs map[string]interface{}
// Set one or more of these to true to prevent unsafe behaviours,
// useful when executing untrusted scripts:
//
//   - NoExec prevents system calls via system() or pipe operator
//   - NoFileWrites prevents writing to files via '>' or '>>'
//   - NoFileReads prevents reading from files via getline or the
//     filenames in Args
NoExec bool
NoFileWrites bool
NoFileReads bool
// Exec args used to run system shell. Typically, this will
// be {"/bin/sh", "-c"}. If empty, a default is used: {"/bin/sh", "-c"},
// or {"sh", "-c"} on Windows.
ShellCommand []string
// List of name-value pairs to be assigned to the ENVIRON special
// array, for example []string{"USER", "bob", "HOME", "/home/bob"}.
// If nil (the default), values from os.Environ() are used. The length
// must be even, otherwise ExecProgram returns an error.
//
// If the script doesn't need environment variables, set Environ to a
// non-nil empty slice, []string{}.
Environ []string
// Mode for parsing input fields and record: default is to use normal FS
// and RS behaviour. If set to CSVMode or TSVMode, FS and RS are ignored,
// and input records are parsed as comma-separated values or tab-separated
// values, respectively. Parsing is done as per RFC 4180 and the
// "encoding/csv" package, but FieldsPerRecord is not supported,
// LazyQuotes is always on, and TrimLeadingSpace is always off.
//
// You can also enable CSV or TSV input mode by setting INPUTMODE to "csv"
// or "tsv" in Vars or in the BEGIN block (those override this setting).
//
// For further documentation about GoAWK's CSV support, see the full docs:
// https://github.com/benhoyt/goawk/blob/master/csv.md
InputMode IOMode
// Additional options if InputMode is CSVMode or TSVMode. The zero value
// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
// Setting any of these options while InputMode is DefaultMode is an error.
//
// You can also specify these options by setting INPUTMODE in the BEGIN
// block, for example, to use '|' as the field separator, '#' as the
// comment character, and enable header row parsing:
//
//	BEGIN { INPUTMODE="csv separator=| comment=# header" }
CSVInput CSVInputConfig
// Mode for print output: default is to use normal OFS and ORS
// behaviour. If set to CSVMode or TSVMode, the "print" statement with one
// or more arguments outputs fields using CSV or TSV formatting,
// respectively. Output is written as per RFC 4180 and the "encoding/csv"
// package.
//
// You can also enable CSV or TSV output mode by setting OUTPUTMODE to
// "csv" or "tsv" in Vars or in the BEGIN block (those override this
// setting).
OutputMode IOMode
// Additional options if OutputMode is CSVMode or TSVMode. The zero value
// is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode.
// Setting any of these options while OutputMode is DefaultMode is an error.
//
// You can also specify these options by setting OUTPUTMODE in the BEGIN
// block, for example, to use '|' as the output field separator:
//
//	BEGIN { OUTPUTMODE="csv separator=|" }
CSVOutput CSVOutputConfig
}
// IOMode selects how input records are parsed or how print output is
// formatted.
type IOMode int

const (
	// DefaultMode is the zero value: normal AWK separators are used, FS
	// and RS for input, OFS and ORS for print output.
	DefaultMode IOMode = iota

	// CSVMode selects comma-separated value handling for input or output.
	CSVMode

	// TSVMode selects tab-separated value handling for input or output.
	TSVMode
)
// CSVInputConfig holds additional configuration for when InputMode is CSVMode
// or TSVMode. The same options can be expressed in an INPUTMODE string as
// "separator=X", "comment=X" and "header" (see parseInputMode).
type CSVInputConfig struct {
// Input field separator character. If this is zero, it defaults to ','
// when InputMode is CSVMode and '\t' when InputMode is TSVMode. It must
// not be '"', '\r', '\n' or an invalid rune, and must differ from Comment.
Separator rune
// If nonzero, specifies that lines beginning with this character (and no
// leading whitespace) should be ignored as comments. The same character
// restrictions as for Separator apply.
Comment rune
// If true, parse the first row in each input file as a header row (that
// is, a list of field names), and enable the @"field" syntax to get a
// field by name as well as the FIELDS special array.
Header bool
}
// CSVOutputConfig holds additional configuration for when OutputMode is
// CSVMode or TSVMode. The same option can be expressed in an OUTPUTMODE
// string as "separator=X" (see parseOutputMode).
type CSVOutputConfig struct {
// Output field separator character. If this is zero, it defaults to ','
// when OutputMode is CSVMode and '\t' when OutputMode is TSVMode. It must
// not be '"', '\r', '\n' or an invalid rune.
Separator rune
}
// ExecProgram runs an already-parsed program under the given interpreter
// configuration and returns the program's exit status code. The returned
// error is nil whenever the program ran to completion, even if its exit
// status is non-zero; a configuration error yields status 0 and the error.
//
// As of GoAWK version v1.16.0, a nil config is valid and will use the
// defaults (zero values). However, it may be simpler to use Exec in that
// case.
func ExecProgram(program *parser.Program, config *Config) (int, error) {
	interpreter := newInterp(program)
	if err := interpreter.setExecuteConfig(config); err != nil {
		return 0, err
	}
	return interpreter.executeAll()
}
// newInterp allocates an interpreter for program with default settings:
// storage for globals, the value stack and one map per global array,
// empty caches and stream tables, a random source seeded from 1.0, and
// the default values of the built-in variables (CONVFMT, OFMT, FS, RS,
// OFS, ORS, SUBSEP).
func newInterp(program *parser.Program) *interp {
	const defaultSeed = 1.0
	globalArrays := len(program.Arrays)

	p := &interp{
		// Parsed program plus its compiled functions and constant tables
		program:   program,
		functions: program.Compiled.Functions,
		nums:      program.Compiled.Nums,
		strs:      program.Compiled.Strs,
		regexes:   program.Compiled.Regexes,

		// Memory for variables and the virtual machine stack; the arrays
		// slice gets spare capacity beyond the global arrays.
		globals: make([]value, len(program.Scalars)),
		stack:   make([]value, initialStackSize),
		arrays:  make([]map[string]value, globalArrays, globalArrays+initialStackSize),

		// Caches and random number state
		regexCache:  make(map[string]*regexp.Regexp, 10),
		formatCache: make(map[string]cachedFormat, 10),
		randSeed:    defaultSeed,
		random:      rand.New(rand.NewSource(int64(math.Float64bits(defaultSeed)))),

		// Built-in variable defaults
		convertFormat:   "%.6g",
		outputFormat:    "%.6g",
		fieldSep:        " ",
		recordSep:       "\n",
		outputFieldSep:  " ",
		outputRecordSep: "\n",
		subscriptSep:    "\x1c",

		// Stream and command tables
		inputStreams:  make(map[string]io.ReadCloser),
		outputStreams: make(map[string]io.WriteCloser),
		commands:      make(map[string]*exec.Cmd),
		scanners:      make(map[string]*bufio.Scanner),
	}

	// Every global array starts out as an empty map.
	for i := range p.arrays {
		p.arrays[i] = make(map[string]value)
	}
	return p
}
// setExecuteConfig applies config to the interpreter ahead of execution. A
// nil config is treated as the zero Config. It returns an error if Vars or
// Environ has an odd length, if CSV options are supplied while the
// corresponding mode is DefaultMode, if assigning one of Vars fails, if the
// CSV configuration left after applying Vars is invalid, or if
// initialising the native functions fails.
func (p *interp) setExecuteConfig(config *Config) error {
if config == nil {
config = &Config{}
}
// Vars and Environ are flat name/value lists, so they must pair up.
if len(config.Vars)%2 != 0 {
return newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars))
}
if len(config.Environ)%2 != 0 {
return newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ))
}
// Set up I/O mode config (Vars will override)
p.inputMode = config.InputMode
p.csvInputConfig = config.CSVInput
switch p.inputMode {
case CSVMode:
// A zero separator means "use the mode's default".
if p.csvInputConfig.Separator == 0 {
p.csvInputConfig.Separator = ','
}
case TSVMode:
if p.csvInputConfig.Separator == 0 {
p.csvInputConfig.Separator = '\t'
}
case DefaultMode:
// CSV options make no sense without a CSV/TSV mode.
if p.csvInputConfig != (CSVInputConfig{}) {
return newError("input mode configuration not valid in default input mode")
}
}
p.outputMode = config.OutputMode
p.csvOutputConfig = config.CSVOutput
switch p.outputMode {
case CSVMode:
if p.csvOutputConfig.Separator == 0 {
p.csvOutputConfig.Separator = ','
}
case TSVMode:
if p.csvOutputConfig.Separator == 0 {
p.csvOutputConfig.Separator = '\t'
}
case DefaultMode:
if p.csvOutputConfig != (CSVOutputConfig{}) {
return newError("output mode configuration not valid in default output mode")
}
}
// Set up ARGV and other variables from config
argvIndex := p.program.Arrays["ARGV"]
p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0))
// ARGC counts ARGV[0] as well as the arguments.
p.argc = len(config.Args) + 1
for i, arg := range config.Args {
p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg))
}
p.noArgVars = config.NoArgVars
p.filenameIndex = 1
p.hadFiles = false
// Apply Vars in order; these may change INPUTMODE/OUTPUTMODE set above.
for i := 0; i < len(config.Vars); i += 2 {
err := p.setVarByName(config.Vars[i], config.Vars[i+1])
if err != nil {
return err
}
}
// After Vars has been handled, validate CSV configuration.
err := validateCSVInputConfig(p.inputMode, p.csvInputConfig)
if err != nil {
return err
}
err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig)
if err != nil {
return err
}
// Set up ENVIRON from config or environment variables
environIndex := p.program.Arrays["ENVIRON"]
if config.Environ != nil {
for i := 0; i < len(config.Environ); i += 2 {
p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1]))
}
} else {
for _, kv := range os.Environ() {
// Entries without an '=' are skipped.
eq := strings.IndexByte(kv, '=')
if eq >= 0 {
p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:]))
}
}
}
// Set up system shell command
if len(config.ShellCommand) != 0 {
p.shellCommand = config.ShellCommand
} else {
p.shellCommand = defaultShellCommand
}
// Set up I/O structures
p.noExec = config.NoExec
p.noFileWrites = config.NoFileWrites
p.noFileReads = config.NoFileReads
p.stdin = config.Stdin
if p.stdin == nil {
p.stdin = os.Stdin
}
p.output = config.Output
if p.output == nil {
p.output = bufio.NewWriterSize(os.Stdout, outputBufSize)
}
p.errorOutput = config.Error
if p.errorOutput == nil {
p.errorOutput = os.Stderr
}
// Initialize native Go functions (skipped if they are already set up)
if p.nativeFuncs == nil {
err := p.initNativeFuncs(config.Funcs)
if err != nil {
return err
}
}
return nil
}
// validateCSVInputConfig checks the CSV input options for the given mode.
// Modes other than CSVMode and TSVMode are never rejected. Otherwise it
// returns errCSVSeparator when the separator equals the comment
// character, when the separator is not a valid CSV separator, or when a
// nonzero comment character is not a valid CSV separator.
func validateCSVInputConfig(mode IOMode, config CSVInputConfig) error {
	switch {
	case mode != CSVMode && mode != TSVMode:
		return nil
	case config.Separator == config.Comment:
		return errCSVSeparator
	case !validCSVSeparator(config.Separator):
		return errCSVSeparator
	case config.Comment != 0 && !validCSVSeparator(config.Comment):
		return errCSVSeparator
	}
	return nil
}
// validateCSVOutputConfig checks the CSV output options for the given
// mode. It returns errCSVSeparator when the mode is CSVMode or TSVMode and
// the separator is not a valid CSV separator; otherwise it returns nil.
func validateCSVOutputConfig(mode IOMode, config CSVOutputConfig) error {
	csvLike := mode == CSVMode || mode == TSVMode
	if csvLike && !validCSVSeparator(config.Separator) {
		return errCSVSeparator
	}
	return nil
}
// validCSVSeparator reports whether r may be used as a CSV separator or
// comment character: it must not be NUL, a double quote, a carriage
// return, a newline or utf8.RuneError, and it must be a valid rune.
func validCSVSeparator(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
// executeAll runs the whole program: the BEGIN blocks, then the
// pattern-action blocks over the input, then the END blocks. closeAll is
// always called on the way out. It returns the exit status together with
// a nil error on normal completion (including an "exit" statement), or
// status 0 and the error when a stage fails.
func (p *interp) executeAll() (int, error) {
	defer p.closeAll()

	// fail produces the result for a stage that ended with a real error.
	// When context checking is enabled and checkContextNow reports an
	// error, that error is returned in place of the stage's own error.
	fail := func(stageErr error) (int, error) {
		if p.checkCtx {
			if ctxErr := p.checkContextNow(); ctxErr != nil {
				return 0, ctxErr
			}
		}
		return 0, stageErr
	}

	// BEGIN
	err := p.execute(p.program.Compiled.Begin)
	if err != nil && err != errExit {
		return fail(err)
	}
	if p.program.Actions == nil && p.program.End == nil {
		// Only BEGIN was specified, so no input is processed.
		return p.exitStatus, nil
	}

	// Pattern-action blocks, skipped when BEGIN executed "exit"
	if err != errExit {
		err = p.execActions(p.program.Compiled.Actions)
		if err != nil && err != errExit {
			return fail(err)
		}
	}

	// END runs even after an "exit" in an earlier stage
	err = p.execute(p.program.Compiled.End)
	if err != nil && err != errExit {
		return fail(err)
	}
	return p.exitStatus, nil
}
// Exec is a convenience wrapper that parses the AWK source and executes
// it with FS set to fieldSep. Input is read from the given reader (nil
// means use os.Stdin) and output is written to the given writer (nil
// means use a buffered version of os.Stdout). Non-fatal error messages
// are discarded. A parse error or execution error is returned; the
// program's exit status is not reported.
func Exec(source, fieldSep string, input io.Reader, output io.Writer) error {
	program, parseErr := parser.ParseProgram([]byte(source), nil)
	if parseErr != nil {
		return parseErr
	}
	_, execErr := ExecProgram(program, &Config{
		Stdin:  input,
		Output: output,
		Error:  ioutil.Discard,
		Vars:   []string{"FS", fieldSep},
	})
	return execErr
}
// execActions executes the pattern-action blocks (there may be several)
// against every input line. It reads lines with nextLine until io.EOF,
// and for each line evaluates each action's pattern and, if it matches,
// runs the action body (or prints the line when the body is empty).
// It returns nil at end of input, or the first error from reading,
// pattern evaluation, printing or body execution other than errNext.
func (p *interp) execActions(actions []compiler.Action) error {
// One flag per action recording whether its range pattern is currently
// active; allocated lazily the first time a range pattern is seen.
var inRange []bool
lineLoop:
for {
// Read and setup next line of input
line, err := p.nextLine()
if err == io.EOF {
break
}
if err != nil {
return err
}
p.setLine(line, false)
p.reparseCSV = false
// Execute all the pattern-action blocks for each line
for i, action := range actions {
// First determine whether the pattern matches
matched := false
switch len(action.Pattern) {
case 0:
// No pattern is equivalent to pattern evaluating to true
matched = true
case 1:
// Single boolean pattern
err := p.execute(action.Pattern[0])
if err != nil {
return err
}
matched = p.pop().boolean()
case 2:
// Range pattern (matches between start and stop lines)
if inRange == nil {
inRange = make([]bool, len(actions))
}
// Only test the start pattern while outside the range.
if !inRange[i] {
err := p.execute(action.Pattern[0])
if err != nil {
return err
}
inRange[i] = p.pop().boolean()
}
// matched is fixed before the stop pattern is tested, so the line
// that ends the range still counts as matching.
matched = inRange[i]
if inRange[i] {
err := p.execute(action.Pattern[1])
if err != nil {
return err
}
inRange[i] = !p.pop().boolean()
}
}
if !matched {
continue
}
// No action is equivalent to { print $0 }
if len(action.Body) == 0 {
err := p.printLine(p.output, p.line)
if err != nil {
return err
}
continue
}
// Execute the body statements
err := p.execute(action.Body)
if err == errNext {
// "next" statement skips straight to next line
continue lineLoop
}
if err != nil {
return err
}
}
}
return nil
}
// getSpecial returns the current value of the special (built-in)
// variable identified by index. Integer-backed variables come back as
// numbers, FILENAME as its stored value, and the rest as strings.
// Reading NF first ensures the fields have been split. It panics on an
// index that is not a known special variable.
func (p *interp) getSpecial(index int) value {
	var n int
	switch index {
	case ast.V_NF:
		p.ensureFields()
		n = p.numFields
	case ast.V_NR:
		n = p.lineNum
	case ast.V_RLENGTH:
		n = p.matchLength
	case ast.V_RSTART:
		n = p.matchStart
	case ast.V_FNR:
		n = p.fileLineNum
	case ast.V_ARGC:
		n = p.argc
	case ast.V_FILENAME:
		return p.filename
	default:
		// Everything else is string-valued.
		var s string
		switch index {
		case ast.V_CONVFMT:
			s = p.convertFormat
		case ast.V_FS:
			s = p.fieldSep
		case ast.V_OFMT:
			s = p.outputFormat
		case ast.V_OFS:
			s = p.outputFieldSep
		case ast.V_ORS:
			s = p.outputRecordSep
		case ast.V_RS:
			s = p.recordSep
		case ast.V_RT:
			s = p.recordTerminator
		case ast.V_SUBSEP:
			s = p.subscriptSep
		case ast.V_INPUTMODE:
			s = inputModeString(p.inputMode, p.csvInputConfig)
		case ast.V_OUTPUTMODE:
			s = outputModeString(p.outputMode, p.csvOutputConfig)
		default:
			panic(fmt.Sprintf("unexpected special variable index: %d", index))
		}
		return str(s)
	}
	return num(float64(n))
}
// setVarByName assigns value to the variable called name, which may be a
// special variable or a global scalar of the program. Special variables
// are routed through setSpecial, whose error is returned. A name that
// the program does not define is ignored without error.
func (p *interp) setVarByName(name, value string) error {
	if special := ast.SpecialVarIndex(name); special > 0 {
		return p.setSpecial(special, numStr(value))
	}
	if global, defined := p.program.Scalars[name]; defined {
		p.globals[global] = numStr(value)
	}
	return nil
}
// setSpecial sets the special (built-in) variable identified by index to
// v, updating any dependent interpreter state. It returns an error when
// NF is negative or larger than maxFieldIndex, when FS or RS is an
// invalid regex, or when INPUTMODE/OUTPUTMODE cannot be parsed or fails
// validation. It panics on an index that is not a known special variable.
func (p *interp) setSpecial(index int, v value) error {
switch index {
case ast.V_NF:
numFields := int(v.num())
if numFields < 0 {
return newError("NF set to negative value: %d", numFields)
}
if numFields > maxFieldIndex {
return newError("NF set too large: %d", numFields)
}
p.ensureFields()
p.numFields = numFields
// Shrink: drop the fields beyond the new count.
if p.numFields < len(p.fields) {
p.fields = p.fields[:p.numFields]
p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields]
}
// Grow: pad with empty fields up to the new count.
for i := len(p.fields); i < p.numFields; i++ {
p.fields = append(p.fields, "")
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
}
// Rebuild the record ($0) from the adjusted fields.
p.line = p.joinFields(p.fields)
p.lineIsTrueStr = true
case ast.V_NR:
p.lineNum = int(v.num())
case ast.V_RLENGTH:
p.matchLength = int(v.num())
case ast.V_RSTART:
p.matchStart = int(v.num())
case ast.V_FNR:
p.fileLineNum = int(v.num())
case ast.V_ARGC:
p.argc = int(v.num())
case ast.V_CONVFMT:
p.convertFormat = p.toString(v)
case ast.V_FILENAME:
p.filename = v
case ast.V_FS:
p.fieldSep = p.toString(v)
// Only a separator longer than one rune is compiled as a regex; for
// shorter values fieldSepRegex is left untouched.
if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields
re, err := regexp.Compile(compiler.AddRegexFlags(p.fieldSep))
if err != nil {
return newError("invalid regex %q: %s", p.fieldSep, err)
}
p.fieldSepRegex = re
}
case ast.V_OFMT:
p.outputFormat = p.toString(v)
case ast.V_OFS:
p.outputFieldSep = p.toString(v)
case ast.V_ORS:
p.outputRecordSep = p.toString(v)
case ast.V_RS:
p.recordSep = p.toString(v)
switch { // compare to interp.newScanner
case len(p.recordSep) <= 1:
// Simple cases use specialized splitters, not regex
case utf8.RuneCountInString(p.recordSep) == 1:
// Multi-byte unicode char falls back to regex splitter
sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars
p.recordSepRegex = regexp.MustCompile(sep)
default:
// Anything longer than one rune is treated as a regex.
re, err := regexp.Compile(compiler.AddRegexFlags(p.recordSep))
if err != nil {
return newError("invalid regex %q: %s", p.recordSep, err)
}
p.recordSepRegex = re
}
case ast.V_RT:
p.recordTerminator = p.toString(v)
case ast.V_SUBSEP:
p.subscriptSep = p.toString(v)
case ast.V_INPUTMODE:
// Note: the mode and config fields are assigned before validation runs.
var err error
p.inputMode, p.csvInputConfig, err = parseInputMode(p.toString(v))
if err != nil {
return err
}
err = validateCSVInputConfig(p.inputMode, p.csvInputConfig)
if err != nil {
return err
}
case ast.V_OUTPUTMODE:
// Note: the mode and config fields are assigned before validation runs.
var err error
p.outputMode, p.csvOutputConfig, err = parseOutputMode(p.toString(v))
if err != nil {
return err
}
err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig)
if err != nil {
return err
}
default:
panic(fmt.Sprintf("unexpected special variable index: %d", index))
}
return nil
}
// arrayIndex translates a (scope, index) pair into a position in the
// p.arrays slice. A global array's index is used directly; any other
// scope is resolved through the last entry of p.localArrays.
func (p *interp) arrayIndex(scope ast.VarScope, index int) int {
	if scope != ast.ScopeGlobal {
		current := p.localArrays[len(p.localArrays)-1]
		return current[index]
	}
	return index
}
// array returns the array map identified by the given scope and index.
func (p *interp) array(scope ast.VarScope, index int) map[string]value {
	resolved := p.arrayIndex(scope, index)
	return p.arrays[resolved]
}
// localArray returns the local array with the given index, resolved
// through the last entry of p.localArrays.
func (p *interp) localArray(index int) map[string]value {
	current := p.localArrays[len(p.localArrays)-1]
	return p.arrays[current[index]]
}
// setArrayValue stores v under key index in the array identified by
// scope and arrayIndex.
func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) {
	p.array(scope, arrayIndex)[index] = v
}
// getField returns the value of field number index, equivalent to
// "$index". Index 0 is the whole record. A negative index counts back
// from the last field (-1 is the last one). An index outside the current
// fields yields the empty string. A field or record flagged as a true
// string is returned via str, otherwise via numStr.
func (p *interp) getField(index int) value {
	if index == 0 {
		if p.lineIsTrueStr {
			return str(p.line)
		}
		return numStr(p.line)
	}

	p.ensureFields()
	pos := index
	if pos < 1 {
		// Negative index: translate to a position from the front.
		pos += len(p.fields) + 1
	}
	if pos < 1 || pos > len(p.fields) {
		return str("")
	}

	text := p.fields[pos-1]
	if p.fieldsIsTrueStr[pos-1] {
		return str(text)
	}
	return numStr(text)
}
// getFieldByName returns the value of the field with the given header
// name (for CSV/TSV mode), as in @"name". The name-to-index map is built
// from p.fieldNames on first use. It returns an error when no field names
// are available, and the empty string for a name that is not present.
func (p *interp) getFieldByName(name string) (value, error) {
	if p.fieldIndexes == nil {
		if p.fieldNames == nil {
			return null(), newError(`@ only supported if header parsing enabled; use -H or add "header" to INPUTMODE`)
		}
		// Lazily build the lookup table; indexes are 1-based.
		lookup := make(map[string]int, len(p.fieldNames))
		for pos, fieldName := range p.fieldNames {
			lookup[fieldName] = pos + 1
		}
		p.fieldIndexes = lookup
	}

	if index := p.fieldIndexes[name]; index != 0 {
		return p.getField(index), nil
	}
	return str(""), nil
}
// setField assigns value to field number index, equivalent to
// "$index = value". Index 0 replaces the whole record. An index above
// maxFieldIndex is an error. A negative index counts back from the last
// field; one that reaches before the first field is silently ignored.
// Missing fields up to index are created as empty strings, and the
// record is rebuilt from the fields afterwards.
func (p *interp) setField(index int, value string) error {
	switch {
	case index == 0:
		p.setLine(value, true)
		return nil
	case index > maxFieldIndex:
		return newError("field index too large: %d", index)
	}

	p.ensureFields()
	if index < 1 {
		index += len(p.fields) + 1
		if index < 1 {
			return nil
		}
	}

	// Pad with empty fields until the target position exists.
	for len(p.fields) < index {
		p.fields = append(p.fields, "")
		p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true)
	}

	target := index - 1
	p.fields[target] = value
	p.fieldsIsTrueStr[target] = true
	p.numFields = len(p.fields)

	// Rebuild the record from the updated fields.
	p.line = p.joinFields(p.fields)
	p.lineIsTrueStr = true
	return nil
}
// joinFields combines fields into a single record string. In CSV or TSV
// output mode the fields are written with writeCSV and the trailing
// newline is removed; in any other mode they are joined with the output
// field separator.
func (p *interp) joinFields(fields []string) string {
	if p.outputMode != CSVMode && p.outputMode != TSVMode {
		return strings.Join(fields, p.outputFieldSep)
	}

	buf := &p.csvJoinFieldsBuf
	buf.Reset()
	// The error is intentionally discarded; the destination is an
	// in-memory buffer.
	_ = p.writeCSV(buf, fields)
	raw := buf.Bytes()
	return string(raw[:len(raw)-lenNewline(raw)])
}
// toString renders v as a string, formatting it with the current CONVFMT.
func (p *interp) toString(v value) string {
	format := p.convertFormat
	return v.str(format)
}
// compileRegex returns the compiled form of the regex string, taking it
// from the regex cache when present. A string that fails to compile
// yields an "invalid regex" error. The cache is deliberately simple
// (not LRU): only the first maxCachedRegexes distinct regexes are kept.
func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) {
	cached, hit := p.regexCache[regex]
	if hit {
		return cached, nil
	}

	compiled, err := regexp.Compile(compiler.AddRegexFlags(regex))
	if err != nil {
		return nil, newError("invalid regex %q: %s", regex, err)
	}

	if roomLeft := len(p.regexCache) < maxCachedRegexes; roomLeft {
		p.regexCache[regex] = compiled
	}
	return compiled, nil
}
// getDefaultShellCommand returns the default shell invocation: "sh -c"
// on Windows and "/bin/sh -c" everywhere else.
func getDefaultShellCommand() []string {
	if runtime.GOOS == "windows" {
		return []string{"sh", "-c"}
	}
	return []string{"/bin/sh", "-c"}
}
// inputModeString renders an input mode and its CSV options in the
// textual form used for INPUTMODE: the mode name ("csv" or "tsv"),
// followed by " separator=X" when the separator differs from the mode's
// default, " comment=X" when a comment character is set, and " header"
// when header parsing is on. DefaultMode renders as the empty string.
func inputModeString(mode IOMode, csvConfig CSVInputConfig) string {
	var (
		out         string
		standardSep rune
	)
	switch mode {
	case DefaultMode:
		return ""
	case CSVMode:
		out, standardSep = "csv", ','
	case TSVMode:
		out, standardSep = "tsv", '\t'
	}

	if sep := csvConfig.Separator; sep != standardSep {
		out += " separator=" + string(sep)
	}
	if comment := csvConfig.Comment; comment != 0 {
		out += " comment=" + string(comment)
	}
	if csvConfig.Header {
		out += " header"
	}
	return out
}
// parseInputMode parses an INPUTMODE string: whitespace-separated words,
// the first being "csv" or "tsv" and the rest being options of the form
// "separator=X", "comment=X" or "header" (optionally "header=true" or
// "header=false"). An empty or all-whitespace string means DefaultMode.
// The separator starts at the mode's default (',' or '\t'). On any
// invalid mode, key or value it returns DefaultMode, a zero config and
// an error.
func parseInputMode(s string) (mode IOMode, csvConfig CSVInputConfig, err error) {
	words := strings.Fields(s)
	if len(words) == 0 {
		return DefaultMode, CSVInputConfig{}, nil
	}

	switch name := words[0]; name {
	case "csv":
		mode, csvConfig.Separator = CSVMode, ','
	case "tsv":
		mode, csvConfig.Separator = TSVMode, '\t'
	default:
		return DefaultMode, CSVInputConfig{}, newError("invalid input mode %q", name)
	}

	for _, word := range words[1:] {
		// Split "key=val"; a word without '=' is a bare key.
		key, val := word, ""
		if eq := strings.IndexByte(word, '='); eq >= 0 {
			key, val = word[:eq], word[eq+1:]
		}

		switch key {
		case "separator":
			// The value must be exactly one rune.
			r, size := utf8.DecodeRuneInString(val)
			if size == 0 || size != len(val) {
				return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV separator %q", val)
			}
			csvConfig.Separator = r
		case "comment":
			r, size := utf8.DecodeRuneInString(val)
			if size == 0 || size != len(val) {
				return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV comment character %q", val)
			}
			csvConfig.Comment = r
		case "header":
			switch val {
			case "", "true":
				csvConfig.Header = true
			case "false":
				csvConfig.Header = false
			default:
				return DefaultMode, CSVInputConfig{}, newError("invalid header value %q", val)
			}
		default:
			return DefaultMode, CSVInputConfig{}, newError("invalid input mode key %q", key)
		}
	}
	return mode, csvConfig, nil
}
// outputModeString renders an output mode and its CSV options in the
// textual form used for OUTPUTMODE: the mode name ("csv" or "tsv"),
// followed by " separator=X" when the separator differs from the mode's
// default. DefaultMode renders as the empty string.
func outputModeString(mode IOMode, csvConfig CSVOutputConfig) string {
	var (
		out         string
		standardSep rune
	)
	switch mode {
	case DefaultMode:
		return ""
	case CSVMode:
		out, standardSep = "csv", ','
	case TSVMode:
		out, standardSep = "tsv", '\t'
	}

	if sep := csvConfig.Separator; sep != standardSep {
		out += " separator=" + string(sep)
	}
	return out
}
// parseOutputMode parses an OUTPUTMODE string: whitespace-separated
// words, the first being "csv" or "tsv" and the rest being options, of
// which only "separator=X" is recognised. An empty or all-whitespace
// string means DefaultMode. The separator starts at the mode's default
// (',' or '\t'). On any invalid mode, key or value it returns
// DefaultMode, a zero config and an error.
func parseOutputMode(s string) (mode IOMode, csvConfig CSVOutputConfig, err error) {
	words := strings.Fields(s)
	if len(words) == 0 {
		return DefaultMode, CSVOutputConfig{}, nil
	}

	switch name := words[0]; name {
	case "csv":
		mode, csvConfig.Separator = CSVMode, ','
	case "tsv":
		mode, csvConfig.Separator = TSVMode, '\t'
	default:
		return DefaultMode, CSVOutputConfig{}, newError("invalid output mode %q", name)
	}

	for _, word := range words[1:] {
		// Split "key=val"; a word without '=' is a bare key.
		key, val := word, ""
		if eq := strings.IndexByte(word, '='); eq >= 0 {
			key, val = word[:eq], word[eq+1:]
		}

		if key != "separator" {
			return DefaultMode, CSVOutputConfig{}, newError("invalid output mode key %q", key)
		}
		// The value must be exactly one rune.
		r, size := utf8.DecodeRuneInString(val)
		if size == 0 || size != len(val) {
			return DefaultMode, CSVOutputConfig{}, newError("invalid CSV/TSV separator %q", val)
		}
		csvConfig.Separator = r
	}
	return mode, csvConfig, nil
}