diff --git a/play/Containerfile b/play/Containerfile new file mode 100644 index 0000000..ada4f9c --- /dev/null +++ b/play/Containerfile @@ -0,0 +1,16 @@ +FROM docker.io/golang:1.19 AS builder +WORKDIR /src + +COPY play/ /src/ +RUN go build -o /play play.go + +FROM docker.io/debian:stable-slim +WORKDIR /play + +COPY --from=builder /play . +COPY grawkit . + +USER nobody +EXPOSE 8080 + +CMD ["/play/play", "-script-path", "/play/grawkit", "-listen-address", ":8080"] diff --git a/play/Dockerfile b/play/Dockerfile deleted file mode 100644 index f013797..0000000 --- a/play/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM golang:1.13 AS builder -WORKDIR /mnt - -COPY play/go.mod play/go.sum play/play.go /mnt/ -RUN go build -o play play.go - -FROM debian:stable-slim -WORKDIR /play - -COPY --from=builder /mnt/play . -COPY grawkit . -COPY play/static static - -USER nobody -EXPOSE 8080 - -CMD ["/play/play", "-script-path", "grawkit", "-listen-address", ":8080"] diff --git a/play/go.mod b/play/go.mod index 7047bb0..ebb0ecb 100644 --- a/play/go.mod +++ b/play/go.mod @@ -1,5 +1,5 @@ module github.com/deuill/grawkit/play -go 1.17 +go 1.19 -require github.com/benhoyt/goawk v1.13.0 +require github.com/benhoyt/goawk v1.20.0 diff --git a/play/go.sum b/play/go.sum index 2a43f42..fdc853e 100644 --- a/play/go.sum +++ b/play/go.sum @@ -1,2 +1,2 @@ -github.com/benhoyt/goawk v1.13.0 h1:/Iu42ErHsT5vHrpWyewpI98hB2PHBk66o+oLZs4drPs= -github.com/benhoyt/goawk v1.13.0/go.mod h1:UKzPyqDh9O7HZ/ftnU33MYlAP2rPbXdwQ+OVlEOPsjM= +github.com/benhoyt/goawk v1.20.0 h1:oz81agTfP/8Z7afMvmOwX4Ms9qTtGhZxPEzHCycIFds= +github.com/benhoyt/goawk v1.20.0/go.mod h1:Dp3jBsApuiItYR9atsCm//q/70OnqjihLh5WkU6eW7U= diff --git a/play/play.go b/play/play.go index 3355996..d38ccc0 100644 --- a/play/play.go +++ b/play/play.go @@ -5,7 +5,6 @@ import ( "bytes" "errors" "flag" - "io/ioutil" "log" "net" "net/http" @@ -17,6 +16,9 @@ import ( "text/template" "time" + // Internal packages. 
+ "github.com/deuill/grawkit/play/static" + // Third-party packages "github.com/benhoyt/goawk/interp" "github.com/benhoyt/goawk/parser" @@ -33,21 +35,13 @@ const ( ) var ( - // Command-line flags to parse. scriptPath = flag.String("script-path", "../grawkit", "The path to the Grawkit script") - staticDir = flag.String("static-dir", "static", "The directory under which static files can be found") listenAddress = flag.String("listen-address", "localhost:8080", "The default address to listen on") index *template.Template // The base template to render. program *parser.Program // The parsed version of the Grawkit script. ) -type templateData struct { - Content string - Preview string - Error string -} - // ParseContent accepts un-filtered POST form content, and returns the content to render as a string. // An error is returned if the content is missing or otherwise invalid. func parseContent(form url.Values) (string, error) { @@ -70,7 +64,11 @@ func parseContent(form url.Values) (string, error) { func handleRequest(w http.ResponseWriter, r *http.Request) { // Handle template rendering on root path. if r.URL.Path == "/" { - var data templateData + var data struct { + Content string + Preview string + Error string + } var outbuf, errbuf bytes.Buffer switch r.Method { @@ -117,22 +115,8 @@ func handleRequest(w http.ResponseWriter, r *http.Request) { return } - // Get sanitized filename for request path given. - name := path.Join(*staticDir, path.Clean(r.URL.Path)) - - // Check if a file exists for the path requested. - stat, err := os.Stat(name) - if os.IsNotExist(err) || stat != nil && stat.IsDir() { - http.NotFound(w, r) - return - } else if err != nil { - code := http.StatusInternalServerError - http.Error(w, http.StatusText(code), code) - return - } - // Serve file as fallback. 
- http.ServeFile(w, r, name) + http.FileServer(http.FS(static.FS)).ServeHTTP(w, r) } // Setup reads configuration flags and initializes global state for the service, returning an error @@ -144,17 +128,17 @@ func setup() error { // Set up and parse known template files. var err error var files = []string{ - path.Join(*staticDir, "template", "index.template"), - path.Join(*staticDir, "template", "default-content.template"), - path.Join(*staticDir, "template", "default-preview.template"), + path.Join("template", "index.template"), + path.Join("template", "default-content.template"), + path.Join("template", "default-preview.template"), } - if index, err = template.ParseFiles(files...); err != nil { + if index, err = template.ParseFS(static.FS, files...); err != nil { return err } // Parse Grawkit script into concrete representation. - if script, err := ioutil.ReadFile(*scriptPath); err != nil { + if script, err := os.ReadFile(*scriptPath); err != nil { return err } else if program, err = parser.ParseProgram(script, nil); err != nil { return err diff --git a/play/static/css/main.css b/play/static/css/main.css index d10684b..d514682 100644 --- a/play/static/css/main.css +++ b/play/static/css/main.css @@ -51,10 +51,8 @@ a:hover, a:active { border: 0.2rem solid #333; border-radius: 0; color: #fefefe; - font-family: monospace; font-weight: bold; padding: 0 1rem; - text-transform: none; transition: background 0.2s ease, border 0.2s ease, color 0.2s ease; } diff --git a/play/static/static.go b/play/static/static.go new file mode 100644 index 0000000..2b7f342 --- /dev/null +++ b/play/static/static.go @@ -0,0 +1,9 @@ +package static + +import "embed" + +// FS is an [fs.FS] implementation containing all static files needed for serving the Grawkit +// playground. 
+// +//go:embed * +var FS embed.FS diff --git a/play/vendor/github.com/benhoyt/goawk/LICENSE.txt b/play/vendor/github.com/benhoyt/goawk/LICENSE.txt index 620ad38..e39bc70 100644 --- a/play/vendor/github.com/benhoyt/goawk/LICENSE.txt +++ b/play/vendor/github.com/benhoyt/goawk/LICENSE.txt @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2019 Ben Hoyt +Copyright (c) 2022 Ben Hoyt Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go index 1007d44..8232765 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/ast.go @@ -10,6 +10,35 @@ import ( . "github.com/benhoyt/goawk/lexer" ) +// Program is an entire AWK program. +type Program struct { + Begin []Stmts + Actions []Action + End []Stmts + Functions []Function + Scalars map[string]int + Arrays map[string]int +} + +// String returns an indented, pretty-printed version of the parsed +// program. +func (p *Program) String() string { + parts := []string{} + for _, ss := range p.Begin { + parts = append(parts, "BEGIN {\n"+ss.String()+"}") + } + for _, a := range p.Actions { + parts = append(parts, a.String()) + } + for _, ss := range p.End { + parts = append(parts, "END {\n"+ss.String()+"}") + } + for _, function := range p.Functions { + parts = append(parts, function.String()) + } + return strings.Join(parts, "\n\n") +} + // Stmts is a block containing multiple statements. type Stmts []Stmt @@ -53,24 +82,25 @@ type Expr interface { } // All these types implement the Expr interface. 
-func (e *FieldExpr) expr() {} -func (e *UnaryExpr) expr() {} -func (e *BinaryExpr) expr() {} -func (e *ArrayExpr) expr() {} -func (e *InExpr) expr() {} -func (e *CondExpr) expr() {} -func (e *NumExpr) expr() {} -func (e *StrExpr) expr() {} -func (e *RegExpr) expr() {} -func (e *VarExpr) expr() {} -func (e *IndexExpr) expr() {} -func (e *AssignExpr) expr() {} -func (e *AugAssignExpr) expr() {} -func (e *IncrExpr) expr() {} -func (e *CallExpr) expr() {} -func (e *UserCallExpr) expr() {} -func (e *MultiExpr) expr() {} -func (e *GetlineExpr) expr() {} +func (e *FieldExpr) expr() {} +func (e *NamedFieldExpr) expr() {} +func (e *UnaryExpr) expr() {} +func (e *BinaryExpr) expr() {} +func (e *ArrayExpr) expr() {} +func (e *InExpr) expr() {} +func (e *CondExpr) expr() {} +func (e *NumExpr) expr() {} +func (e *StrExpr) expr() {} +func (e *RegExpr) expr() {} +func (e *VarExpr) expr() {} +func (e *IndexExpr) expr() {} +func (e *AssignExpr) expr() {} +func (e *AugAssignExpr) expr() {} +func (e *IncrExpr) expr() {} +func (e *CallExpr) expr() {} +func (e *UserCallExpr) expr() {} +func (e *MultiExpr) expr() {} +func (e *GetlineExpr) expr() {} // FieldExpr is an expression like $0. type FieldExpr struct { @@ -81,6 +111,15 @@ func (e *FieldExpr) String() string { return "$" + e.Index.String() } +// NamedFieldExpr is an expression like @"name". +type NamedFieldExpr struct { + Field Expr +} + +func (e *NamedFieldExpr) String() string { + return "@" + e.Field.String() +} + // UnaryExpr is an expression like -1234. type UnaryExpr struct { Op Token @@ -155,7 +194,11 @@ type NumExpr struct { } func (e *NumExpr) String() string { - return fmt.Sprintf("%.6g", e.Value) + if e.Value == float64(int(e.Value)) { + return strconv.Itoa(int(e.Value)) + } else { + return fmt.Sprintf("%.6g", e.Value) + } } // StrExpr is a literal string like "foo". 
diff --git a/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go b/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go index 1ea06e4..c8207e4 100644 --- a/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go +++ b/play/vendor/github.com/benhoyt/goawk/internal/ast/specialvars.go @@ -2,6 +2,10 @@ package ast +import ( + "fmt" +) + const ( V_ILLEGAL = iota V_ARGC @@ -9,11 +13,13 @@ const ( V_FILENAME V_FNR V_FS + V_INPUTMODE V_NF V_NR V_OFMT V_OFS V_ORS + V_OUTPUTMODE V_RLENGTH V_RS V_RSTART @@ -24,21 +30,23 @@ const ( ) var specialVars = map[string]int{ - "ARGC": V_ARGC, - "CONVFMT": V_CONVFMT, - "FILENAME": V_FILENAME, - "FNR": V_FNR, - "FS": V_FS, - "NF": V_NF, - "NR": V_NR, - "OFMT": V_OFMT, - "OFS": V_OFS, - "ORS": V_ORS, - "RLENGTH": V_RLENGTH, - "RS": V_RS, - "RSTART": V_RSTART, - "RT": V_RT, - "SUBSEP": V_SUBSEP, + "ARGC": V_ARGC, + "CONVFMT": V_CONVFMT, + "FILENAME": V_FILENAME, + "FNR": V_FNR, + "FS": V_FS, + "INPUTMODE": V_INPUTMODE, + "NF": V_NF, + "NR": V_NR, + "OFMT": V_OFMT, + "OFS": V_OFS, + "ORS": V_ORS, + "OUTPUTMODE": V_OUTPUTMODE, + "RLENGTH": V_RLENGTH, + "RS": V_RS, + "RSTART": V_RSTART, + "RT": V_RT, + "SUBSEP": V_SUBSEP, } // SpecialVarIndex returns the "index" of the special variable, or 0 @@ -46,3 +54,47 @@ var specialVars = map[string]int{ func SpecialVarIndex(name string) int { return specialVars[name] } + +// SpecialVarName returns the name of the special variable by index. 
+func SpecialVarName(index int) string { + switch index { + case V_ILLEGAL: + return "ILLEGAL" + case V_ARGC: + return "ARGC" + case V_CONVFMT: + return "CONVFMT" + case V_FILENAME: + return "FILENAME" + case V_FNR: + return "FNR" + case V_FS: + return "FS" + case V_INPUTMODE: + return "INPUTMODE" + case V_NF: + return "NF" + case V_NR: + return "NR" + case V_OFMT: + return "OFMT" + case V_OFS: + return "OFS" + case V_ORS: + return "ORS" + case V_OUTPUTMODE: + return "OUTPUTMODE" + case V_RLENGTH: + return "RLENGTH" + case V_RS: + return "RS" + case V_RSTART: + return "RSTART" + case V_RT: + return "RT" + case V_SUBSEP: + return "SUBSEP" + default: + return fmt.Sprintf("<unknown special var %d>", index) + } +} diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go new file mode 100644 index 0000000..40b2f89 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/compiler.go @@ -0,0 +1,1005 @@ +// Package compiler compiles an AST to virtual machine instructions. +package compiler + +import ( + "fmt" + "math" + "regexp" + + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/lexer" +) + +// Program holds an entire compiled program. +type Program struct { + Begin []Opcode + Actions []Action + End []Opcode + Functions []Function + Nums []float64 + Strs []string + Regexes []*regexp.Regexp + + // For disassembly + scalarNames []string + arrayNames []string + nativeFuncNames []string +} + +// Action holds a compiled pattern-action block. +type Action struct { + Pattern [][]Opcode + Body []Opcode +} + +// Function holds a compiled function. +type Function struct { + Name string + Params []string + Arrays []bool + NumScalars int + NumArrays int + Body []Opcode +} + +// compileError is the internal error type raised in the rare cases when +// compilation can't succeed, such as program too large (jump offsets greater +// than 2GB). 
Most actual problems are caught at parse time. +type compileError struct { + message string +} + +func (e *compileError) Error() string { + return e.message +} + +// Compile compiles an AST (parsed program) into virtual machine instructions. +func Compile(prog *ast.Program) (compiledProg *Program, err error) { + defer func() { + // The compiler uses panic with a *compileError to signal compile + // errors internally, and they're caught here. This avoids the + // need to check errors everywhere. + if r := recover(); r != nil { + // Convert to compileError or re-panic + err = r.(*compileError) + } + }() + + p := &Program{} + + // Reuse identical constants across entire program. + indexes := constantIndexes{ + nums: make(map[float64]int), + strs: make(map[string]int), + regexes: make(map[string]int), + } + + // Compile functions. For functions called before they're defined or + // recursive functions, we have to set most p.Functions data first, then + // compile Body afterward. + p.Functions = make([]Function, len(prog.Functions)) + for i, astFunc := range prog.Functions { + numArrays := 0 + for _, a := range astFunc.Arrays { + if a { + numArrays++ + } + } + compiledFunc := Function{ + Name: astFunc.Name, + Params: astFunc.Params, + Arrays: astFunc.Arrays, + NumScalars: len(astFunc.Arrays) - numArrays, + NumArrays: numArrays, + } + p.Functions[i] = compiledFunc + } + for i, astFunc := range prog.Functions { + c := &compiler{program: p, indexes: indexes} + c.stmts(astFunc.Body) + p.Functions[i].Body = c.finish() + } + + // Compile BEGIN blocks. + for _, stmts := range prog.Begin { + c := &compiler{program: p, indexes: indexes} + c.stmts(stmts) + p.Begin = append(p.Begin, c.finish()...) + } + + // Compile pattern-action blocks. 
+ for _, action := range prog.Actions { + var pattern [][]Opcode + switch len(action.Pattern) { + case 0: + // Always considered a match + case 1: + c := &compiler{program: p, indexes: indexes} + c.expr(action.Pattern[0]) + pattern = [][]Opcode{c.finish()} + case 2: + c := &compiler{program: p, indexes: indexes} + c.expr(action.Pattern[0]) + pattern = append(pattern, c.finish()) + c = &compiler{program: p, indexes: indexes} + c.expr(action.Pattern[1]) + pattern = append(pattern, c.finish()) + } + var body []Opcode + if len(action.Stmts) > 0 { + c := &compiler{program: p, indexes: indexes} + c.stmts(action.Stmts) + body = c.finish() + } + p.Actions = append(p.Actions, Action{ + Pattern: pattern, + Body: body, + }) + } + + // Compile END blocks. + for _, stmts := range prog.End { + c := &compiler{program: p, indexes: indexes} + c.stmts(stmts) + p.End = append(p.End, c.finish()...) + } + + // These are only used for disassembly, but set them up here. + p.scalarNames = make([]string, len(prog.Scalars)) + for name, index := range prog.Scalars { + p.scalarNames[index] = name + } + p.arrayNames = make([]string, len(prog.Arrays)) + for name, index := range prog.Arrays { + p.arrayNames[index] = name + } + + return p, nil +} + +// So we can look up the indexes of constants that have been used before. +type constantIndexes struct { + nums map[float64]int + strs map[string]int + regexes map[string]int +} + +// Holds the compilation state. +type compiler struct { + program *Program + indexes constantIndexes + code []Opcode + breaks [][]int + continues [][]int +} + +func (c *compiler) add(ops ...Opcode) { + c.code = append(c.code, ops...) 
+} + +func (c *compiler) finish() []Opcode { + return c.code +} + +func (c *compiler) stmts(stmts []ast.Stmt) { + for _, stmt := range stmts { + c.stmt(stmt) + } +} + +func (c *compiler) stmt(stmt ast.Stmt) { + switch s := stmt.(type) { + case *ast.ExprStmt: + // Optimize assignment expressions to avoid the extra Dupe and Drop + switch expr := s.Expr.(type) { + case *ast.AssignExpr: + c.expr(expr.Right) + c.assign(expr.Left) + return + + case *ast.IncrExpr: + // Pre or post doesn't matter for an assignment expression + switch target := expr.Expr.(type) { + case *ast.VarExpr: + switch target.Scope { + case ast.ScopeGlobal: + c.add(IncrGlobal, incrAmount(expr.Op), opcodeInt(target.Index)) + case ast.ScopeLocal: + c.add(IncrLocal, incrAmount(expr.Op), opcodeInt(target.Index)) + default: // ScopeSpecial + c.add(IncrSpecial, incrAmount(expr.Op), opcodeInt(target.Index)) + } + case *ast.FieldExpr: + c.expr(target.Index) + c.add(IncrField, incrAmount(expr.Op)) + case *ast.IndexExpr: + c.index(target.Index) + switch target.Array.Scope { + case ast.ScopeGlobal: + c.add(IncrArrayGlobal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) + default: // ScopeLocal + c.add(IncrArrayLocal, incrAmount(expr.Op), opcodeInt(target.Array.Index)) + } + } + return + + case *ast.AugAssignExpr: + c.expr(expr.Right) + + var augOp AugOp + switch expr.Op { + case lexer.ADD: + augOp = AugOpAdd + case lexer.SUB: + augOp = AugOpSub + case lexer.MUL: + augOp = AugOpMul + case lexer.DIV: + augOp = AugOpDiv + case lexer.POW: + augOp = AugOpPow + default: // MOD + augOp = AugOpMod + } + + switch target := expr.Left.(type) { + case *ast.VarExpr: + switch target.Scope { + case ast.ScopeGlobal: + c.add(AugAssignGlobal, Opcode(augOp), opcodeInt(target.Index)) + case ast.ScopeLocal: + c.add(AugAssignLocal, Opcode(augOp), opcodeInt(target.Index)) + default: // ScopeSpecial + c.add(AugAssignSpecial, Opcode(augOp), opcodeInt(target.Index)) + } + case *ast.FieldExpr: + c.expr(target.Index) + 
c.add(AugAssignField, Opcode(augOp)) + case *ast.IndexExpr: + c.index(target.Index) + switch target.Array.Scope { + case ast.ScopeGlobal: + c.add(AugAssignArrayGlobal, Opcode(augOp), opcodeInt(target.Array.Index)) + default: // ScopeLocal + c.add(AugAssignArrayLocal, Opcode(augOp), opcodeInt(target.Array.Index)) + } + } + return + } + + // Non-optimized ExprStmt: push value and then drop it + c.expr(s.Expr) + c.add(Drop) + + case *ast.PrintStmt: + if s.Redirect != lexer.ILLEGAL { + c.expr(s.Dest) // redirect destination + } + for _, a := range s.Args { + c.expr(a) + } + c.add(Print, opcodeInt(len(s.Args)), Opcode(s.Redirect)) + + case *ast.PrintfStmt: + if s.Redirect != lexer.ILLEGAL { + c.expr(s.Dest) // redirect destination + } + for _, a := range s.Args { + c.expr(a) + } + c.add(Printf, opcodeInt(len(s.Args)), Opcode(s.Redirect)) + + case *ast.IfStmt: + if len(s.Else) == 0 { + jumpOp := c.condition(s.Cond, true) + ifMark := c.jumpForward(jumpOp) + c.stmts(s.Body) + c.patchForward(ifMark) + } else { + jumpOp := c.condition(s.Cond, true) + ifMark := c.jumpForward(jumpOp) + c.stmts(s.Body) + elseMark := c.jumpForward(Jump) + c.patchForward(ifMark) + c.stmts(s.Else) + c.patchForward(elseMark) + } + + case *ast.ForStmt: + if s.Pre != nil { + c.stmt(s.Pre) + } + c.breaks = append(c.breaks, []int{}) + c.continues = append(c.continues, []int{}) + + // Optimization: include condition once before loop and at the end. + // This avoids one jump (a conditional jump at the top and an + // unconditional one at the end). This idea was stolen from an + // optimization CPython did recently in its "while" loop. 
+ var mark int + if s.Cond != nil { + jumpOp := c.condition(s.Cond, true) + mark = c.jumpForward(jumpOp) + } + + loopStart := c.labelBackward() + c.stmts(s.Body) + c.patchContinues() + if s.Post != nil { + c.stmt(s.Post) + } + + if s.Cond != nil { + jumpOp := c.condition(s.Cond, false) + c.jumpBackward(loopStart, jumpOp) + c.patchForward(mark) + } else { + c.jumpBackward(loopStart, Jump) + } + + c.patchBreaks() + + case *ast.ForInStmt: + // ForIn is handled a bit differently from the other loops, because we + // want to use Go's "for range" construct directly in the interpreter. + // Otherwise we'd need to build a slice of all keys rather than + // iterating, or write our own hash table that has a more flexible + // iterator. + mark := c.jumpForward(ForIn, opcodeInt(int(s.Var.Scope)), opcodeInt(s.Var.Index), + Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + + c.breaks = append(c.breaks, nil) // nil tells BreakStmt it's a for-in loop + c.continues = append(c.continues, []int{}) + + c.stmts(s.Body) + + c.patchForward(mark) + c.patchContinues() + c.breaks = c.breaks[:len(c.breaks)-1] + + case *ast.ReturnStmt: + if s.Value != nil { + c.expr(s.Value) + c.add(Return) + } else { + c.add(ReturnNull) + } + + case *ast.WhileStmt: + c.breaks = append(c.breaks, []int{}) + c.continues = append(c.continues, []int{}) + + // Optimization: include condition once before loop and at the end. + // See ForStmt for more details. 
+ jumpOp := c.condition(s.Cond, true) + mark := c.jumpForward(jumpOp) + + loopStart := c.labelBackward() + c.stmts(s.Body) + c.patchContinues() + + jumpOp = c.condition(s.Cond, false) + c.jumpBackward(loopStart, jumpOp) + c.patchForward(mark) + + c.patchBreaks() + + case *ast.DoWhileStmt: + c.breaks = append(c.breaks, []int{}) + c.continues = append(c.continues, []int{}) + + loopStart := c.labelBackward() + c.stmts(s.Body) + c.patchContinues() + + jumpOp := c.condition(s.Cond, false) + c.jumpBackward(loopStart, jumpOp) + + c.patchBreaks() + + case *ast.BreakStmt: + i := len(c.breaks) - 1 + if c.breaks[i] == nil { + // Break in for-in loop is executed differently, use errBreak to exit + c.add(BreakForIn) + } else { + mark := c.jumpForward(Jump) + c.breaks[i] = append(c.breaks[i], mark) + } + + case *ast.ContinueStmt: + i := len(c.continues) - 1 + mark := c.jumpForward(Jump) + c.continues[i] = append(c.continues[i], mark) + + case *ast.NextStmt: + c.add(Next) + + case *ast.ExitStmt: + if s.Status != nil { + c.expr(s.Status) + } else { + c.expr(&ast.NumExpr{0}) + } + c.add(Exit) + + case *ast.DeleteStmt: + if len(s.Index) > 0 { + c.index(s.Index) + c.add(Delete, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + } else { + c.add(DeleteAll, Opcode(s.Array.Scope), opcodeInt(s.Array.Index)) + } + + case *ast.BlockStmt: + c.stmts(s.Body) + + default: + // Should never happen + panic(fmt.Sprintf("unexpected stmt type: %T", stmt)) + } +} + +// Return the amount (+1 or -1) to add for an increment expression. +func incrAmount(op lexer.Token) Opcode { + if op == lexer.INCR { + return 1 + } else { + return -1 // DECR + } +} + +// Generate opcodes for an assignment. 
+func (c *compiler) assign(target ast.Expr) { + switch target := target.(type) { + case *ast.VarExpr: + switch target.Scope { + case ast.ScopeGlobal: + c.add(AssignGlobal, opcodeInt(target.Index)) + case ast.ScopeLocal: + c.add(AssignLocal, opcodeInt(target.Index)) + case ast.ScopeSpecial: + c.add(AssignSpecial, opcodeInt(target.Index)) + } + case *ast.FieldExpr: + c.expr(target.Index) + c.add(AssignField) + case *ast.IndexExpr: + c.index(target.Index) + switch target.Array.Scope { + case ast.ScopeGlobal: + c.add(AssignArrayGlobal, opcodeInt(target.Array.Index)) + case ast.ScopeLocal: + c.add(AssignArrayLocal, opcodeInt(target.Array.Index)) + } + } +} + +// Convert int to Opcode, raising a *compileError if it doesn't fit. +func opcodeInt(n int) Opcode { + if n > math.MaxInt32 || n < math.MinInt32 { + // Two billion should be enough for anybody. + panic(&compileError{message: fmt.Sprintf("program too large (constant index or jump offset %d doesn't fit in int32)", n)}) + } + return Opcode(n) +} + +// Patch jump addresses for break statements in a loop. +func (c *compiler) patchBreaks() { + breaks := c.breaks[len(c.breaks)-1] + for _, mark := range breaks { + c.patchForward(mark) + } + c.breaks = c.breaks[:len(c.breaks)-1] +} + +// Patch jump addresses for continue statements in a loop +func (c *compiler) patchContinues() { + continues := c.continues[len(c.continues)-1] + for _, mark := range continues { + c.patchForward(mark) + } + c.continues = c.continues[:len(c.continues)-1] +} + +// Generate a forward jump (patched later) and return a "mark". +func (c *compiler) jumpForward(jumpOp Opcode, args ...Opcode) int { + c.add(jumpOp) + c.add(args...) + c.add(0) + return len(c.code) +} + +// Patch a previously-generated forward jump. +func (c *compiler) patchForward(mark int) { + offset := len(c.code) - mark + c.code[mark-1] = opcodeInt(offset) +} + +// Return a "label" for a subsequent backward jump. 
+func (c *compiler) labelBackward() int { + return len(c.code) +} + +// Jump to a previously-created label. +func (c *compiler) jumpBackward(label int, jumpOp Opcode, args ...Opcode) { + offset := label - (len(c.code) + len(args) + 2) + c.add(jumpOp) + c.add(args...) + c.add(opcodeInt(offset)) +} + +// Generate opcodes for a boolean condition. +func (c *compiler) condition(expr ast.Expr, invert bool) Opcode { + jumpOp := func(normal, inverted Opcode) Opcode { + if invert { + return inverted + } + return normal + } + + switch cond := expr.(type) { + case *ast.BinaryExpr: + // Optimize binary comparison expressions like "x < 10" into just + // JumpLess instead of two instructions (Less and JumpTrue). + switch cond.Op { + case lexer.EQUALS: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpEquals, JumpNotEquals) + + case lexer.NOT_EQUALS: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpNotEquals, JumpEquals) + + case lexer.LESS: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpLess, JumpGreaterOrEqual) + + case lexer.LTE: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpLessOrEqual, JumpGreater) + + case lexer.GREATER: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpGreater, JumpLessOrEqual) + + case lexer.GTE: + c.expr(cond.Left) + c.expr(cond.Right) + return jumpOp(JumpGreaterOrEqual, JumpLess) + } + } + + // Fall back to evaluating the expression normally, followed by JumpTrue + // or JumpFalse. 
+ c.expr(expr) + return jumpOp(JumpTrue, JumpFalse) +} + +func (c *compiler) expr(expr ast.Expr) { + switch e := expr.(type) { + case *ast.NumExpr: + c.add(Num, opcodeInt(c.numIndex(e.Value))) + + case *ast.StrExpr: + c.add(Str, opcodeInt(c.strIndex(e.Value))) + + case *ast.FieldExpr: + switch index := e.Index.(type) { + case *ast.NumExpr: + if index.Value == float64(Opcode(index.Value)) { + // Optimize $i to FieldInt opcode with integer argument + c.add(FieldInt, opcodeInt(int(index.Value))) + return + } + } + c.expr(e.Index) + c.add(Field) + + case *ast.NamedFieldExpr: + switch index := e.Field.(type) { + case *ast.StrExpr: + c.add(FieldByNameStr, opcodeInt(c.strIndex(index.Value))) + return + } + c.expr(e.Field) + c.add(FieldByName) + + case *ast.VarExpr: + switch e.Scope { + case ast.ScopeGlobal: + c.add(Global, opcodeInt(e.Index)) + case ast.ScopeLocal: + c.add(Local, opcodeInt(e.Index)) + case ast.ScopeSpecial: + c.add(Special, opcodeInt(e.Index)) + } + + case *ast.RegExpr: + c.add(Regex, opcodeInt(c.regexIndex(e.Regex))) + + case *ast.BinaryExpr: + // && and || are special cases as they're short-circuit operators. 
+ switch e.Op { + case lexer.AND: + c.expr(e.Left) + c.add(Dupe) + mark := c.jumpForward(JumpFalse) + c.add(Drop) + c.expr(e.Right) + c.patchForward(mark) + c.add(Boolean) + case lexer.OR: + c.expr(e.Left) + c.add(Dupe) + mark := c.jumpForward(JumpTrue) + c.add(Drop) + c.expr(e.Right) + c.patchForward(mark) + c.add(Boolean) + case lexer.CONCAT: + c.concatOp(e) + default: + // All other binary expressions + c.expr(e.Left) + c.expr(e.Right) + c.binaryOp(e.Op) + } + + case *ast.IncrExpr: + // Most IncrExpr (standalone) will be handled by the ExprStmt special case + op := Add + if e.Op == lexer.DECR { + op = Subtract + } + if e.Pre { + c.expr(e.Expr) + c.expr(&ast.NumExpr{1}) + c.add(op) + c.add(Dupe) + } else { + c.expr(e.Expr) + c.expr(&ast.NumExpr{0}) + c.add(Add) + c.add(Dupe) + c.expr(&ast.NumExpr{1}) + c.add(op) + } + c.assign(e.Expr) + + case *ast.AssignExpr: + // Most AssignExpr (standalone) will be handled by the ExprStmt special case + c.expr(e.Right) + c.add(Dupe) + c.assign(e.Left) + + case *ast.AugAssignExpr: + // Most AugAssignExpr (standalone) will be handled by the ExprStmt special case + c.expr(e.Right) + c.expr(e.Left) + c.add(Swap) + c.binaryOp(e.Op) + c.add(Dupe) + c.assign(e.Left) + + case *ast.CondExpr: + jump := c.condition(e.Cond, true) + ifMark := c.jumpForward(jump) + c.expr(e.True) + elseMark := c.jumpForward(Jump) + c.patchForward(ifMark) + c.expr(e.False) + c.patchForward(elseMark) + + case *ast.IndexExpr: + c.index(e.Index) + switch e.Array.Scope { + case ast.ScopeGlobal: + c.add(ArrayGlobal, opcodeInt(e.Array.Index)) + case ast.ScopeLocal: + c.add(ArrayLocal, opcodeInt(e.Array.Index)) + } + + case *ast.CallExpr: + // split and sub/gsub require special cases as they have lvalue arguments + switch e.Func { + case lexer.F_SPLIT: + c.expr(e.Args[0]) + arrayExpr := e.Args[1].(*ast.ArrayExpr) + if len(e.Args) > 2 { + c.expr(e.Args[2]) + c.add(CallSplitSep, Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) + } else { + c.add(CallSplit, 
Opcode(arrayExpr.Scope), opcodeInt(arrayExpr.Index)) + } + return + case lexer.F_SUB, lexer.F_GSUB: + op := BuiltinSub + if e.Func == lexer.F_GSUB { + op = BuiltinGsub + } + var target ast.Expr = &ast.FieldExpr{&ast.NumExpr{0}} // default value and target is $0 + if len(e.Args) == 3 { + target = e.Args[2] + } + c.expr(e.Args[0]) + c.expr(e.Args[1]) + c.expr(target) + c.add(CallBuiltin, Opcode(op)) + c.assign(target) + return + } + + for _, arg := range e.Args { + c.expr(arg) + } + switch e.Func { + case lexer.F_ATAN2: + c.add(CallBuiltin, Opcode(BuiltinAtan2)) + case lexer.F_CLOSE: + c.add(CallBuiltin, Opcode(BuiltinClose)) + case lexer.F_COS: + c.add(CallBuiltin, Opcode(BuiltinCos)) + case lexer.F_EXP: + c.add(CallBuiltin, Opcode(BuiltinExp)) + case lexer.F_FFLUSH: + if len(e.Args) > 0 { + c.add(CallBuiltin, Opcode(BuiltinFflush)) + } else { + c.add(CallBuiltin, Opcode(BuiltinFflushAll)) + } + case lexer.F_INDEX: + c.add(CallBuiltin, Opcode(BuiltinIndex)) + case lexer.F_INT: + c.add(CallBuiltin, Opcode(BuiltinInt)) + case lexer.F_LENGTH: + if len(e.Args) > 0 { + c.add(CallBuiltin, Opcode(BuiltinLengthArg)) + } else { + c.add(CallBuiltin, Opcode(BuiltinLength)) + } + case lexer.F_LOG: + c.add(CallBuiltin, Opcode(BuiltinLog)) + case lexer.F_MATCH: + c.add(CallBuiltin, Opcode(BuiltinMatch)) + case lexer.F_RAND: + c.add(CallBuiltin, Opcode(BuiltinRand)) + case lexer.F_SIN: + c.add(CallBuiltin, Opcode(BuiltinSin)) + case lexer.F_SPRINTF: + c.add(CallSprintf, opcodeInt(len(e.Args))) + case lexer.F_SQRT: + c.add(CallBuiltin, Opcode(BuiltinSqrt)) + case lexer.F_SRAND: + if len(e.Args) > 0 { + c.add(CallBuiltin, Opcode(BuiltinSrandSeed)) + } else { + c.add(CallBuiltin, Opcode(BuiltinSrand)) + } + case lexer.F_SUBSTR: + if len(e.Args) > 2 { + c.add(CallBuiltin, Opcode(BuiltinSubstrLength)) + } else { + c.add(CallBuiltin, Opcode(BuiltinSubstr)) + } + case lexer.F_SYSTEM: + c.add(CallBuiltin, Opcode(BuiltinSystem)) + case lexer.F_TOLOWER: + c.add(CallBuiltin, 
Opcode(BuiltinTolower)) + case lexer.F_TOUPPER: + c.add(CallBuiltin, Opcode(BuiltinToupper)) + default: + panic(fmt.Sprintf("unexpected function: %s", e.Func)) + } + + case *ast.UnaryExpr: + c.expr(e.Value) + switch e.Op { + case lexer.SUB: + c.add(UnaryMinus) + case lexer.NOT: + c.add(Not) + default: // ADD + c.add(UnaryPlus) + } + + case *ast.InExpr: + c.index(e.Index) + switch e.Array.Scope { + case ast.ScopeGlobal: + c.add(InGlobal, opcodeInt(e.Array.Index)) + default: // ScopeLocal + c.add(InLocal, opcodeInt(e.Array.Index)) + } + + case *ast.UserCallExpr: + if e.Native { + for _, arg := range e.Args { + c.expr(arg) + } + c.add(CallNative, opcodeInt(e.Index), opcodeInt(len(e.Args))) + for len(c.program.nativeFuncNames) <= e.Index { + c.program.nativeFuncNames = append(c.program.nativeFuncNames, "") + } + c.program.nativeFuncNames[e.Index] = e.Name + } else { + f := c.program.Functions[e.Index] + var arrayOpcodes []Opcode + numScalarArgs := 0 + for i, arg := range e.Args { + if f.Arrays[i] { + a := arg.(*ast.VarExpr) + arrayOpcodes = append(arrayOpcodes, Opcode(a.Scope), opcodeInt(a.Index)) + } else { + c.expr(arg) + numScalarArgs++ + } + } + if numScalarArgs < f.NumScalars { + c.add(Nulls, opcodeInt(f.NumScalars-numScalarArgs)) + } + c.add(CallUser, opcodeInt(e.Index), opcodeInt(len(arrayOpcodes)/2)) + c.add(arrayOpcodes...) 
+ } + + case *ast.GetlineExpr: + redirect := func() Opcode { + switch { + case e.Command != nil: + c.expr(e.Command) + return Opcode(lexer.PIPE) + case e.File != nil: + c.expr(e.File) + return Opcode(lexer.LESS) + default: + return Opcode(lexer.ILLEGAL) + } + } + switch target := e.Target.(type) { + case *ast.VarExpr: + switch target.Scope { + case ast.ScopeGlobal: + c.add(GetlineGlobal, redirect(), opcodeInt(target.Index)) + case ast.ScopeLocal: + c.add(GetlineLocal, redirect(), opcodeInt(target.Index)) + case ast.ScopeSpecial: + c.add(GetlineSpecial, redirect(), opcodeInt(target.Index)) + } + case *ast.FieldExpr: + c.expr(target.Index) + c.add(GetlineField, redirect()) + case *ast.IndexExpr: + c.index(target.Index) + c.add(GetlineArray, redirect(), Opcode(target.Array.Scope), opcodeInt(target.Array.Index)) + default: + c.add(Getline, redirect()) + } + + default: + // Should never happen + panic(fmt.Sprintf("unexpected expr type: %T", expr)) + } +} + +// Generate a Concat opcode or, if possible, compact multiple Concats into one +// ConcatMulti opcode. +func (c *compiler) concatOp(expr *ast.BinaryExpr) { + var values []ast.Expr + for { + values = append(values, expr.Right) + left, isBinary := expr.Left.(*ast.BinaryExpr) + if !isBinary || left.Op != lexer.CONCAT { + break + } + expr = left + } + values = append(values, expr.Left) + + // values are appended right to left + // but need to pushed left to right + + if len(values) == 2 { + c.expr(values[1]) + c.expr(values[0]) + c.add(Concat) + return + } + + for i := len(values) - 1; i >= 0; i-- { + c.expr(values[i]) + } + + c.add(ConcatMulti, opcodeInt(len(values))) +} + +// Add (or reuse) a number constant and returns its index. 
+func (c *compiler) numIndex(n float64) int {
+	if index, ok := c.indexes.nums[n]; ok {
+		return index // reuse existing constant
+	}
+	// First occurrence: append to the program's number table and remember
+	// the position so later uses of the same value share it.
+	index := len(c.program.Nums)
+	c.program.Nums = append(c.program.Nums, n)
+	c.indexes.nums[n] = index
+	return index
+}
+
+// strIndex adds (or reuses) a string constant and returns its index in
+// c.program.Strs.
+func (c *compiler) strIndex(s string) int {
+	if index, ok := c.indexes.strs[s]; ok {
+		return index // reuse existing constant
+	}
+	// First occurrence: append to the program's string table and remember
+	// the position so later uses of the same value share it.
+	index := len(c.program.Strs)
+	c.program.Strs = append(c.program.Strs, s)
+	c.indexes.strs[s] = index
+	return index
+}
+
+// regexIndex adds (or reuses) a regex constant and returns its index in
+// c.program.Regexes. The lookup key is the raw pattern text r; the stored
+// value is r wrapped by AddRegexFlags and compiled with regexp.MustCompile,
+// which panics if the resulting pattern is not a valid regular expression.
+func (c *compiler) regexIndex(r string) int {
+	if index, ok := c.indexes.regexes[r]; ok {
+		return index // reuse existing constant
+	}
+	index := len(c.program.Regexes)
+	c.program.Regexes = append(c.program.Regexes, regexp.MustCompile(AddRegexFlags(r)))
+	c.indexes.regexes[r] = index
+	return index
+}
+
+// AddRegexFlags adds the necessary flags to regex to make it work like other
+// AWKs (exported so we can also use this in the interpreter). It returns
+// regex wrapped in a "(?s:...)" group.
+func AddRegexFlags(regex string) string {
+	// "s" flag lets . match \n (multi-line matching like other AWKs)
+	return "(?s:" + regex + ")"
+}
+
+// binaryOp emits the single opcode that corresponds to the binary operator
+// token op. It panics if op is not one of the tokens handled below.
+func (c *compiler) binaryOp(op lexer.Token) {
+	var opcode Opcode
+	switch op {
+	case lexer.ADD:
+		opcode = Add
+	case lexer.SUB:
+		opcode = Subtract
+	case lexer.EQUALS:
+		opcode = Equals
+	case lexer.LESS:
+		opcode = Less
+	case lexer.LTE:
+		opcode = LessOrEqual
+	case lexer.MUL:
+		opcode = Multiply
+	case lexer.DIV:
+		opcode = Divide
+	case lexer.GREATER:
+		opcode = Greater
+	case lexer.GTE:
+		opcode = GreaterOrEqual
+	case lexer.NOT_EQUALS:
+		opcode = NotEquals
+	case lexer.MATCH:
+		opcode = Match
+	case lexer.NOT_MATCH:
+		opcode = NotMatch
+	case lexer.POW:
+		opcode = Power
+	case lexer.MOD:
+		opcode = Modulo
+	default:
+		panic(fmt.Sprintf("unexpected binary operation: %s", op))
+	}
+	c.add(opcode)
+}
+
+// index generates the code for an array index: every index expression is
+// compiled in order and, when there is more than one, an IndexMulti opcode
+// carrying the expression count is emitted after them.
+func (c *compiler) index(index []ast.Expr) {
+	for _, expr := range index {
+		c.expr(expr)
+	}
+	if len(index) > 1 {
+		c.add(IndexMulti, opcodeInt(len(index)))
+	}
+}
diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go
new file mode 100644
index 0000000..d5dc959
--- /dev/null
+++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/disassembler.go
@@ -0,0 +1,495 @@
+// Disassembles compiled program to text assembly instructions
+
+package compiler
+
+import (
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/benhoyt/goawk/internal/ast"
+	"github.com/benhoyt/goawk/lexer"
+)
+
+// Disassemble writes a human-readable form of the program's virtual machine
+// instructions to writer.
+//
+// Blocks are written in a fixed order: BEGIN, then each action's pattern(s)
+// and body, then END, and finally every function body. The first write error
+// stops the disassembly and is returned.
+func (p *Program) Disassemble(writer io.Writer) error {
+	// dump disassembles one block of opcodes under the given heading. A fresh
+	// disassembler is used per block, so its instruction pointer starts at
+	// zero each time. funcIndex selects the function used to resolve local
+	// names; it is 0 for blocks that are not function bodies.
+	dump := func(code []Opcode, heading string, funcIndex int) error {
+		block := &disassembler{
+			program:         p,
+			writer:          writer,
+			code:            code,
+			nativeFuncNames: p.nativeFuncNames,
+			funcIndex:       funcIndex,
+		}
+		return block.disassemble(heading)
+	}
+
+	if p.Begin != nil {
+		if err := dump(p.Begin, "BEGIN", 0); err != nil {
+			return err
+		}
+	}
+
+	for _, action := range p.Actions {
+		// One pattern is labelled "pattern"; a pair is labelled "start" and
+		// "stop". Any other pattern count produces no pattern output.
+		switch patterns := action.Pattern; len(patterns) {
+		case 1:
+			if err := dump(patterns[0], "pattern", 0); err != nil {
+				return err
+			}
+		case 2:
+			if err := dump(patterns[0], "start", 0); err != nil {
+				return err
+			}
+			if err := dump(patterns[1], "stop", 0); err != nil {
+				return err
+			}
+		}
+
+		if len(action.Body) > 0 {
+			if err := dump(action.Body, "{ body }", 0); err != nil {
+				return err
+			}
+		}
+	}
+
+	if p.End != nil {
+		if err := dump(p.End, "END", 0); err != nil {
+			return err
+		}
+	}
+
+	for i, f := range p.Functions {
+		if err := dump(f.Body, "function "+f.Name, i); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Disassembles a single block of opcodes.
+type disassembler struct {
+	// program supplies the constant tables, variable names and function
+	// metadata that operands are resolved against.
+	program *Program
+	// writer receives the disassembly text.
+	writer io.Writer
+	// code is the block of opcodes being disassembled.
+	code []Opcode
+	// nativeFuncNames is indexed by the function index operand of CallNative.
+	nativeFuncNames []string
+	// funcIndex selects the entry of program.Functions whose parameters are
+	// used to name local variables and local arrays.
+	funcIndex int
+	// ip is the index in code of the next opcode to fetch.
+	ip int
+	// opAddr is the index in code of the instruction currently being
+	// decoded; writeOpf prints it as the instruction's address.
+	opAddr int
+	// err holds the first write error. Once set, further writes are skipped
+	// and the decode loop stops.
+	err error
+}
+
+// disassemble writes the block in d.code to d.writer, one instruction per
+// line. When prefix is non-empty it is written first as a heading line, and a
+// blank line is written after the last instruction. It returns the first
+// write error encountered, if any.
+func (d *disassembler) disassemble(prefix string) error {
+	if prefix != "" {
+		d.writef(" // %s\n", prefix)
+	}
+
+	for d.ip < len(d.code) && d.err == nil {
+		// Remember where this instruction starts before its operands are
+		// consumed, so writeOpf can print the instruction's own address.
+		d.opAddr = d.ip
+		op := d.fetch()
+
+		switch op {
+		case Num:
+			index := d.fetch()
+			num := d.program.Nums[index]
+			// Print whole numbers without a fractional part.
+			if num == float64(int(num)) {
+				d.writeOpf("Num %d (%d)", int(num), index)
+			} else {
+				d.writeOpf("Num %.6g (%d)", num, index)
+			}
+
+		case Str:
+			index := d.fetch()
+			d.writeOpf("Str %q (%d)", d.program.Strs[index], index)
+
+		case FieldInt:
+			index := d.fetch()
+			d.writeOpf("FieldInt %d", index)
+
+		case FieldByNameStr:
+			index := d.fetch()
+			d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index)
+
+		case Global:
+			index := d.fetch()
+			d.writeOpf("Global %s", d.program.scalarNames[index])
+
+		case Local:
+			index := int(d.fetch())
+			d.writeOpf("Local %s", d.localName(index))
+
+		case Special:
+			index := d.fetch()
+			d.writeOpf("Special %s", ast.SpecialVarName(int(index)))
+
+		case ArrayGlobal:
+			arrayIndex := d.fetch()
+			d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex])
+
+		case ArrayLocal:
+			arrayIndex := d.fetch()
+			d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex)))
+
+		case InGlobal:
+			arrayIndex := d.fetch()
+			d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex])
+
+		case InLocal:
+			arrayIndex := int(d.fetch())
+			d.writeOpf("InLocal %s", d.localArrayName(arrayIndex))
+
+		case AssignGlobal:
+			index := d.fetch()
+			d.writeOpf("AssignGlobal %s", d.program.scalarNames[index])
+
+		case AssignLocal:
+			index := int(d.fetch())
+			d.writeOpf("AssignLocal %s", d.localName(index))
+
+		case AssignSpecial:
+			index := d.fetch()
+			d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index)))
+
+		case AssignArrayGlobal:
+			arrayIndex := d.fetch()
+			d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex])
+
+		case AssignArrayLocal:
+			arrayIndex := int(d.fetch())
+			d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))
+
+		case Delete:
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))
+
+		case DeleteAll:
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))
+
+		case IncrField:
+			amount := d.fetch()
+			d.writeOpf("IncrField %d", amount)
+
+		case IncrGlobal:
+			amount := d.fetch()
+			index := d.fetch()
+			d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index])
+
+		case IncrLocal:
+			amount := d.fetch()
+			index := int(d.fetch())
+			d.writeOpf("IncrLocal %d %s", amount, d.localName(index))
+
+		case IncrSpecial:
+			amount := d.fetch()
+			index := d.fetch()
+			d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index)))
+
+		case IncrArrayGlobal:
+			amount := d.fetch()
+			arrayIndex := d.fetch()
+			d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex])
+
+		case IncrArrayLocal:
+			amount := d.fetch()
+			arrayIndex := int(d.fetch())
+			d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex))
+
+		case AugAssignField:
+			operation := AugOp(d.fetch())
+			d.writeOpf("AugAssignField %s", operation)
+
+		case AugAssignGlobal:
+			operation := AugOp(d.fetch())
+			index := d.fetch()
+			d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index])
+
+		case AugAssignLocal:
+			operation := AugOp(d.fetch())
+			index := int(d.fetch())
+			d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index))
+
+		case AugAssignSpecial:
+			operation := AugOp(d.fetch())
+			index := d.fetch()
+			// SpecialVarName returns a string, so it must be formatted with
+			// %s; %d would print "%!d(string=...)" instead of the name.
+			d.writeOpf("AugAssignSpecial %s %s", operation, ast.SpecialVarName(int(index)))
+
+		case AugAssignArrayGlobal:
+			operation := AugOp(d.fetch())
+			arrayIndex := d.fetch()
+			d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex])
+
+		case AugAssignArrayLocal:
+			operation := AugOp(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex))
+
+		case Regex:
+			regexIndex := d.fetch()
+			d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex)
+
+		case IndexMulti:
+			num := d.fetch()
+			d.writeOpf("IndexMulti %d", num)
+
+		case ConcatMulti:
+			num := d.fetch()
+			d.writeOpf("ConcatMulti %d", num)
+
+		// For all jump-style opcodes the operand is added to d.ip after the
+		// operand has been fetched, so the printed value is the absolute
+		// target address rather than the raw offset.
+		case Jump:
+			offset := d.fetch()
+			d.writeOpf("Jump 0x%04x", d.ip+int(offset))
+
+		case JumpFalse:
+			offset := d.fetch()
+			d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset))
+
+		case JumpTrue:
+			offset := d.fetch()
+			d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset))
+
+		case JumpEquals:
+			offset := d.fetch()
+			d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset))
+
+		case JumpNotEquals:
+			offset := d.fetch()
+			d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset))
+
+		case JumpLess:
+			offset := d.fetch()
+			d.writeOpf("JumpLess 0x%04x", d.ip+int(offset))
+
+		case JumpGreater:
+			offset := d.fetch()
+			d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset))
+
+		case JumpLessOrEqual:
+			offset := d.fetch()
+			d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset))
+
+		case JumpGreaterOrEqual:
+			offset := d.fetch()
+			d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))
+
+		case ForIn:
+			varScope := ast.VarScope(d.fetch())
+			varIndex := int(d.fetch())
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			offset := d.fetch()
+			d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))
+
+		case CallBuiltin:
+			builtinOp := BuiltinOp(d.fetch())
+			d.writeOpf("CallBuiltin %s", builtinOp)
+
+		case CallSplit:
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))
+
+		case CallSplitSep:
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))
+
+		case CallSprintf:
+			numArgs := d.fetch()
+			d.writeOpf("CallSprintf %d", numArgs)
+
+		case CallUser:
+			funcIndex := d.fetch()
+			numArrayArgs := int(d.fetch())
+			// Each array argument is encoded as a (scope, index) pair
+			// following the count.
+			var arrayArgs []string
+			for i := 0; i < numArrayArgs; i++ {
+				arrayScope := ast.VarScope(d.fetch())
+				arrayIndex := int(d.fetch())
+				arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
+			}
+			d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", "))
+
+		case CallNative:
+			funcIndex := d.fetch()
+			numArgs := d.fetch()
+			d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs)
+
+		case Nulls:
+			numNulls := d.fetch()
+			d.writeOpf("Nulls %d", numNulls)
+
+		case Print:
+			numArgs := d.fetch()
+			redirect := lexer.Token(d.fetch())
+			// lexer.ILLEGAL as the redirect operand means "no redirect", so
+			// the token is left out of the output.
+			if redirect == lexer.ILLEGAL {
+				d.writeOpf("Print %d", numArgs)
+			} else {
+				d.writeOpf("Print %d %s", numArgs, redirect)
+			}
+
+		case Printf:
+			numArgs := d.fetch()
+			redirect := lexer.Token(d.fetch())
+			if redirect == lexer.ILLEGAL {
+				d.writeOpf("Printf %d", numArgs)
+			} else {
+				d.writeOpf("Printf %d %s", numArgs, redirect)
+			}
+
+		case Getline:
+			redirect := lexer.Token(d.fetch())
+			d.writeOpf("Getline %s", redirect)
+
+		case GetlineField:
+			redirect := lexer.Token(d.fetch())
+			d.writeOpf("GetlineField %s", redirect)
+
+		case GetlineGlobal:
+			redirect := lexer.Token(d.fetch())
+			index := d.fetch()
+			d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index])
+
+		case GetlineLocal:
+			redirect := lexer.Token(d.fetch())
+			index := int(d.fetch())
+			d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index))
+
+		case GetlineSpecial:
+			redirect := lexer.Token(d.fetch())
+			index := d.fetch()
+			d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index)))
+
+		case GetlineArray:
+			redirect := lexer.Token(d.fetch())
+			arrayScope := ast.VarScope(d.fetch())
+			arrayIndex := int(d.fetch())
+			d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))
+
+		default:
+			// Handles all other opcodes with no arguments
+			d.writeOpf("%s", op)
+		}
+	}
+
+	d.writef("\n")
+	return d.err
+}
+
+// Fetch the next opcode and increment the "instruction pointer".
+func (d *disassembler) fetch() Opcode {
+	op := d.code[d.ip]
+	d.ip++
+	return op
+}
+
+// Write formatted string to the disassembly output. Does nothing once a
+// previous write has failed; a failure here is recorded in d.err.
+func (d *disassembler) writef(format string, args ...interface{}) {
+	if d.err != nil {
+		return
+	}
+	_, d.err = fmt.Fprintf(d.writer, format, args...)
+}
+
+// Write formatted opcode (with address and newline) to disassembly output.
+// The address is d.opAddr as four hex digits. Does nothing once a previous
+// write has failed; a failure here is recorded in d.err.
+func (d *disassembler) writeOpf(format string, args ...interface{}) {
+	if d.err != nil {
+		return
+	}
+	addrStr := fmt.Sprintf("%04x", d.opAddr)
+	_, d.err = fmt.Fprintf(d.writer, addrStr+" "+format+"\n", args...)
+}
+
+// Return the scalar variable name described by scope and index.
+func (d *disassembler) varName(scope ast.VarScope, index int) string {
+	switch scope {
+	case ast.ScopeGlobal:
+		return d.program.scalarNames[index]
+	case ast.ScopeLocal:
+		return d.localName(index)
+	default: // ScopeSpecial
+		return ast.SpecialVarName(index)
+	}
+}
+
+// Return the local variable name with the given index. The index counts only
+// the non-array parameters of the function selected by d.funcIndex; panics if
+// that function has no such parameter.
+func (d *disassembler) localName(index int) string {
+	f := d.program.Functions[d.funcIndex]
+	n := 0
+	for i, p := range f.Params {
+		if f.Arrays[i] {
+			continue
+		}
+		if n == index {
+			return p
+		}
+		n++
+	}
+	panic(fmt.Sprintf("unexpected local variable index %d", index))
+}
+
+// Return the array variable name described by scope and index. Any scope
+// other than ast.ScopeLocal is looked up in the program's array names.
+func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
+	if scope == ast.ScopeLocal {
+		return d.localArrayName(index)
+	}
+	return d.program.arrayNames[index]
+}
+
+// Return the local array name with the given index.
+func (d *disassembler) localArrayName(index int) string { + f := d.program.Functions[d.funcIndex] + n := 0 + for i, p := range f.Params { + if !f.Arrays[i] { + continue + } + if n == index { + return p + } + n++ + } + panic(fmt.Sprintf("unexpected local array index %d", index)) +} diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go new file mode 100644 index 0000000..bfa2f0c --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcode_string.go @@ -0,0 +1,174 @@ +// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT. + +package compiler + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[Nop-0] + _ = x[Num-1] + _ = x[Str-2] + _ = x[Dupe-3] + _ = x[Drop-4] + _ = x[Swap-5] + _ = x[Field-6] + _ = x[FieldInt-7] + _ = x[FieldByName-8] + _ = x[FieldByNameStr-9] + _ = x[Global-10] + _ = x[Local-11] + _ = x[Special-12] + _ = x[ArrayGlobal-13] + _ = x[ArrayLocal-14] + _ = x[InGlobal-15] + _ = x[InLocal-16] + _ = x[AssignField-17] + _ = x[AssignGlobal-18] + _ = x[AssignLocal-19] + _ = x[AssignSpecial-20] + _ = x[AssignArrayGlobal-21] + _ = x[AssignArrayLocal-22] + _ = x[Delete-23] + _ = x[DeleteAll-24] + _ = x[IncrField-25] + _ = x[IncrGlobal-26] + _ = x[IncrLocal-27] + _ = x[IncrSpecial-28] + _ = x[IncrArrayGlobal-29] + _ = x[IncrArrayLocal-30] + _ = x[AugAssignField-31] + _ = x[AugAssignGlobal-32] + _ = x[AugAssignLocal-33] + _ = x[AugAssignSpecial-34] + _ = x[AugAssignArrayGlobal-35] + _ = x[AugAssignArrayLocal-36] + _ = x[Regex-37] + _ = x[IndexMulti-38] + _ = x[ConcatMulti-39] + _ = x[Add-40] + _ = x[Subtract-41] + _ = x[Multiply-42] + _ = x[Divide-43] + _ = x[Power-44] + _ = x[Modulo-45] + _ = x[Equals-46] + _ = x[NotEquals-47] + _ = x[Less-48] + _ = x[Greater-49] 
+ _ = x[LessOrEqual-50] + _ = x[GreaterOrEqual-51] + _ = x[Concat-52] + _ = x[Match-53] + _ = x[NotMatch-54] + _ = x[Not-55] + _ = x[UnaryMinus-56] + _ = x[UnaryPlus-57] + _ = x[Boolean-58] + _ = x[Jump-59] + _ = x[JumpFalse-60] + _ = x[JumpTrue-61] + _ = x[JumpEquals-62] + _ = x[JumpNotEquals-63] + _ = x[JumpLess-64] + _ = x[JumpGreater-65] + _ = x[JumpLessOrEqual-66] + _ = x[JumpGreaterOrEqual-67] + _ = x[Next-68] + _ = x[Exit-69] + _ = x[ForIn-70] + _ = x[BreakForIn-71] + _ = x[CallBuiltin-72] + _ = x[CallSplit-73] + _ = x[CallSplitSep-74] + _ = x[CallSprintf-75] + _ = x[CallUser-76] + _ = x[CallNative-77] + _ = x[Return-78] + _ = x[ReturnNull-79] + _ = x[Nulls-80] + _ = x[Print-81] + _ = x[Printf-82] + _ = x[Getline-83] + _ = x[GetlineField-84] + _ = x[GetlineGlobal-85] + _ = x[GetlineLocal-86] + _ = x[GetlineSpecial-87] + _ = x[GetlineArray-88] + _ = x[EndOpcode-89] +} + +const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode" + +var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 
455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826} + +func (i Opcode) String() string { + if i < 0 || i >= Opcode(len(_Opcode_index)-1) { + return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[AugOpAdd-0] + _ = x[AugOpSub-1] + _ = x[AugOpMul-2] + _ = x[AugOpDiv-3] + _ = x[AugOpPow-4] + _ = x[AugOpMod-5] +} + +const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod" + +var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48} + +func (i AugOp) String() string { + if i < 0 || i >= AugOp(len(_AugOp_index)-1) { + return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[BuiltinAtan2-0] + _ = x[BuiltinClose-1] + _ = x[BuiltinCos-2] + _ = x[BuiltinExp-3] + _ = x[BuiltinFflush-4] + _ = x[BuiltinFflushAll-5] + _ = x[BuiltinGsub-6] + _ = x[BuiltinIndex-7] + _ = x[BuiltinInt-8] + _ = x[BuiltinLength-9] + _ = x[BuiltinLengthArg-10] + _ = x[BuiltinLog-11] + _ = x[BuiltinMatch-12] + _ = x[BuiltinRand-13] + _ = x[BuiltinSin-14] + _ = x[BuiltinSqrt-15] + _ = x[BuiltinSrand-16] + _ = x[BuiltinSrandSeed-17] + _ = x[BuiltinSub-18] + _ = x[BuiltinSubstr-19] + _ = x[BuiltinSubstrLength-20] + _ = x[BuiltinSystem-21] + _ = x[BuiltinTolower-22] + _ = x[BuiltinToupper-23] +} + +const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper" + +var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300} + +func (i BuiltinOp) String() string { + if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) { + return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]] +} diff --git a/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go new file mode 100644 index 0000000..36c4c93 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/internal/compiler/opcodes.go @@ -0,0 +1,180 @@ +package compiler + +//go:generate go run golang.org/x/tools/cmd/stringer@v0.1.8 -type=Opcode,AugOp,BuiltinOp + +// Opcode represents a single virtual machine instruction (or argument). The +// comments beside each opcode show any arguments that instruction consumes. 
+// +// Normally this is called "bytecode", but I've avoided that term here as each +// opcode is a 32-bit word, not an 8-bit byte. +// +// I tested various bit widths, and I believe 32 bit was the fastest, but also +// means we don't have to worry about jump offsets overflowing. That's tested +// in the compiler, but who's going to have an AWK program bigger than 2GB? +type Opcode int32 + +const ( + Nop Opcode = iota + + // Stack operations + Num // numIndex + Str // strIndex + Dupe + Drop + Swap + + // Fetch a field, variable, or array item + Field + FieldInt // index + FieldByName + FieldByNameStr // strIndex + Global // index + Local // index + Special // index + ArrayGlobal // arrayIndex + ArrayLocal // arrayIndex + InGlobal // arrayIndex + InLocal // arrayIndex + + // Assign a field, variable, or array item + AssignField + AssignGlobal // index + AssignLocal // index + AssignSpecial // index + AssignArrayGlobal // arrayIndex + AssignArrayLocal // arrayIndex + + // Delete statement + Delete // arrayScope arrayIndex + DeleteAll // arrayScope arrayIndex + + // Post-increment and post-decrement + IncrField // amount + IncrGlobal // amount index + IncrLocal // amount index + IncrSpecial // amount index + IncrArrayGlobal // amount arrayIndex + IncrArrayLocal // amount arrayIndex + + // Augmented assignment (also used for pre-increment and pre-decrement) + AugAssignField // augOp + AugAssignGlobal // augOp index + AugAssignLocal // augOp index + AugAssignSpecial // augOp index + AugAssignArrayGlobal // augOp arrayIndex + AugAssignArrayLocal // augOp arrayIndex + + // Stand-alone regex expression /foo/ + Regex // regexIndex + + // Multi-index concatenation + IndexMulti // num + + // Multi-value concatenation + ConcatMulti // num + + // Binary operators + Add + Subtract + Multiply + Divide + Power + Modulo + Equals + NotEquals + Less + Greater + LessOrEqual + GreaterOrEqual + Concat + Match + NotMatch + + // Unary operators + Not + UnaryMinus + UnaryPlus + Boolean + + 
// Control flow + Jump // offset + JumpFalse // offset + JumpTrue // offset + JumpEquals // offset + JumpNotEquals // offset + JumpLess // offset + JumpGreater // offset + JumpLessOrEqual // offset + JumpGreaterOrEqual // offset + Next + Exit + ForIn // varScope varIndex arrayScope arrayIndex offset + BreakForIn + + // Builtin functions + CallBuiltin // builtinOp + CallSplit // arrayScope arrayIndex + CallSplitSep // arrayScope arrayIndex + CallSprintf // numArgs + + // User and native functions + CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...] + CallNative // funcIndex numArgs + Return + ReturnNull + Nulls // numNulls + + // Print, printf, and getline + Print // numArgs redirect + Printf // numArgs redirect + Getline // redirect + GetlineField // redirect + GetlineGlobal // redirect index + GetlineLocal // redirect index + GetlineSpecial // redirect index + GetlineArray // redirect arrayScope arrayIndex + + EndOpcode +) + +// AugOp represents an augmented assignment operation. +type AugOp Opcode + +const ( + AugOpAdd AugOp = iota + AugOpSub + AugOpMul + AugOpDiv + AugOpPow + AugOpMod +) + +// BuiltinOp represents a builtin function call. 
+type BuiltinOp Opcode + +const ( + BuiltinAtan2 BuiltinOp = iota + BuiltinClose + BuiltinCos + BuiltinExp + BuiltinFflush + BuiltinFflushAll + BuiltinGsub + BuiltinIndex + BuiltinInt + BuiltinLength + BuiltinLengthArg + BuiltinLog + BuiltinMatch + BuiltinRand + BuiltinSin + BuiltinSqrt + BuiltinSrand + BuiltinSrandSeed + BuiltinSub + BuiltinSubstr + BuiltinSubstrLength + BuiltinSystem + BuiltinTolower + BuiltinToupper +) diff --git a/play/vendor/github.com/benhoyt/goawk/interp/functions.go b/play/vendor/github.com/benhoyt/goawk/interp/functions.go index 26b446c..4eff792 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/functions.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/functions.go @@ -1,4 +1,4 @@ -// Evaluate builtin and user-defined function calls +// Call native Go functions; helpers for some builtin function calls. package interp @@ -6,391 +6,19 @@ import ( "bytes" "errors" "fmt" - "io" - "math" - "os/exec" "reflect" "sort" "strconv" "strings" - "time" "unicode/utf8" - . "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/ast" . 
"github.com/benhoyt/goawk/lexer" ) -// Call builtin function specified by "op" with given args -func (p *interp) callBuiltin(op Token, argExprs []Expr) (value, error) { - // split() has an array arg (not evaluated) and [g]sub() have an - // lvalue arg, so handle them as special cases - switch op { - case F_SPLIT: - strValue, err := p.eval(argExprs[0]) - if err != nil { - return null(), err - } - str := p.toString(strValue) - var fieldSep string - if len(argExprs) == 3 { - sepValue, err := p.eval(argExprs[2]) - if err != nil { - return null(), err - } - fieldSep = p.toString(sepValue) - } else { - fieldSep = p.fieldSep - } - arrayExpr := argExprs[1].(*ArrayExpr) - n, err := p.split(str, arrayExpr.Scope, arrayExpr.Index, fieldSep) - if err != nil { - return null(), err - } - return num(float64(n)), nil - - case F_SUB, F_GSUB: - regexValue, err := p.eval(argExprs[0]) - if err != nil { - return null(), err - } - regex := p.toString(regexValue) - replValue, err := p.eval(argExprs[1]) - if err != nil { - return null(), err - } - repl := p.toString(replValue) - var in string - if len(argExprs) == 3 { - inValue, err := p.eval(argExprs[2]) - if err != nil { - return null(), err - } - in = p.toString(inValue) - } else { - in = p.line - } - out, n, err := p.sub(regex, repl, in, op == F_GSUB) - if err != nil { - return null(), err - } - if len(argExprs) == 3 { - err := p.assign(argExprs[2], str(out)) - if err != nil { - return null(), err - } - } else { - p.setLine(out, true) - } - return num(float64(n)), nil - } - - // Now evaluate the argExprs (calls with up to 7 args don't - // require heap allocation) - args := make([]value, 0, 7) - for _, a := range argExprs { - arg, err := p.eval(a) - if err != nil { - return null(), err - } - args = append(args, arg) - } - - // Then switch on the function for the ordinary functions - switch op { - case F_LENGTH: - var s string - if len(args) > 0 { - s = p.toString(args[0]) - } else { - s = p.line - } - var n int - if p.bytes { - n = 
len(s) - } else { - n = utf8.RuneCountInString(s) - } - return num(float64(n)), nil - - case F_MATCH: - re, err := p.compileRegex(p.toString(args[1])) - if err != nil { - return null(), err - } - s := p.toString(args[0]) - loc := re.FindStringIndex(s) - if loc == nil { - p.matchStart = 0 - p.matchLength = -1 - return num(0), nil - } - if p.bytes { - p.matchStart = loc[0] + 1 - p.matchLength = loc[1] - loc[0] - } else { - p.matchStart = utf8.RuneCountInString(s[:loc[0]]) + 1 - p.matchLength = utf8.RuneCountInString(s[loc[0]:loc[1]]) - } - return num(float64(p.matchStart)), nil - - case F_SUBSTR: - s := p.toString(args[0]) - pos := int(args[1].num()) - if p.bytes { - if pos > len(s) { - pos = len(s) + 1 - } - if pos < 1 { - pos = 1 - } - maxLength := len(s) - pos + 1 - length := maxLength - if len(args) == 3 { - length = int(args[2].num()) - if length < 0 { - length = 0 - } - if length > maxLength { - length = maxLength - } - } - return str(s[pos-1 : pos-1+length]), nil - } else { - // Count characters till we get to pos. - chars := 1 - start := 0 - for start = range s { - chars++ - if chars > pos { - break - } - } - if pos >= chars { - start = len(s) - } - - // Count characters from start till we reach length. 
- var end int - if len(args) == 3 { - length := int(args[2].num()) - chars = 0 - for end = range s[start:] { - chars++ - if chars > length { - break - } - } - if length >= chars { - end = len(s) - } else { - end += start - } - } else { - end = len(s) - } - return str(s[start:end]), nil - } - - case F_SPRINTF: - s, err := p.sprintf(p.toString(args[0]), args[1:]) - if err != nil { - return null(), err - } - return str(s), nil - - case F_INDEX: - s := p.toString(args[0]) - substr := p.toString(args[1]) - index := strings.Index(s, substr) - if p.bytes { - return num(float64(index + 1)), nil - } else { - if index < 0 { - return num(float64(0)), nil - } - index = utf8.RuneCountInString(s[:index]) - return num(float64(index + 1)), nil - } - - case F_TOLOWER: - return str(strings.ToLower(p.toString(args[0]))), nil - case F_TOUPPER: - return str(strings.ToUpper(p.toString(args[0]))), nil - - case F_ATAN2: - return num(math.Atan2(args[0].num(), args[1].num())), nil - case F_COS: - return num(math.Cos(args[0].num())), nil - case F_EXP: - return num(math.Exp(args[0].num())), nil - case F_INT: - return num(float64(int(args[0].num()))), nil - case F_LOG: - return num(math.Log(args[0].num())), nil - case F_SQRT: - return num(math.Sqrt(args[0].num())), nil - case F_RAND: - return num(p.random.Float64()), nil - case F_SIN: - return num(math.Sin(args[0].num())), nil - - case F_SRAND: - prevSeed := p.randSeed - switch len(args) { - case 0: - p.random.Seed(time.Now().UnixNano()) - case 1: - p.randSeed = args[0].num() - p.random.Seed(int64(math.Float64bits(p.randSeed))) - } - return num(prevSeed), nil - - case F_SYSTEM: - if p.noExec { - return null(), newError("can't call system() due to NoExec") - } - cmdline := p.toString(args[0]) - cmd := p.execShell(cmdline) - cmd.Stdout = p.output - cmd.Stderr = p.errorOutput - _ = p.flushAll() // ensure synchronization - err := cmd.Start() - if err != nil { - p.printErrorf("%s\n", err) - return num(-1), nil - } - err = cmd.Wait() - if err != nil 
{ - if exitErr, ok := err.(*exec.ExitError); ok { - code := exitErr.ProcessState.ExitCode() - return num(float64(code)), nil - } else { - p.printErrorf("unexpected error running command %q: %v\n", cmdline, err) - return num(-1), nil - } - } - return num(0), nil - - case F_CLOSE: - name := p.toString(args[0]) - var c io.Closer = p.inputStreams[name] - if c != nil { - // Close input stream - delete(p.inputStreams, name) - err := c.Close() - if err != nil { - return num(-1), nil - } - return num(0), nil - } - c = p.outputStreams[name] - if c != nil { - // Close output stream - delete(p.outputStreams, name) - err := c.Close() - if err != nil { - return num(-1), nil - } - return num(0), nil - } - // Nothing to close - return num(-1), nil - - case F_FFLUSH: - var name string - if len(args) > 0 { - name = p.toString(args[0]) - } - var ok bool - if name != "" { - // Flush a single, named output stream - ok = p.flushStream(name) - } else { - // fflush() or fflush("") flushes all output streams - ok = p.flushAll() - } - if !ok { - return num(-1), nil - } - return num(0), nil - - default: - // Shouldn't happen - panic(fmt.Sprintf("unexpected function: %s", op)) - } -} - -// Executes code using configured system shell -func (p *interp) execShell(code string) *exec.Cmd { - executable := p.shellCommand[0] - args := p.shellCommand[1:] - args = append(args, code) - cmd := exec.Command(executable, args...) 
- return cmd -} - -// Call user-defined function with given index and arguments, return -// its return value (or null value if it doesn't return anything) -func (p *interp) callUser(index int, args []Expr) (value, error) { - f := p.program.Functions[index] - - if p.callDepth >= maxCallDepth { - return null(), newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth) - } - - // Evaluate the arguments and push them onto the locals stack - oldFrame := p.frame - newFrameStart := len(p.stack) - var arrays []int - for i, arg := range args { - if f.Arrays[i] { - a := arg.(*VarExpr) - arrays = append(arrays, p.getArrayIndex(a.Scope, a.Index)) - } else { - argValue, err := p.eval(arg) - if err != nil { - return null(), err - } - p.stack = append(p.stack, argValue) - } - } - // Push zero value for any additional parameters (it's valid to - // call a function with fewer arguments than it has parameters) - oldArraysLen := len(p.arrays) - for i := len(args); i < len(f.Params); i++ { - if f.Arrays[i] { - arrays = append(arrays, len(p.arrays)) - p.arrays = append(p.arrays, make(map[string]value)) - } else { - p.stack = append(p.stack, null()) - } - } - p.frame = p.stack[newFrameStart:] - p.localArrays = append(p.localArrays, arrays) - - // Execute the function! - p.callDepth++ - err := p.executes(f.Body) - p.callDepth-- - - // Pop the locals off the stack - p.stack = p.stack[:newFrameStart] - p.frame = oldFrame - p.localArrays = p.localArrays[:len(p.localArrays)-1] - p.arrays = p.arrays[:oldArraysLen] - - if r, ok := err.(returnValue); ok { - return r.Value, nil - } - if err != nil { - return null(), err - } - return null(), nil -} - // Call native-defined function with given name and arguments, return // its return value (or null value if it doesn't return anything). 
-func (p *interp) callNative(index int, args []Expr) (value, error) { +func (p *interp) callNative(index int, args []value) (value, error) { f := p.nativeFuncs[index] minIn := len(f.in) // Minimum number of args we should pass var variadicType reflect.Type @@ -401,11 +29,7 @@ func (p *interp) callNative(index int, args []Expr) (value, error) { // Build list of args to pass to function values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation - for i, arg := range args { - a, err := p.eval(arg) - if err != nil { - return null(), err - } + for i, a := range args { var argType reflect.Type if !f.isVariadic || i < len(f.in)-1 { argType = f.in[i] @@ -618,12 +242,12 @@ func validNativeType(typ reflect.Type) bool { } // Guts of the split() function -func (p *interp) split(s string, scope VarScope, index int, fs string) (int, error) { +func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) { var parts []string if fs == " " { parts = strings.Fields(s) } else if s == "" { - // NF should be 0 on empty line + // Leave parts 0 length on empty string } else if utf8.RuneCountInString(fs) <= 1 { parts = strings.Split(s, fs) } else { @@ -637,7 +261,7 @@ func (p *interp) split(s string, scope VarScope, index int, fs string) (int, err for i, part := range parts { array[strconv.Itoa(i+1)] = numStr(part) } - p.arrays[p.getArrayIndex(scope, index)] = array + p.arrays[p.arrayIndex(scope, index)] = array return len(array), nil } @@ -753,7 +377,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) { if len(types) > len(args) { return "", newError("format error: got %d args, expected %d", len(args), len(types)) } - converted := make([]interface{}, len(types)) + converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation for i, t := range types { a := args[i] var v interface{} @@ -765,7 +389,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) { case 'f': v = a.num() 
case 'u': - v = uint32(a.num()) + v = uint(a.num()) case 'c': var c []byte n, isStr := a.isTrueStr() @@ -783,7 +407,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) { } v = c } - converted[i] = v + converted = append(converted, v) } return fmt.Sprintf(format, converted...), nil } diff --git a/play/vendor/github.com/benhoyt/goawk/interp/interp.go b/play/vendor/github.com/benhoyt/goawk/interp/interp.go index ee2dbaf..af97cf1 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/interp.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/interp.go @@ -1,14 +1,19 @@ -// Package interp is the GoAWK interpreter (a simple tree-walker). +// Package interp is the GoAWK interpreter. // // For basic usage, use the Exec function. For more complicated use // cases and configuration options, first use the parser package to // parse the AWK source, and then use ExecProgram to execute it with // a specific configuration. // +// If you need to re-run the same parsed program repeatedly on different +// inputs or with different variables, use New to instantiate an Interpreter +// and then call the Interpreter.Execute method as many times as you need. package interp import ( "bufio" + "bytes" + "context" "errors" "fmt" "io" @@ -23,23 +28,26 @@ import ( "strings" "unicode/utf8" - . "github.com/benhoyt/goawk/internal/ast" - . "github.com/benhoyt/goawk/lexer" - . 
"github.com/benhoyt/goawk/parser" + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/parser" ) var ( - errExit = errors.New("exit") - errBreak = errors.New("break") - errContinue = errors.New("continue") - errNext = errors.New("next") + errExit = errors.New("exit") + errBreak = errors.New("break") + errNext = errors.New("next") + + errCSVSeparator = errors.New("invalid CSV field separator or comment delimiter") crlfNewline = runtime.GOOS == "windows" varRegex = regexp.MustCompile(`^([_a-zA-Z][_a-zA-Z0-9]*)=(.*)`) + + defaultShellCommand = getDefaultShellCommand() ) // Error (actually *Error) is returned by Exec and Eval functions on -// interpreter error, for example a negative field index. +// interpreter error, for example FS being set to an invalid regex. type Error struct { message string } @@ -70,6 +78,7 @@ type interp struct { filenameIndex int hadFiles bool input io.Reader + inputBuffer []byte inputStreams map[string]io.ReadCloser outputStreams map[string]io.WriteCloser commands map[string]*exec.Cmd @@ -77,10 +86,13 @@ type interp struct { noFileWrites bool noFileReads bool shellCommand []string + csvOutput *bufio.Writer + noArgVars bool // Scalars, arrays, and function state globals []value stack []value + sp int frame []value arrays []map[string]value localArrays [][]int @@ -97,6 +109,9 @@ type interp struct { fieldsIsTrueStr []bool numFields int haveFields bool + fieldNames []string + fieldIndexes map[string]int + reparseCSV bool // Built-in variables argc int @@ -112,15 +127,31 @@ type interp struct { subscriptSep string matchLength int matchStart int + inputMode IOMode + csvInputConfig CSVInputConfig + outputMode IOMode + csvOutputConfig CSVOutputConfig + + // Parsed program, compiled functions and constants + program *parser.Program + functions []compiler.Function + nums []float64 + strs []string + regexes []*regexp.Regexp + + // Context support (for Interpreter.ExecuteContext) + 
checkCtx bool + ctx context.Context + ctxDone <-chan struct{} + ctxOps int // Misc pieces of state - program *Program - random *rand.Rand - randSeed float64 - exitStatus int - regexCache map[string]*regexp.Regexp - formatCache map[string]cachedFormat - bytes bool + random *rand.Rand + randSeed float64 + exitStatus int + regexCache map[string]*regexp.Regexp + formatCache map[string]cachedFormat + csvJoinFieldsBuf bytes.Buffer } // Various const configuration. Could make these part of Config if @@ -141,8 +172,9 @@ type Config struct { // Standard input reader (defaults to os.Stdin) Stdin io.Reader - // Writer for normal output (defaults to a buffered version of - // os.Stdout) + // Writer for normal output (defaults to a buffered version of os.Stdout). + // If you need to write to stdout but want control over the buffer size or + // allocation, wrap os.Stdout yourself and set Output to that. Output io.Writer // Writer for non-fatal error messages (defaults to os.Stderr) @@ -154,8 +186,14 @@ type Config struct { // Input arguments (usually filenames): empty slice means read // only from Stdin, and a filename of "-" means read from Stdin // instead of a real file. + // + // Arguments of the form "var=value" are treated as variable + // assignments. Args []string + // Set to true to disable "var=value" assignments in Args. + NoArgVars bool + // List of name-value pairs for variables to set before executing // the program (useful for setting FS and other built-in // variables, for example []string{"FS", ",", "OFS", ","}). @@ -203,31 +241,125 @@ type Config struct { // List of name-value pairs to be assigned to the ENVIRON special // array, for example []string{"USER", "bob", "HOME", "/home/bob"}. // If nil (the default), values from os.Environ() are used. + // + // If the script doesn't need environment variables, set Environ to a + // non-nil empty slice, []string{}. 
Environ []string - // Set to true to use byte indexes instead of character indexes for - // the index, length, match, and substr functions. Note: the default - // was changed from bytes to characters in GoAWK version 1.11. - Bytes bool + // Mode for parsing input fields and record: default is to use normal FS + // and RS behaviour. If set to CSVMode or TSVMode, FS and RS are ignored, + // and input records are parsed as comma-separated values or tab-separated + // values, respectively. Parsing is done as per RFC 4180 and the + // "encoding/csv" package, but FieldsPerRecord is not supported, + // LazyQuotes is always on, and TrimLeadingSpace is always off. + // + // You can also enable CSV or TSV input mode by setting INPUTMODE to "csv" + // or "tsv" in Vars or in the BEGIN block (those override this setting). + // + // For further documentation about GoAWK's CSV support, see the full docs: + // https://github.com/benhoyt/goawk/blob/master/csv.md + InputMode IOMode + + // Additional options if InputMode is CSVMode or TSVMode. The zero value + // is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode. + // + // You can also specify these options by setting INPUTMODE in the BEGIN + // block, for example, to use '|' as the field separator, '#' as the + // comment character, and enable header row parsing: + // + // BEGIN { INPUTMODE="csv separator=| comment=# header" } + CSVInput CSVInputConfig + + // Mode for print output: default is to use normal OFS and ORS + // behaviour. If set to CSVMode or TSVMode, the "print" statement with one + // or more arguments outputs fields using CSV or TSV formatting, + // respectively. Output is written as per RFC 4180 and the "encoding/csv" + // package. + // + // You can also enable CSV or TSV output mode by setting OUTPUTMODE to + // "csv" or "tsv" in Vars or in the BEGIN block (those override this + // setting). + OutputMode IOMode + + // Additional options if OutputMode is CSVMode or TSVMode. 
The zero value + // is valid, specifying a separator of ',' in CSVMode and '\t' in TSVMode. + // + // You can also specify these options by setting OUTPUTMODE in the BEGIN + // block, for example, to use '|' as the output field separator: + // + // BEGIN { OUTPUTMODE="csv separator=|" } + CSVOutput CSVOutputConfig +} + +// IOMode specifies the input parsing or print output mode. +type IOMode int + +const ( + // DefaultMode uses normal AWK field and record separators: FS and RS for + // input, OFS and ORS for print output. + DefaultMode IOMode = 0 + + // CSVMode uses comma-separated value mode for input or output. + CSVMode IOMode = 1 + + // TSVMode uses tab-separated value mode for input or output. + TSVMode IOMode = 2 +) + +// CSVInputConfig holds additional configuration for when InputMode is CSVMode +// or TSVMode. +type CSVInputConfig struct { + // Input field separator character. If this is zero, it defaults to ',' + // when InputMode is CSVMode and '\t' when InputMode is TSVMode. + Separator rune + + // If nonzero, specifies that lines beginning with this character (and no + // leading whitespace) should be ignored as comments. + Comment rune + + // If true, parse the first row in each input file as a header row (that + // is, a list of field names), and enable the @"field" syntax to get a + // field by name as well as the FIELDS special array. + Header bool +} + +// CSVOutputConfig holds additional configuration for when OutputMode is +// CSVMode or TSVMode. +type CSVOutputConfig struct { + // Output field separator character. If this is zero, it defaults to ',' + // when OutputMode is CSVMode and '\t' when OutputMode is TSVMode. + Separator rune } // ExecProgram executes the parsed program using the given interpreter // config, returning the exit status code of the program. Error is nil // on successful execution of the program, even if the program returns // a non-zero status code. 
-func ExecProgram(program *Program, config *Config) (int, error) { - if len(config.Vars)%2 != 0 { - return 0, newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars)) +// +// As of GoAWK version v1.16.0, a nil config is valid and will use the +// defaults (zero values). However, it may be simpler to use Exec in that +// case. +func ExecProgram(program *parser.Program, config *Config) (int, error) { + p := newInterp(program) + err := p.setExecuteConfig(config) + if err != nil { + return 0, err } - if len(config.Environ)%2 != 0 { - return 0, newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ)) + return p.executeAll() +} + +func newInterp(program *parser.Program) *interp { + p := &interp{ + program: program, + functions: program.Compiled.Functions, + nums: program.Compiled.Nums, + strs: program.Compiled.Strs, + regexes: program.Compiled.Regexes, } - p := &interp{program: program} - - // Allocate memory for variables + // Allocate memory for variables and virtual machine stack p.globals = make([]value, len(program.Scalars)) - p.stack = make([]value, 0, initialStackSize) + p.stack = make([]value, initialStackSize) p.arrays = make([]map[string]value, len(program.Arrays), len(program.Arrays)+initialStackSize) for i := 0; i < len(program.Arrays); i++ { p.arrays[i] = make(map[string]value) @@ -246,58 +378,113 @@ func ExecProgram(program *Program, config *Config) (int, error) { p.outputFieldSep = " " p.outputRecordSep = "\n" p.subscriptSep = "\x1c" - p.noExec = config.NoExec - p.noFileWrites = config.NoFileWrites - p.noFileReads = config.NoFileReads - p.bytes = config.Bytes - err := p.initNativeFuncs(config.Funcs) - if err != nil { - return 0, err + + p.inputStreams = make(map[string]io.ReadCloser) + p.outputStreams = make(map[string]io.WriteCloser) + p.commands = make(map[string]*exec.Cmd) + p.scanners = make(map[string]*bufio.Scanner) + + return p +} + +func (p *interp) setExecuteConfig(config *Config) error { 
+ if config == nil { + config = &Config{} + } + if len(config.Vars)%2 != 0 { + return newError("length of config.Vars must be a multiple of 2, not %d", len(config.Vars)) + } + if len(config.Environ)%2 != 0 { + return newError("length of config.Environ must be a multiple of 2, not %d", len(config.Environ)) } - // Setup ARGV and other variables from config - argvIndex := program.Arrays["ARGV"] - p.setArrayValue(ScopeGlobal, argvIndex, "0", str(config.Argv0)) + // Set up I/O mode config (Vars will override) + p.inputMode = config.InputMode + p.csvInputConfig = config.CSVInput + switch p.inputMode { + case CSVMode: + if p.csvInputConfig.Separator == 0 { + p.csvInputConfig.Separator = ',' + } + case TSVMode: + if p.csvInputConfig.Separator == 0 { + p.csvInputConfig.Separator = '\t' + } + case DefaultMode: + if p.csvInputConfig != (CSVInputConfig{}) { + return newError("input mode configuration not valid in default input mode") + } + } + p.outputMode = config.OutputMode + p.csvOutputConfig = config.CSVOutput + switch p.outputMode { + case CSVMode: + if p.csvOutputConfig.Separator == 0 { + p.csvOutputConfig.Separator = ',' + } + case TSVMode: + if p.csvOutputConfig.Separator == 0 { + p.csvOutputConfig.Separator = '\t' + } + case DefaultMode: + if p.csvOutputConfig != (CSVOutputConfig{}) { + return newError("output mode configuration not valid in default output mode") + } + } + + // Set up ARGV and other variables from config + argvIndex := p.program.Arrays["ARGV"] + p.setArrayValue(ast.ScopeGlobal, argvIndex, "0", str(config.Argv0)) p.argc = len(config.Args) + 1 for i, arg := range config.Args { - p.setArrayValue(ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg)) + p.setArrayValue(ast.ScopeGlobal, argvIndex, strconv.Itoa(i+1), numStr(arg)) } + p.noArgVars = config.NoArgVars p.filenameIndex = 1 p.hadFiles = false for i := 0; i < len(config.Vars); i += 2 { err := p.setVarByName(config.Vars[i], config.Vars[i+1]) if err != nil { - return 0, err + return err } } - // 
Setup ENVIRON from config or environment variables - environIndex := program.Arrays["ENVIRON"] + // After Vars has been handled, validate CSV configuration. + err := validateCSVInputConfig(p.inputMode, p.csvInputConfig) + if err != nil { + return err + } + err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig) + if err != nil { + return err + } + + // Set up ENVIRON from config or environment variables + environIndex := p.program.Arrays["ENVIRON"] if config.Environ != nil { for i := 0; i < len(config.Environ); i += 2 { - p.setArrayValue(ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1])) + p.setArrayValue(ast.ScopeGlobal, environIndex, config.Environ[i], numStr(config.Environ[i+1])) } } else { for _, kv := range os.Environ() { eq := strings.IndexByte(kv, '=') if eq >= 0 { - p.setArrayValue(ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:])) + p.setArrayValue(ast.ScopeGlobal, environIndex, kv[:eq], numStr(kv[eq+1:])) } } } - // Setup system shell command + // Set up system shell command if len(config.ShellCommand) != 0 { p.shellCommand = config.ShellCommand } else { - executable := "/bin/sh" - if runtime.GOOS == "windows" { - executable = "sh" - } - p.shellCommand = []string{executable, "-c"} + p.shellCommand = defaultShellCommand } - // Setup I/O structures + // Set up I/O structures + p.noExec = config.NoExec + p.noFileWrites = config.NoFileWrites + p.noFileReads = config.NoFileReads p.stdin = config.Stdin if p.stdin == nil { p.stdin = os.Stdin @@ -310,28 +497,80 @@ func ExecProgram(program *Program, config *Config) (int, error) { if p.errorOutput == nil { p.errorOutput = os.Stderr } - p.inputStreams = make(map[string]io.ReadCloser) - p.outputStreams = make(map[string]io.WriteCloser) - p.commands = make(map[string]*exec.Cmd) - p.scanners = make(map[string]*bufio.Scanner) + + // Initialize native Go functions + if p.nativeFuncs == nil { + err := p.initNativeFuncs(config.Funcs) + if err != nil { + return err + } + } + + return nil 
+} + +func validateCSVInputConfig(mode IOMode, config CSVInputConfig) error { + if mode != CSVMode && mode != TSVMode { + return nil + } + if config.Separator == config.Comment || !validCSVSeparator(config.Separator) || + config.Comment != 0 && !validCSVSeparator(config.Comment) { + return errCSVSeparator + } + return nil +} + +func validateCSVOutputConfig(mode IOMode, config CSVOutputConfig) error { + if mode != CSVMode && mode != TSVMode { + return nil + } + if !validCSVSeparator(config.Separator) { + return errCSVSeparator + } + return nil +} + +func validCSVSeparator(r rune) bool { + return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError +} + +func (p *interp) executeAll() (int, error) { defer p.closeAll() - // Execute the program! BEGIN, then pattern/actions, then END - err = p.execBeginEnd(program.Begin) + // Execute the program: BEGIN, then pattern/actions, then END + err := p.execute(p.program.Compiled.Begin) if err != nil && err != errExit { + if p.checkCtx { + ctxErr := p.checkContextNow() + if ctxErr != nil { + return 0, ctxErr + } + } return 0, err } - if program.Actions == nil && program.End == nil { - return p.exitStatus, nil + if p.program.Actions == nil && p.program.End == nil { + return p.exitStatus, nil // only BEGIN specified, don't process input } if err != errExit { - err = p.execActions(program.Actions) + err = p.execActions(p.program.Compiled.Actions) if err != nil && err != errExit { + if p.checkCtx { + ctxErr := p.checkContextNow() + if ctxErr != nil { + return 0, ctxErr + } + } return 0, err } } - err = p.execBeginEnd(program.End) + err = p.execute(p.program.Compiled.End) if err != nil && err != errExit { + if p.checkCtx { + ctxErr := p.checkContextNow() + if ctxErr != nil { + return 0, ctxErr + } + } return 0, err } return p.exitStatus, nil @@ -342,7 +581,7 @@ func ExecProgram(program *Program, config *Config) (int, error) { // reader (nil means use os.Stdin) and writes output to stdout (nil // 
means use a buffered version of os.Stdout). func Exec(source, fieldSep string, input io.Reader, output io.Writer) error { - prog, err := ParseProgram([]byte(source), nil) + prog, err := parser.ParseProgram([]byte(source), nil) if err != nil { return err } @@ -356,20 +595,9 @@ func Exec(source, fieldSep string, input io.Reader, output io.Writer) error { return err } -// Execute BEGIN or END blocks (may be multiple) -func (p *interp) execBeginEnd(beginEnd []Stmts) error { - for _, statements := range beginEnd { - err := p.executes(statements) - if err != nil { - return err - } - } - return nil -} - // Execute pattern-action blocks (may be multiple) -func (p *interp) execActions(actions []Action) error { - inRange := make([]bool, len(actions)) +func (p *interp) execActions(actions []compiler.Action) error { + var inRange []bool lineLoop: for { // Read and setup next line of input @@ -381,6 +609,7 @@ lineLoop: return err } p.setLine(line, false) + p.reparseCSV = false // Execute all the pattern-action blocks for each line for i, action := range actions { @@ -392,27 +621,30 @@ lineLoop: matched = true case 1: // Single boolean pattern - v, err := p.eval(action.Pattern[0]) + err := p.execute(action.Pattern[0]) if err != nil { return err } - matched = v.boolean() + matched = p.pop().boolean() case 2: // Range pattern (matches between start and stop lines) + if inRange == nil { + inRange = make([]bool, len(actions)) + } if !inRange[i] { - v, err := p.eval(action.Pattern[0]) + err := p.execute(action.Pattern[0]) if err != nil { return err } - inRange[i] = v.boolean() + inRange[i] = p.pop().boolean() } matched = inRange[i] if inRange[i] { - v, err := p.eval(action.Pattern[1]) + err := p.execute(action.Pattern[1]) if err != nil { return err } - inRange[i] = !v.boolean() + inRange[i] = !p.pop().boolean() } } if !matched { @@ -420,7 +652,7 @@ lineLoop: } // No action is equivalent to { print $0 } - if action.Stmts == nil { + if len(action.Body) == 0 { err := 
p.printLine(p.output, p.line) if err != nil { return err @@ -429,7 +661,7 @@ lineLoop: } // Execute the body statements - err := p.executes(action.Stmts) + err := p.execute(action.Body) if err == errNext { // "next" statement skips straight to next line continue lineLoop @@ -442,743 +674,251 @@ lineLoop: return nil } -// Execute a block of multiple statements -func (p *interp) executes(stmts Stmts) error { - for _, s := range stmts { - err := p.execute(s) - if err != nil { - return err - } - } - return nil -} - -// Execute a single statement -func (p *interp) execute(stmt Stmt) error { - switch s := stmt.(type) { - case *ExprStmt: - // Expression statement: simply throw away the expression value - _, err := p.eval(s.Expr) - return err - - case *PrintStmt: - // Print OFS-separated args followed by ORS (usually newline) - var line string - if len(s.Args) > 0 { - strs := make([]string, len(s.Args)) - for i, a := range s.Args { - v, err := p.eval(a) - if err != nil { - return err - } - strs[i] = v.str(p.outputFormat) - } - line = strings.Join(strs, p.outputFieldSep) - } else { - // "print" with no args is equivalent to "print $0" - line = p.line - } - output, err := p.getOutputStream(s.Redirect, s.Dest) - if err != nil { - return err - } - return p.printLine(output, line) - - case *PrintfStmt: - // printf(fmt, arg1, arg2, ...): uses our version of sprintf - // to build the formatted string and then print that - formatValue, err := p.eval(s.Args[0]) - if err != nil { - return err - } - format := p.toString(formatValue) - args := make([]value, len(s.Args)-1) - for i, a := range s.Args[1:] { - args[i], err = p.eval(a) - if err != nil { - return err - } - } - output, err := p.getOutputStream(s.Redirect, s.Dest) - if err != nil { - return err - } - str, err := p.sprintf(format, args) - if err != nil { - return err - } - err = writeOutput(output, str) - if err != nil { - return err - } - - case *IfStmt: - v, err := p.eval(s.Cond) - if err != nil { - return err - } - if 
v.boolean() { - return p.executes(s.Body) - } else { - // Doesn't do anything if s.Else is nil - return p.executes(s.Else) - } - - case *ForStmt: - // C-like for loop with pre-statement, cond, and post-statement - if s.Pre != nil { - err := p.execute(s.Pre) - if err != nil { - return err - } - } - for { - if s.Cond != nil { - v, err := p.eval(s.Cond) - if err != nil { - return err - } - if !v.boolean() { - break - } - } - err := p.executes(s.Body) - if err == errBreak { - break - } - if err != nil && err != errContinue { - return err - } - if s.Post != nil { - err := p.execute(s.Post) - if err != nil { - return err - } - } - } - - case *ForInStmt: - // Foreach-style "for (key in array)" loop - array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] - for index := range array { - err := p.setVar(s.Var.Scope, s.Var.Index, str(index)) - if err != nil { - return err - } - err = p.executes(s.Body) - if err == errBreak { - break - } - if err == errContinue { - continue - } - if err != nil { - return err - } - } - - case *ReturnStmt: - // Return statement uses special error value which is "caught" - // by the callUser function - var v value - if s.Value != nil { - var err error - v, err = p.eval(s.Value) - if err != nil { - return err - } - } - return returnValue{v} - - case *WhileStmt: - // Simple "while (cond)" loop - for { - v, err := p.eval(s.Cond) - if err != nil { - return err - } - if !v.boolean() { - break - } - err = p.executes(s.Body) - if err == errBreak { - break - } - if err == errContinue { - continue - } - if err != nil { - return err - } - } - - case *DoWhileStmt: - // Do-while loop (tests condition after executing body) - for { - err := p.executes(s.Body) - if err == errBreak { - break - } - if err == errContinue { - continue - } - if err != nil { - return err - } - v, err := p.eval(s.Cond) - if err != nil { - return err - } - if !v.boolean() { - break - } - } - - // Break, continue, next, and exit statements - case *BreakStmt: - return errBreak 
- case *ContinueStmt: - return errContinue - case *NextStmt: - return errNext - case *ExitStmt: - if s.Status != nil { - status, err := p.eval(s.Status) - if err != nil { - return err - } - p.exitStatus = int(status.num()) - } - // Return special errExit value "caught" by top-level executor - return errExit - - case *DeleteStmt: - if len(s.Index) > 0 { - // Delete single key from array - index, err := p.evalIndex(s.Index) - if err != nil { - return err - } - array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] - delete(array, index) // Does nothing if key isn't present - } else { - // Delete entire array - array := p.arrays[p.getArrayIndex(s.Array.Scope, s.Array.Index)] - for k := range array { - delete(array, k) - } - } - - case *BlockStmt: - // Nested block (just syntax, doesn't do anything) - return p.executes(s.Body) - +// Get a special variable by index +func (p *interp) getSpecial(index int) value { + switch index { + case ast.V_NF: + p.ensureFields() + return num(float64(p.numFields)) + case ast.V_NR: + return num(float64(p.lineNum)) + case ast.V_RLENGTH: + return num(float64(p.matchLength)) + case ast.V_RSTART: + return num(float64(p.matchStart)) + case ast.V_FNR: + return num(float64(p.fileLineNum)) + case ast.V_ARGC: + return num(float64(p.argc)) + case ast.V_CONVFMT: + return str(p.convertFormat) + case ast.V_FILENAME: + return p.filename + case ast.V_FS: + return str(p.fieldSep) + case ast.V_OFMT: + return str(p.outputFormat) + case ast.V_OFS: + return str(p.outputFieldSep) + case ast.V_ORS: + return str(p.outputRecordSep) + case ast.V_RS: + return str(p.recordSep) + case ast.V_RT: + return str(p.recordTerminator) + case ast.V_SUBSEP: + return str(p.subscriptSep) + case ast.V_INPUTMODE: + return str(inputModeString(p.inputMode, p.csvInputConfig)) + case ast.V_OUTPUTMODE: + return str(outputModeString(p.outputMode, p.csvOutputConfig)) default: - // Should never happen - panic(fmt.Sprintf("unexpected stmt type: %T", stmt)) - } - return nil -} 
- -// Evaluate a single expression, return expression value and error -func (p *interp) eval(expr Expr) (value, error) { - switch e := expr.(type) { - case *NumExpr: - // Number literal - return num(e.Value), nil - - case *StrExpr: - // String literal - return str(e.Value), nil - - case *FieldExpr: - // $n field expression - index, err := p.eval(e.Index) - if err != nil { - return null(), err - } - return p.getField(int(index.num())) - - case *VarExpr: - // Variable read expression (scope is global, local, or special) - return p.getVar(e.Scope, e.Index), nil - - case *RegExpr: - // Stand-alone /regex/ is equivalent to: $0 ~ /regex/ - re, err := p.compileRegex(e.Regex) - if err != nil { - return null(), err - } - return boolean(re.MatchString(p.line)), nil - - case *BinaryExpr: - // Binary expression. Note that && and || are special cases - // as they're short-circuit operators. - left, err := p.eval(e.Left) - if err != nil { - return null(), err - } - switch e.Op { - case AND: - if !left.boolean() { - return num(0), nil - } - right, err := p.eval(e.Right) - if err != nil { - return null(), err - } - return boolean(right.boolean()), nil - case OR: - if left.boolean() { - return num(1), nil - } - right, err := p.eval(e.Right) - if err != nil { - return null(), err - } - return boolean(right.boolean()), nil - default: - right, err := p.eval(e.Right) - if err != nil { - return null(), err - } - return p.evalBinary(e.Op, left, right) - } - - case *IncrExpr: - // Pre-increment, post-increment, pre-decrement, post-decrement - - // First evaluate the expression, but remember array or field - // index, so we don't evaluate part of the expression twice - exprValue, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Expr) - if err != nil { - return null(), err - } - - // Then convert to number and increment or decrement - exprNum := exprValue.num() - var incr float64 - if e.Op == INCR { - incr = exprNum + 1 - } else { - incr = exprNum - 1 - } - incrValue := num(incr) - - // 
Finally, assign back to expression and return the correct value - err = p.assignAug(e.Expr, arrayIndex, fieldIndex, incrValue) - if err != nil { - return null(), err - } - if e.Pre { - return incrValue, nil - } else { - return num(exprNum), nil - } - - case *AssignExpr: - // Assignment expression (returns right-hand side) - right, err := p.eval(e.Right) - if err != nil { - return null(), err - } - err = p.assign(e.Left, right) - if err != nil { - return null(), err - } - return right, nil - - case *AugAssignExpr: - // Augmented assignment like += (returns right-hand side) - right, err := p.eval(e.Right) - if err != nil { - return null(), err - } - left, arrayIndex, fieldIndex, err := p.evalForAugAssign(e.Left) - if err != nil { - return null(), err - } - right, err = p.evalBinary(e.Op, left, right) - if err != nil { - return null(), err - } - err = p.assignAug(e.Left, arrayIndex, fieldIndex, right) - if err != nil { - return null(), err - } - return right, nil - - case *CondExpr: - // C-like ?: ternary conditional operator - cond, err := p.eval(e.Cond) - if err != nil { - return null(), err - } - if cond.boolean() { - return p.eval(e.True) - } else { - return p.eval(e.False) - } - - case *IndexExpr: - // Read value from array by index - index, err := p.evalIndex(e.Index) - if err != nil { - return null(), err - } - return p.getArrayValue(e.Array.Scope, e.Array.Index, index), nil - - case *CallExpr: - // Call a builtin function - return p.callBuiltin(e.Func, e.Args) - - case *UnaryExpr: - // Unary ! 
or + or - - v, err := p.eval(e.Value) - if err != nil { - return null(), err - } - return p.evalUnary(e.Op, v), nil - - case *InExpr: - // "key in array" expression - index, err := p.evalIndex(e.Index) - if err != nil { - return null(), err - } - array := p.arrays[p.getArrayIndex(e.Array.Scope, e.Array.Index)] - _, ok := array[index] - return boolean(ok), nil - - case *UserCallExpr: - // Call user-defined or native Go function - if e.Native { - return p.callNative(e.Index, e.Args) - } else { - return p.callUser(e.Index, e.Args) - } - - case *GetlineExpr: - // Getline: read line from input - var line string - switch { - case e.Command != nil: - nameValue, err := p.eval(e.Command) - if err != nil { - return null(), err - } - name := p.toString(nameValue) - scanner, err := p.getInputScannerPipe(name) - if err != nil { - return null(), err - } - if !scanner.Scan() { - if err := scanner.Err(); err != nil { - return num(-1), nil - } - return num(0), nil - } - line = scanner.Text() - case e.File != nil: - nameValue, err := p.eval(e.File) - if err != nil { - return null(), err - } - name := p.toString(nameValue) - scanner, err := p.getInputScannerFile(name) - if err != nil { - if _, ok := err.(*os.PathError); ok { - // File not found is not a hard error, getline just returns -1. 
- // See: https://github.com/benhoyt/goawk/issues/41 - return num(-1), nil - } - return null(), err - } - if !scanner.Scan() { - if err := scanner.Err(); err != nil { - return num(-1), nil - } - return num(0), nil - } - line = scanner.Text() - default: - p.flushOutputAndError() // Flush output in case they've written a prompt - var err error - line, err = p.nextLine() - if err == io.EOF { - return num(0), nil - } - if err != nil { - return num(-1), nil - } - } - if e.Target != nil { - err := p.assign(e.Target, numStr(line)) - if err != nil { - return null(), err - } - } else { - p.setLine(line, false) - } - return num(1), nil - - default: - // Should never happen - panic(fmt.Sprintf("unexpected expr type: %T", expr)) - } -} - -func (p *interp) evalForAugAssign(expr Expr) (v value, arrayIndex string, fieldIndex int, err error) { - switch expr := expr.(type) { - case *VarExpr: - v = p.getVar(expr.Scope, expr.Index) - case *IndexExpr: - arrayIndex, err = p.evalIndex(expr.Index) - if err != nil { - return null(), "", 0, err - } - v = p.getArrayValue(expr.Array.Scope, expr.Array.Index, arrayIndex) - case *FieldExpr: - index, err := p.eval(expr.Index) - if err != nil { - return null(), "", 0, err - } - fieldIndex = int(index.num()) - v, err = p.getField(fieldIndex) - if err != nil { - return null(), "", 0, err - } - } - return v, arrayIndex, fieldIndex, nil -} - -func (p *interp) assignAug(expr Expr, arrayIndex string, fieldIndex int, v value) error { - switch expr := expr.(type) { - case *VarExpr: - return p.setVar(expr.Scope, expr.Index, v) - case *IndexExpr: - p.setArrayValue(expr.Array.Scope, expr.Array.Index, arrayIndex, v) - default: // *FieldExpr - return p.setField(fieldIndex, p.toString(v)) - } - return nil -} - -// Get a variable's value by index in given scope -func (p *interp) getVar(scope VarScope, index int) value { - switch scope { - case ScopeGlobal: - return p.globals[index] - case ScopeLocal: - return p.frame[index] - default: // ScopeSpecial - switch 
index { - case V_NF: - p.ensureFields() - return num(float64(p.numFields)) - case V_NR: - return num(float64(p.lineNum)) - case V_RLENGTH: - return num(float64(p.matchLength)) - case V_RSTART: - return num(float64(p.matchStart)) - case V_FNR: - return num(float64(p.fileLineNum)) - case V_ARGC: - return num(float64(p.argc)) - case V_CONVFMT: - return str(p.convertFormat) - case V_FILENAME: - return p.filename - case V_FS: - return str(p.fieldSep) - case V_OFMT: - return str(p.outputFormat) - case V_OFS: - return str(p.outputFieldSep) - case V_ORS: - return str(p.outputRecordSep) - case V_RS: - return str(p.recordSep) - case V_RT: - return str(p.recordTerminator) - case V_SUBSEP: - return str(p.subscriptSep) - default: - panic(fmt.Sprintf("unexpected special variable index: %d", index)) - } + panic(fmt.Sprintf("unexpected special variable index: %d", index)) } } // Set a variable by name (specials and globals only) func (p *interp) setVarByName(name, value string) error { - index := SpecialVarIndex(name) + index := ast.SpecialVarIndex(name) if index > 0 { - return p.setVar(ScopeSpecial, index, numStr(value)) + return p.setSpecial(index, numStr(value)) } index, ok := p.program.Scalars[name] if ok { - return p.setVar(ScopeGlobal, index, numStr(value)) + p.globals[index] = numStr(value) + return nil } // Ignore variables that aren't defined in program return nil } -// Set a variable by index in given scope to given value -func (p *interp) setVar(scope VarScope, index int, v value) error { - switch scope { - case ScopeGlobal: - p.globals[index] = v - return nil - case ScopeLocal: - p.frame[index] = v - return nil - default: // ScopeSpecial - switch index { - case V_NF: - numFields := int(v.num()) - if numFields < 0 { - return newError("NF set to negative value: %d", numFields) - } - if numFields > maxFieldIndex { - return newError("NF set too large: %d", numFields) - } - p.ensureFields() - p.numFields = numFields - if p.numFields < len(p.fields) { - p.fields = 
p.fields[:p.numFields] - p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields] - } - for i := len(p.fields); i < p.numFields; i++ { - p.fields = append(p.fields, "") - p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false) - } - p.line = strings.Join(p.fields, p.outputFieldSep) - p.lineIsTrueStr = true - case V_NR: - p.lineNum = int(v.num()) - case V_RLENGTH: - p.matchLength = int(v.num()) - case V_RSTART: - p.matchStart = int(v.num()) - case V_FNR: - p.fileLineNum = int(v.num()) - case V_ARGC: - p.argc = int(v.num()) - case V_CONVFMT: - p.convertFormat = p.toString(v) - case V_FILENAME: - p.filename = v - case V_FS: - p.fieldSep = p.toString(v) - if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields - re, err := regexp.Compile(p.fieldSep) - if err != nil { - return newError("invalid regex %q: %s", p.fieldSep, err) - } - p.fieldSepRegex = re - } - case V_OFMT: - p.outputFormat = p.toString(v) - case V_OFS: - p.outputFieldSep = p.toString(v) - case V_ORS: - p.outputRecordSep = p.toString(v) - case V_RS: - p.recordSep = p.toString(v) - switch { // compare to interp.newScanner - case len(p.recordSep) <= 1: - // Simple cases use specialized splitters, not regex - case utf8.RuneCountInString(p.recordSep) == 1: - // Multi-byte unicode char falls back to regex splitter - sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars - p.recordSepRegex = regexp.MustCompile(sep) - default: - re, err := regexp.Compile(p.recordSep) - if err != nil { - return newError("invalid regex %q: %s", p.recordSep, err) - } - p.recordSepRegex = re - } - case V_RT: - p.recordTerminator = p.toString(v) - case V_SUBSEP: - p.subscriptSep = p.toString(v) - default: - panic(fmt.Sprintf("unexpected special variable index: %d", index)) +// Set special variable by index to given value +func (p *interp) setSpecial(index int, v value) error { + switch index { + case ast.V_NF: + numFields := int(v.num()) + if numFields < 0 { + return 
newError("NF set to negative value: %d", numFields) } - return nil + if numFields > maxFieldIndex { + return newError("NF set too large: %d", numFields) + } + p.ensureFields() + p.numFields = numFields + if p.numFields < len(p.fields) { + p.fields = p.fields[:p.numFields] + p.fieldsIsTrueStr = p.fieldsIsTrueStr[:p.numFields] + } + for i := len(p.fields); i < p.numFields; i++ { + p.fields = append(p.fields, "") + p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false) + } + p.line = p.joinFields(p.fields) + p.lineIsTrueStr = true + case ast.V_NR: + p.lineNum = int(v.num()) + case ast.V_RLENGTH: + p.matchLength = int(v.num()) + case ast.V_RSTART: + p.matchStart = int(v.num()) + case ast.V_FNR: + p.fileLineNum = int(v.num()) + case ast.V_ARGC: + p.argc = int(v.num()) + case ast.V_CONVFMT: + p.convertFormat = p.toString(v) + case ast.V_FILENAME: + p.filename = v + case ast.V_FS: + p.fieldSep = p.toString(v) + if utf8.RuneCountInString(p.fieldSep) > 1 { // compare to interp.ensureFields + re, err := regexp.Compile(compiler.AddRegexFlags(p.fieldSep)) + if err != nil { + return newError("invalid regex %q: %s", p.fieldSep, err) + } + p.fieldSepRegex = re + } + case ast.V_OFMT: + p.outputFormat = p.toString(v) + case ast.V_OFS: + p.outputFieldSep = p.toString(v) + case ast.V_ORS: + p.outputRecordSep = p.toString(v) + case ast.V_RS: + p.recordSep = p.toString(v) + switch { // compare to interp.newScanner + case len(p.recordSep) <= 1: + // Simple cases use specialized splitters, not regex + case utf8.RuneCountInString(p.recordSep) == 1: + // Multi-byte unicode char falls back to regex splitter + sep := regexp.QuoteMeta(p.recordSep) // not strictly necessary as no multi-byte chars are regex meta chars + p.recordSepRegex = regexp.MustCompile(sep) + default: + re, err := regexp.Compile(compiler.AddRegexFlags(p.recordSep)) + if err != nil { + return newError("invalid regex %q: %s", p.recordSep, err) + } + p.recordSepRegex = re + } + case ast.V_RT: + p.recordTerminator = 
p.toString(v) + case ast.V_SUBSEP: + p.subscriptSep = p.toString(v) + case ast.V_INPUTMODE: + var err error + p.inputMode, p.csvInputConfig, err = parseInputMode(p.toString(v)) + if err != nil { + return err + } + err = validateCSVInputConfig(p.inputMode, p.csvInputConfig) + if err != nil { + return err + } + case ast.V_OUTPUTMODE: + var err error + p.outputMode, p.csvOutputConfig, err = parseOutputMode(p.toString(v)) + if err != nil { + return err + } + err = validateCSVOutputConfig(p.outputMode, p.csvOutputConfig) + if err != nil { + return err + } + default: + panic(fmt.Sprintf("unexpected special variable index: %d", index)) } + return nil } // Determine the index of given array into the p.arrays slice. Global // arrays are just at p.arrays[index], local arrays have to be looked // up indirectly. -func (p *interp) getArrayIndex(scope VarScope, index int) int { - if scope == ScopeGlobal { +func (p *interp) arrayIndex(scope ast.VarScope, index int) int { + if scope == ast.ScopeGlobal { return index } else { return p.localArrays[len(p.localArrays)-1][index] } } -// Get a value from given array by key (index) -func (p *interp) getArrayValue(scope VarScope, arrayIndex int, index string) value { - resolved := p.getArrayIndex(scope, arrayIndex) - array := p.arrays[resolved] - v, ok := array[index] - if !ok { - // Strangely, per the POSIX spec, "Any other reference to a - // nonexistent array element [apart from "in" expressions] - // shall automatically create it." - array[index] = v - } - return v +// Return array with given scope and index. +func (p *interp) array(scope ast.VarScope, index int) map[string]value { + return p.arrays[p.arrayIndex(scope, index)] +} + +// Return local array with given index. 
+func (p *interp) localArray(index int) map[string]value { + return p.arrays[p.localArrays[len(p.localArrays)-1][index]] } // Set a value in given array by key (index) -func (p *interp) setArrayValue(scope VarScope, arrayIndex int, index string, v value) { - resolved := p.getArrayIndex(scope, arrayIndex) - p.arrays[resolved][index] = v +func (p *interp) setArrayValue(scope ast.VarScope, arrayIndex int, index string, v value) { + array := p.array(scope, arrayIndex) + array[index] = v } // Get the value of given numbered field, equivalent to "$index" -func (p *interp) getField(index int) (value, error) { - if index < 0 { - return null(), newError("field index negative: %d", index) - } +func (p *interp) getField(index int) value { if index == 0 { if p.lineIsTrueStr { - return str(p.line), nil + return str(p.line) } else { - return numStr(p.line), nil + return numStr(p.line) } } p.ensureFields() + if index < 1 { + index = len(p.fields) + 1 + index + if index < 1 { + return str("") + } + } if index > len(p.fields) { - return str(""), nil + return str("") } if p.fieldsIsTrueStr[index-1] { - return str(p.fields[index-1]), nil + return str(p.fields[index-1]) } else { - return numStr(p.fields[index-1]), nil + return numStr(p.fields[index-1]) } } +// Get the value of a field by name (for CSV/TSV mode), as in @"name". +func (p *interp) getFieldByName(name string) (value, error) { + if p.fieldIndexes == nil { + // Lazily create map of field names to indexes. 
+ if p.fieldNames == nil { + return null(), newError(`@ only supported if header parsing enabled; use -H or add "header" to INPUTMODE`) + } + p.fieldIndexes = make(map[string]int, len(p.fieldNames)) + for i, n := range p.fieldNames { + p.fieldIndexes[n] = i + 1 + } + } + index := p.fieldIndexes[name] + if index == 0 { + return str(""), nil + } + return p.getField(index), nil +} + // Sets a single field, equivalent to "$index = value" func (p *interp) setField(index int, value string) error { if index == 0 { p.setLine(value, true) return nil } - if index < 0 { - return newError("field index negative: %d", index) - } if index > maxFieldIndex { return newError("field index too large: %d", index) } // If there aren't enough fields, add empty string fields in between p.ensureFields() + if index < 1 { + index = len(p.fields) + 1 + index + if index < 1 { + return nil + } + } for i := len(p.fields); i < index; i++ { p.fields = append(p.fields, "") p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, true) @@ -1186,11 +926,24 @@ func (p *interp) setField(index int, value string) error { p.fields[index-1] = value p.fieldsIsTrueStr[index-1] = true p.numFields = len(p.fields) - p.line = strings.Join(p.fields, p.outputFieldSep) + p.line = p.joinFields(p.fields) p.lineIsTrueStr = true return nil } +func (p *interp) joinFields(fields []string) string { + switch p.outputMode { + case CSVMode, TSVMode: + p.csvJoinFieldsBuf.Reset() + _ = p.writeCSV(&p.csvJoinFieldsBuf, fields) + line := p.csvJoinFieldsBuf.Bytes() + line = line[:len(line)-lenNewline(line)] + return string(line) + default: + return strings.Join(fields, p.outputFieldSep) + } +} + // Convert value to string using current CONVFMT func (p *interp) toString(v value) string { return v.str(p.convertFormat) @@ -1201,7 +954,7 @@ func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) { if re, ok := p.regexCache[regex]; ok { return re, nil } - re, err := regexp.Compile(regex) + re, err := 
regexp.Compile(compiler.AddRegexFlags(regex)) if err != nil { return nil, newError("invalid regex %q: %s", regex, err) } @@ -1212,158 +965,139 @@ func (p *interp) compileRegex(regex string) (*regexp.Regexp, error) { return re, nil } -// Evaluate simple binary expression and return result -func (p *interp) evalBinary(op Token, l, r value) (value, error) { - // Note: cases are ordered (very roughly) in order of frequency - // of occurrence for performance reasons. Benchmark on common code - // before changing the order. - switch op { - case ADD: - return num(l.num() + r.num()), nil - case SUB: - return num(l.num() - r.num()), nil - case EQUALS: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) == p.toString(r)), nil - } else { - return boolean(ln == rn), nil - } - case LESS: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) < p.toString(r)), nil - } else { - return boolean(ln < rn), nil - } - case LTE: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) <= p.toString(r)), nil - } else { - return boolean(ln <= rn), nil - } - case CONCAT: - return str(p.toString(l) + p.toString(r)), nil - case MUL: - return num(l.num() * r.num()), nil - case DIV: - rf := r.num() - if rf == 0.0 { - return null(), newError("division by zero") - } - return num(l.num() / rf), nil - case GREATER: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) > p.toString(r)), nil - } else { - return boolean(ln > rn), nil - } - case GTE: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) >= p.toString(r)), nil - } else { - return boolean(ln >= rn), nil - } - case NOT_EQUALS: - ln, lIsStr := l.isTrueStr() - rn, rIsStr := r.isTrueStr() - if lIsStr || rIsStr { - return boolean(p.toString(l) 
!= p.toString(r)), nil - } else { - return boolean(ln != rn), nil - } - case MATCH: - re, err := p.compileRegex(p.toString(r)) - if err != nil { - return null(), err - } - matched := re.MatchString(p.toString(l)) - return boolean(matched), nil - case NOT_MATCH: - re, err := p.compileRegex(p.toString(r)) - if err != nil { - return null(), err - } - matched := re.MatchString(p.toString(l)) - return boolean(!matched), nil - case POW: - return num(math.Pow(l.num(), r.num())), nil - case MOD: - rf := r.num() - if rf == 0.0 { - return null(), newError("division by zero in mod") - } - return num(math.Mod(l.num(), rf)), nil +func getDefaultShellCommand() []string { + executable := "/bin/sh" + if runtime.GOOS == "windows" { + executable = "sh" + } + return []string{executable, "-c"} +} + +func inputModeString(mode IOMode, csvConfig CSVInputConfig) string { + var s string + var defaultSep rune + switch mode { + case CSVMode: + s = "csv" + defaultSep = ',' + case TSVMode: + s = "tsv" + defaultSep = '\t' + case DefaultMode: + return "" + } + if csvConfig.Separator != defaultSep { + s += " separator=" + string([]rune{csvConfig.Separator}) + } + if csvConfig.Comment != 0 { + s += " comment=" + string([]rune{csvConfig.Comment}) + } + if csvConfig.Header { + s += " header" + } + return s +} + +func parseInputMode(s string) (mode IOMode, csvConfig CSVInputConfig, err error) { + fields := strings.Fields(s) + if len(fields) == 0 { + return DefaultMode, CSVInputConfig{}, nil + } + switch fields[0] { + case "csv": + mode = CSVMode + csvConfig.Separator = ',' + case "tsv": + mode = TSVMode + csvConfig.Separator = '\t' default: - panic(fmt.Sprintf("unexpected binary operation: %s", op)) + return DefaultMode, CSVInputConfig{}, newError("invalid input mode %q", fields[0]) } + for _, field := range fields[1:] { + key := field + val := "" + equals := strings.IndexByte(field, '=') + if equals >= 0 { + key = field[:equals] + val = field[equals+1:] + } + switch key { + case "separator": + r, n 
:= utf8.DecodeRuneInString(val) + if n == 0 || n < len(val) { + return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV separator %q", val) + } + csvConfig.Separator = r + case "comment": + r, n := utf8.DecodeRuneInString(val) + if n == 0 || n < len(val) { + return DefaultMode, CSVInputConfig{}, newError("invalid CSV/TSV comment character %q", val) + } + csvConfig.Comment = r + case "header": + if val != "" && val != "true" && val != "false" { + return DefaultMode, CSVInputConfig{}, newError("invalid header value %q", val) + } + csvConfig.Header = val == "" || val == "true" + default: + return DefaultMode, CSVInputConfig{}, newError("invalid input mode key %q", key) + } + } + return mode, csvConfig, nil } -// Evaluate unary expression and return result -func (p *interp) evalUnary(op Token, v value) value { - switch op { - case SUB: - return num(-v.num()) - case NOT: - return boolean(!v.boolean()) - case ADD: - return num(v.num()) +func outputModeString(mode IOMode, csvConfig CSVOutputConfig) string { + var s string + var defaultSep rune + switch mode { + case CSVMode: + s = "csv" + defaultSep = ',' + case TSVMode: + s = "tsv" + defaultSep = '\t' + case DefaultMode: + return "" + } + if csvConfig.Separator != defaultSep { + s += " separator=" + string([]rune{csvConfig.Separator}) + } + return s +} + +func parseOutputMode(s string) (mode IOMode, csvConfig CSVOutputConfig, err error) { + fields := strings.Fields(s) + if len(fields) == 0 { + return DefaultMode, CSVOutputConfig{}, nil + } + switch fields[0] { + case "csv": + mode = CSVMode + csvConfig.Separator = ',' + case "tsv": + mode = TSVMode + csvConfig.Separator = '\t' default: - panic(fmt.Sprintf("unexpected unary operation: %s", op)) + return DefaultMode, CSVOutputConfig{}, newError("invalid output mode %q", fields[0]) } -} - -// Perform an assignment: can assign to var, array[key], or $field -func (p *interp) assign(left Expr, right value) error { - switch left := left.(type) { - case *VarExpr: - 
return p.setVar(left.Scope, left.Index, right) - case *IndexExpr: - index, err := p.evalIndex(left.Index) - if err != nil { - return err - } - p.setArrayValue(left.Array.Scope, left.Array.Index, index, right) - return nil - case *FieldExpr: - index, err := p.eval(left.Index) - if err != nil { - return err - } - return p.setField(int(index.num()), p.toString(right)) - } - // Shouldn't happen - panic(fmt.Sprintf("unexpected lvalue type: %T", left)) -} - -// Evaluate an index expression to a string. Multi-valued indexes are -// separated by SUBSEP. -func (p *interp) evalIndex(indexExprs []Expr) (string, error) { - // Optimize the common case of a 1-dimensional index - if len(indexExprs) == 1 { - v, err := p.eval(indexExprs[0]) - if err != nil { - return "", err - } - return p.toString(v), nil - } - - // Up to 3-dimensional indices won't require heap allocation - indices := make([]string, 0, 3) - for _, expr := range indexExprs { - v, err := p.eval(expr) - if err != nil { - return "", err - } - indices = append(indices, p.toString(v)) - } - return strings.Join(indices, p.subscriptSep), nil + for _, field := range fields[1:] { + key := field + val := "" + equals := strings.IndexByte(field, '=') + if equals >= 0 { + key = field[:equals] + val = field[equals+1:] + } + switch key { + case "separator": + r, n := utf8.DecodeRuneInString(val) + if n == 0 || n < len(val) { + return DefaultMode, CSVOutputConfig{}, newError("invalid CSV/TSV separator %q", val) + } + csvConfig.Separator = r + default: + return DefaultMode, CSVOutputConfig{}, newError("invalid output mode key %q", key) + } + } + return mode, csvConfig, nil } diff --git a/play/vendor/github.com/benhoyt/goawk/interp/io.go b/play/vendor/github.com/benhoyt/goawk/interp/io.go index ca6b1e0..ec23c21 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/io.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/io.go @@ -5,16 +5,19 @@ package interp import ( "bufio" "bytes" + "encoding/csv" "fmt" "io" "io/ioutil" "os" + 
"os/exec" "regexp" + "runtime" "strconv" "strings" "unicode/utf8" - . "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/ast" . "github.com/benhoyt/goawk/lexer" ) @@ -27,6 +30,73 @@ func (p *interp) printLine(writer io.Writer, line string) error { return writeOutput(writer, p.outputRecordSep) } +// Print given arguments followed by a newline (for "print" statement). +func (p *interp) printArgs(writer io.Writer, args []value) error { + switch p.outputMode { + case CSVMode, TSVMode: + fields := make([]string, 0, 7) // up to 7 args won't require a heap allocation + for _, arg := range args { + fields = append(fields, arg.str(p.outputFormat)) + } + err := p.writeCSV(writer, fields) + if err != nil { + return err + } + default: + // Print OFS-separated args followed by ORS (usually newline). + for i, arg := range args { + if i > 0 { + err := writeOutput(writer, p.outputFieldSep) + if err != nil { + return err + } + } + err := writeOutput(writer, arg.str(p.outputFormat)) + if err != nil { + return err + } + } + err := writeOutput(writer, p.outputRecordSep) + if err != nil { + return err + } + } + return nil +} + +func (p *interp) writeCSV(output io.Writer, fields []string) error { + // If output is already a *bufio.Writer (the common case), csv.NewWriter + // will use it directly. This is not explicitly documented, but + // csv.NewWriter calls bufio.NewWriter which calls bufio.NewWriterSize + // with a 4KB buffer, and bufio.NewWriterSize is documented as returning + // the underlying bufio.Writer if it's passed a large enough one. + var flush func() error + _, isBuffered := output.(*bufio.Writer) + if !isBuffered { + // Otherwise create a new buffered writer and flush after writing. + if p.csvOutput == nil { + p.csvOutput = bufio.NewWriterSize(output, 4096) + } else { + p.csvOutput.Reset(output) + } + output = p.csvOutput + flush = p.csvOutput.Flush + } + + // Given the above, creating a new one of these is cheap. 
+ writer := csv.NewWriter(output) + writer.Comma = p.csvOutputConfig.Separator + writer.UseCRLF = runtime.GOOS == "windows" + err := writer.Write(fields) + if err != nil { + return err + } + if flush != nil { + return flush() + } + return nil +} + // Implement a buffered version of WriteCloser so output is buffered // when redirecting to a file (eg: print >"out") type bufferedWriteCloser struct { @@ -49,16 +119,7 @@ func (wc *bufferedWriteCloser) Close() error { // Determine the output stream for given redirect token and // destination (file or pipe name) -func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) { - if redirect == ILLEGAL { - // Token "ILLEGAL" means send to standard output - return p.output, nil - } - - destValue, err := p.eval(dest) - if err != nil { - return nil, err - } +func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) { name := p.toString(destValue) if _, ok := p.inputStreams[name]; ok { return nil, newError("can't write to reader stream") @@ -121,6 +182,18 @@ func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) { } } +// Executes code using configured system shell +func (p *interp) execShell(code string) *exec.Cmd { + executable := p.shellCommand[0] + args := p.shellCommand[1:] + args = append(args, code) + if p.checkCtx { + return exec.CommandContext(p.ctx, executable, args...) + } else { + return exec.Command(executable, args...) 
+ } +} + // Get input Scanner to use for "getline" based on file name func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { if _, ok := p.outputStreams[name]; ok { @@ -134,7 +207,7 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { if scanner, ok := p.scanners["-"]; ok { return scanner, nil } - scanner := p.newScanner(p.stdin) + scanner := p.newScanner(p.stdin, make([]byte, inputBufSize)) p.scanners[name] = scanner return scanner, nil } @@ -145,7 +218,7 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) { if err != nil { return nil, err // *os.PathError is handled by caller (getline returns -1) } - scanner := p.newScanner(r) + scanner := p.newScanner(r, make([]byte, inputBufSize)) p.scanners[name] = scanner p.inputStreams[name] = r return scanner, nil @@ -175,7 +248,7 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) { p.printErrorf("%s\n", err) return bufio.NewScanner(strings.NewReader("")), nil } - scanner := p.newScanner(r) + scanner := p.newScanner(r, make([]byte, inputBufSize)) p.commands[name] = cmd p.inputStreams[name] = r p.scanners[name] = scanner @@ -183,28 +256,53 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) { } // Create a new buffered Scanner for reading input records -func (p *interp) newScanner(input io.Reader) *bufio.Scanner { +func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner { scanner := bufio.NewScanner(input) switch { + case p.inputMode == CSVMode || p.inputMode == TSVMode: + splitter := csvSplitter{ + separator: p.csvInputConfig.Separator, + sepLen: utf8.RuneLen(p.csvInputConfig.Separator), + comment: p.csvInputConfig.Comment, + header: p.csvInputConfig.Header, + fields: &p.fields, + setFieldNames: p.setFieldNames, + } + scanner.Split(splitter.scan) case p.recordSep == "\n": // Scanner default is to split on newlines case p.recordSep == "": // Empty string for RS means split on \n\n 
(blank lines) - splitter := blankLineSplitter{&p.recordTerminator} + splitter := blankLineSplitter{terminator: &p.recordTerminator} scanner.Split(splitter.scan) case len(p.recordSep) == 1: - splitter := byteSplitter{p.recordSep[0]} + splitter := byteSplitter{sep: p.recordSep[0]} scanner.Split(splitter.scan) case utf8.RuneCountInString(p.recordSep) >= 1: // Multi-byte and single char but multi-byte RS use regex - splitter := regexSplitter{p.recordSepRegex, &p.recordTerminator} + splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator} scanner.Split(splitter.scan) } - buffer := make([]byte, inputBufSize) scanner.Buffer(buffer, maxRecordLength) return scanner } +// setFieldNames is called by csvSplitter.scan on the first row (if the +// "header" option is specified). +func (p *interp) setFieldNames(names []string) { + p.fieldNames = names + p.fieldIndexes = nil // clear name-to-index cache + + // Populate FIELDS array (mapping of field indexes to field names). + fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"]) + for k := range fieldsArray { + delete(fieldsArray, k) + } + for i, name := range names { + fieldsArray[strconv.Itoa(i+1)] = str(name) + } +} + // Copied from bufio/scan.go in the stdlib: I guess it's a bit more // efficient than bytes.TrimSuffix(data, []byte("\r")) func dropCR(data []byte) []byte { @@ -323,10 +421,222 @@ func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, return 0, nil, nil } +// Splitter that splits records in CSV or TSV format. +type csvSplitter struct { + separator rune + sepLen int + comment rune + header bool + + recordBuffer []byte + fieldIndexes []int + noBOMCheck bool + + fields *[]string + setFieldNames func(names []string) + rowNum int +} + +// The structure of this code is taken from the stdlib encoding/csv Reader +// code, which is licensed under a compatible BSD-style license. 
+// +// We don't support all encoding/csv features: FieldsPerRecord is not +// supported, LazyQuotes is always on, and TrimLeadingSpace is always off. +func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) { + // Some CSV files are saved with a UTF-8 BOM at the start; skip it. + if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF { + data = data[3:] + advance = 3 + s.noBOMCheck = true + } + + origData := data + if atEOF && len(data) == 0 { + // No more data, tell Scanner to stop. + return 0, nil, nil + } + + readLine := func() []byte { + newline := bytes.IndexByte(data, '\n') + var line []byte + switch { + case newline >= 0: + // Process a single line (including newline). + line = data[:newline+1] + data = data[newline+1:] + case atEOF: + // If at EOF, we have a final record without a newline. + line = data + data = data[len(data):] + default: + // Need more data + return nil + } + + // For backwards compatibility, drop trailing \r before EOF. + if len(line) > 0 && atEOF && line[len(line)-1] == '\r' { + line = line[:len(line)-1] + advance++ + } + + return line + } + + // Read line (automatically skipping past empty lines and any comments). + skip := 0 + var line []byte + for { + line = readLine() + if len(line) == 0 { + return 0, nil, nil // Request more data + } + if s.comment != 0 && nextRune(line) == s.comment { + advance += len(line) + skip += len(line) + continue // Skip comment lines + } + if len(line) == lenNewline(line) { + advance += len(line) + skip += len(line) + continue // Skip empty lines + } + break + } + + // Parse each field in the record. 
+ const quoteLen = len(`"`) + tokenHasCR := false + s.recordBuffer = s.recordBuffer[:0] + s.fieldIndexes = s.fieldIndexes[:0] +parseField: + for { + if len(line) == 0 || line[0] != '"' { + // Non-quoted string field + i := bytes.IndexRune(line, s.separator) + field := line + if i >= 0 { + advance += i + s.sepLen + field = field[:i] + } else { + advance += len(field) + field = field[:len(field)-lenNewline(field)] + } + s.recordBuffer = append(s.recordBuffer, field...) + s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) + if i >= 0 { + line = line[i+s.sepLen:] + continue parseField + } + break parseField + } else { + // Quoted string field + line = line[quoteLen:] + advance += quoteLen + for { + i := bytes.IndexByte(line, '"') + if i >= 0 { + // Hit next quote. + s.recordBuffer = append(s.recordBuffer, line[:i]...) + line = line[i+quoteLen:] + advance += i + quoteLen + switch rn := nextRune(line); { + case rn == '"': + // `""` sequence (append quote). + s.recordBuffer = append(s.recordBuffer, '"') + line = line[quoteLen:] + advance += quoteLen + case rn == s.separator: + // `",` sequence (end of field). + line = line[s.sepLen:] + s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) + advance += s.sepLen + continue parseField + case lenNewline(line) == len(line): + // `"\n` sequence (end of line). + s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) + advance += len(line) + break parseField + default: + // `"` sequence (bare quote). + s.recordBuffer = append(s.recordBuffer, '"') + } + } else if len(line) > 0 { + // Hit end of line (copy all data so far). + advance += len(line) + newlineLen := lenNewline(line) + if newlineLen == 2 { + tokenHasCR = true + s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...) + s.recordBuffer = append(s.recordBuffer, '\n') + } else { + s.recordBuffer = append(s.recordBuffer, line...) 
+ } + line = readLine() + if line == nil { + return 0, nil, nil // Request more data + } + } else { + // Abrupt end of file. + s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer)) + advance += len(line) + break parseField + } + } + } + } + + // Create a single string and create slices out of it. + // This pins the memory of the fields together, but allocates once. + strBuf := string(s.recordBuffer) // Convert to string once to batch allocations + fields := make([]string, len(s.fieldIndexes)) + preIdx := 0 + for i, idx := range s.fieldIndexes { + fields[i] = strBuf[preIdx:idx] + preIdx = idx + } + + s.noBOMCheck = true + + if s.rowNum == 0 && s.header { + // Set header field names and advance, but don't return a line (token). + s.rowNum++ + s.setFieldNames(fields) + return advance, nil, nil + } + + // Normal row, set fields and return a line (token). + s.rowNum++ + *s.fields = fields + token = origData[skip:advance] + token = token[:len(token)-lenNewline(token)] + if tokenHasCR { + token = bytes.ReplaceAll(token, []byte{'\r'}, nil) + } + return advance, token, nil +} + +// lenNewline reports the number of bytes for the trailing \n. +func lenNewline(b []byte) int { + if len(b) > 0 && b[len(b)-1] == '\n' { + if len(b) > 1 && b[len(b)-2] == '\r' { + return 2 + } + return 1 + } + return 0 +} + +// nextRune returns the next rune in b or utf8.RuneError. 
+func nextRune(b []byte) rune { + r, _ := utf8.DecodeRune(b) + return r +} + // Setup for a new input file with given name (empty string if stdin) func (p *interp) setFile(filename string) { p.filename = numStr(filename) p.fileLineNum = 0 + p.hadFiles = true } // Setup for a new input line (but don't parse it into fields till we @@ -335,6 +645,7 @@ func (p *interp) setLine(line string, isTrueStr bool) { p.line = line p.lineIsTrueStr = isTrueStr p.haveFields = false + p.reparseCSV = true } // Ensure that the current line is parsed into fields, splitting it @@ -346,6 +657,23 @@ func (p *interp) ensureFields() { p.haveFields = true switch { + case p.inputMode == CSVMode || p.inputMode == TSVMode: + if p.reparseCSV { + scanner := bufio.NewScanner(strings.NewReader(p.line)) + scanner.Buffer(nil, maxRecordLength) + splitter := csvSplitter{ + separator: p.csvInputConfig.Separator, + sepLen: utf8.RuneLen(p.csvInputConfig.Separator), + comment: p.csvInputConfig.Comment, + fields: &p.fields, + } + scanner.Split(splitter.scan) + if !scanner.Scan() { + p.fields = nil + } + } else { + // Normally fields have already been parsed by csvSplitter + } case p.fieldSep == " ": // FS space (default) means split fields on any whitespace p.fields = strings.Fields(p.line) @@ -362,7 +690,7 @@ func (p *interp) ensureFields() { // Special case for when RS=="" and FS is single character, // split on newline in addition to FS. 
See more here: // https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html - if p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 { + if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 { fields := make([]string, 0, len(p.fields)) for _, field := range p.fields { lines := strings.Split(field, "\n") @@ -374,7 +702,10 @@ func (p *interp) ensureFields() { p.fields = fields } - p.fieldsIsTrueStr = make([]bool, len(p.fields)) + p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time + for range p.fields { + p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false) + } p.numFields = len(p.fields) } @@ -391,8 +722,7 @@ func (p *interp) nextLine() (string, error) { // Moved past number of ARGV args and haven't seen // any files yet, use stdin p.input = p.stdin - p.setFile("") - p.hadFiles = true + p.setFile("-") } else { if p.filenameIndex >= p.argc { // Done with ARGV args, all done with input @@ -403,15 +733,24 @@ func (p *interp) nextLine() (string, error) { // not present index := strconv.Itoa(p.filenameIndex) argvIndex := p.program.Arrays["ARGV"] - argvArray := p.arrays[p.getArrayIndex(ScopeGlobal, argvIndex)] + argvArray := p.array(ast.ScopeGlobal, argvIndex) filename := p.toString(argvArray[index]) p.filenameIndex++ // Is it actually a var=value assignment? 
- matches := varRegex.FindStringSubmatch(filename) + var matches []string + if !p.noArgVars { + matches = varRegex.FindStringSubmatch(filename) + } if len(matches) >= 3 { // Yep, set variable to value and keep going - err := p.setVarByName(matches[1], matches[2]) + name, val := matches[1], matches[2] + // Oddly, var=value args must interpret escapes (issue #129) + unescaped, err := Unescape(val) + if err == nil { + val = unescaped + } + err = p.setVarByName(name, val) if err != nil { return "", err } @@ -423,7 +762,7 @@ func (p *interp) nextLine() (string, error) { } else if filename == "-" { // ARGV arg is "-" meaning stdin p.input = p.stdin - p.setFile("") + p.setFile("-") } else { // A regular file name, open it if p.noFileReads { @@ -435,10 +774,12 @@ func (p *interp) nextLine() (string, error) { } p.input = input p.setFile(filename) - p.hadFiles = true } } - p.scanner = p.newScanner(p.input) + if p.inputBuffer == nil { // reuse buffer from last input file + p.inputBuffer = make([]byte, inputBufSize) + } + p.scanner = p.newScanner(p.input, p.inputBuffer) } p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars if p.scanner.Scan() { diff --git a/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go b/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go new file mode 100644 index 0000000..438fe6d --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/newexecute.go @@ -0,0 +1,176 @@ +// The New...Execute API (allows you to efficiently execute the same program repeatedly). + +package interp + +import ( + "context" + "math" + + "github.com/benhoyt/goawk/parser" +) + +const checkContextOps = 1000 // for efficiency, only check context every N instructions + +// Interpreter is an interpreter for a specific program, allowing you to +// efficiently execute the same program over and over with different inputs. +// Use New to create an Interpreter. 
+// +// Most programs won't need reusable execution, and should use the simpler +// Exec or ExecProgram functions instead. +type Interpreter struct { + interp *interp +} + +// New creates a reusable interpreter for the given program. +// +// Most programs won't need reusable execution, and should use the simpler +// Exec or ExecProgram functions instead. +func New(program *parser.Program) (*Interpreter, error) { + p := newInterp(program) + return &Interpreter{interp: p}, nil +} + +// Execute runs this program with the given execution configuration (input, +// output, and variables) and returns the exit status code of the program. A +// nil config is valid and will use the defaults (zero values). +// +// Internal memory allocations are reused, so calling Execute on the same +// Interpreter instance is significantly more efficient than calling +// ExecProgram multiple times. +// +// I/O state is reset between each run, but variables and the random number +// generator seed are not; use ResetVars and ResetRand to reset those. +// +// It's best to set config.Environ to a non-nil slice, otherwise Execute will +// call the relatively inefficient os.Environ each time. Set config.Environ to +// []string{} if the script doesn't need environment variables, or call +// os.Environ once and set config.Environ to that value each execution. +// +// Note that config.Funcs must be the same value provided to +// parser.ParseProgram, and must not change between calls to Execute. 
+func (p *Interpreter) Execute(config *Config) (int, error) { + p.interp.resetCore() + p.interp.checkCtx = false + + err := p.interp.setExecuteConfig(config) + if err != nil { + return 0, err + } + + return p.interp.executeAll() +} + +func (p *interp) resetCore() { + p.scanner = nil + for k := range p.scanners { + delete(p.scanners, k) + } + p.input = nil + for k := range p.inputStreams { + delete(p.inputStreams, k) + } + for k := range p.outputStreams { + delete(p.outputStreams, k) + } + for k := range p.commands { + delete(p.commands, k) + } + + p.sp = 0 + p.localArrays = p.localArrays[:0] + p.callDepth = 0 + + p.filename = null() + p.line = "" + p.lineIsTrueStr = false + p.lineNum = 0 + p.fileLineNum = 0 + p.fields = nil + p.fieldsIsTrueStr = nil + p.numFields = 0 + p.haveFields = false + + p.exitStatus = 0 +} + +func (p *interp) resetVars() { + // Reset global scalars + for i := range p.globals { + p.globals[i] = null() + } + + // Reset global arrays + for _, array := range p.arrays { + for k := range array { + delete(array, k) + } + } + + // Reset special variables + p.convertFormat = "%.6g" + p.outputFormat = "%.6g" + p.fieldSep = " " + p.fieldSepRegex = nil + p.recordSep = "\n" + p.recordSepRegex = nil + p.recordTerminator = "" + p.outputFieldSep = " " + p.outputRecordSep = "\n" + p.subscriptSep = "\x1c" + p.matchLength = 0 + p.matchStart = 0 +} + +// ResetVars resets this interpreter's variables, setting scalar variables to +// null, clearing arrays, and resetting special variables such as FS and RS to +// their defaults. +func (p *Interpreter) ResetVars() { + p.interp.resetVars() +} + +// ResetRand resets this interpreter's random number generator seed, so that +// rand() produces the same sequence it would have after calling New. This is +// a relatively CPU-intensive operation. 
+func (p *Interpreter) ResetRand() { + p.interp.randSeed = 1.0 + p.interp.random.Seed(int64(math.Float64bits(p.interp.randSeed))) +} + +// ExecuteContext is like Execute, but takes a context to allow the caller to +// set an execution timeout or cancel the execution. For efficiency, the +// context is only tested every 1000 virtual machine instructions. +// +// Context handling is not preemptive: currently long-running operations like +// system() won't be interrupted. +func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) { + p.interp.resetCore() + p.interp.checkCtx = ctx != context.Background() && ctx != context.TODO() + p.interp.ctx = ctx + p.interp.ctxDone = ctx.Done() + p.interp.ctxOps = 0 + + err := p.interp.setExecuteConfig(config) + if err != nil { + return 0, err + } + + return p.interp.executeAll() +} + +func (p *interp) checkContext() error { + p.ctxOps++ + if p.ctxOps < checkContextOps { + return nil + } + p.ctxOps = 0 + return p.checkContextNow() +} + +func (p *interp) checkContextNow() error { + select { + case <-p.ctxDone: + return p.ctx.Err() + default: + return nil + } +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/value.go b/play/vendor/github.com/benhoyt/goawk/interp/value.go index ef5a422..7ae95d6 100644 --- a/play/vendor/github.com/benhoyt/goawk/interp/value.go +++ b/play/vendor/github.com/benhoyt/goawk/interp/value.go @@ -53,6 +53,20 @@ func boolean(b bool) value { return num(0) } +// String returns a string representation of v for debugging. +func (v value) String() string { + switch v.typ { + case typeStr: + return fmt.Sprintf("str(%q)", v.s) + case typeNum: + return fmt.Sprintf("num(%s)", v.str("%.6g")) + case typeNumStr: + return fmt.Sprintf("numStr(%q)", v.s) + default: + return "null()" + } +} + // Return true if value is a "true string" (a string or a "numeric string" // from an input field that can't be converted to a number). If false, // also return the (possibly converted) number. 
@@ -61,7 +75,7 @@ func (v value) isTrueStr() (float64, bool) { case typeStr: return 0, true case typeNumStr: - f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64) + f, err := parseFloat(v.s) if err != nil { return 0, true } @@ -79,7 +93,7 @@ func (v value) boolean() bool { case typeStr: return v.s != "" case typeNumStr: - f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64) + f, err := parseFloat(v.s) if err != nil { return v.s != "" } @@ -89,6 +103,30 @@ func (v value) boolean() bool { } } +// Like strconv.ParseFloat, but allow hex floating point without exponent, and +// allow "+nan" and "-nan" (though they both return math.NaN()). Also disallow +// underscore digit separators. +func parseFloat(s string) (float64, error) { + s = strings.TrimSpace(s) + if len(s) > 1 && (s[0] == '+' || s[0] == '-') { + if len(s) == 4 && hasNaNPrefix(s[1:]) { + // ParseFloat doesn't handle "nan" with sign prefix, so handle it here. + return math.NaN(), nil + } + if len(s) > 3 && hasHexPrefix(s[1:]) && strings.IndexByte(s, 'p') < 0 { + s += "p0" + } + } else if len(s) > 2 && hasHexPrefix(s) && strings.IndexByte(s, 'p') < 0 { + s += "p0" + } + n, err := strconv.ParseFloat(s, 64) + if err == nil && strings.IndexByte(s, '_') >= 0 { + // Underscore separators aren't supported by AWK. + return 0, strconv.ErrSyntax + } + return n, err +} + // Return value's string value, or convert to a string using given // format if a number value. Integers are a special case and don't // use floatFormat. @@ -106,6 +144,9 @@ func (v value) str(floatFormat string) string { case v.n == float64(int(v.n)): return strconv.Itoa(int(v.n)) default: + if floatFormat == "%.6g" { + return strconv.FormatFloat(v.n, 'g', 6, 64) + } return fmt.Sprintf(floatFormat, v.n) } } @@ -137,20 +178,35 @@ func parseFloatPrefix(s string) float64 { } start := i - // Parse mantissa: optional sign, initial digit(s), optional '.', - // then more digits - gotDigit := false + // Parse optional sign and check for NaN and Inf. 
if i < len(s) && (s[i] == '+' || s[i] == '-') { i++ } - for i < len(s) && s[i] >= '0' && s[i] <= '9' { + if i+3 <= len(s) { + if hasNaNPrefix(s[i:]) { + return math.NaN() + } + if hasInfPrefix(s[i:]) { + if s[start] == '-' { + return math.Inf(-1) + } + return math.Inf(1) + } + } + + // Parse mantissa: initial digit(s), optional '.', then more digits + if i+2 < len(s) && hasHexPrefix(s[i:]) { + return parseHexFloatPrefix(s, start, i+2) + } + gotDigit := false + for i < len(s) && isDigit(s[i]) { gotDigit = true i++ } if i < len(s) && s[i] == '.' { i++ } - for i < len(s) && s[i] >= '0' && s[i] <= '9' { + for i < len(s) && isDigit(s[i]) { gotDigit = true i++ } @@ -166,7 +222,7 @@ func parseFloatPrefix(s string) float64 { if i < len(s) && (s[i] == '+' || s[i] == '-') { i++ } - for i < len(s) && s[i] >= '0' && s[i] <= '9' { + for i < len(s) && isDigit(s[i]) { i++ end = i } @@ -176,3 +232,63 @@ func parseFloatPrefix(s string) float64 { f, _ := strconv.ParseFloat(floatStr, 64) return f // Returns infinity in case of "value out of range" error } + +func hasHexPrefix(s string) bool { + return s[0] == '0' && (s[1] == 'x' || s[1] == 'X') +} + +func hasNaNPrefix(s string) bool { + return (s[0] == 'n' || s[0] == 'N') && (s[1] == 'a' || s[1] == 'A') && (s[2] == 'n' || s[2] == 'N') +} + +func hasInfPrefix(s string) bool { + return (s[0] == 'i' || s[0] == 'I') && (s[1] == 'n' || s[1] == 'N') && (s[2] == 'f' || s[2] == 'F') +} + +// Helper used by parseFloatPrefix to handle hexadecimal floating point. +func parseHexFloatPrefix(s string, start, i int) float64 { + gotDigit := false + for i < len(s) && isHexDigit(s[i]) { + gotDigit = true + i++ + } + if i < len(s) && s[i] == '.' 
{ + i++ + } + for i < len(s) && isHexDigit(s[i]) { + gotDigit = true + i++ + } + if !gotDigit { + return 0 + } + + gotExponent := false + end := i + if i < len(s) && (s[i] == 'p' || s[i] == 'P') { + i++ + if i < len(s) && (s[i] == '+' || s[i] == '-') { + i++ + } + for i < len(s) && isDigit(s[i]) { + gotExponent = true + i++ + end = i + } + } + + floatStr := s[start:end] + if !gotExponent { + floatStr += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0" + } + f, _ := strconv.ParseFloat(floatStr, 64) + return f // Returns infinity in case of "value out of range" error +} + +func isDigit(c byte) bool { + return c >= '0' && c <= '9' +} + +func isHexDigit(c byte) bool { + return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F' +} diff --git a/play/vendor/github.com/benhoyt/goawk/interp/vm.go b/play/vendor/github.com/benhoyt/goawk/interp/vm.go new file mode 100644 index 0000000..49b07f5 --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/interp/vm.go @@ -0,0 +1,1259 @@ +// Virtual machine: interpret GoAWK compiled opcodes + +package interp + +import ( + "io" + "math" + "os" + "os/exec" + "strings" + "time" + + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/compiler" + "github.com/benhoyt/goawk/lexer" +) + +// Execute a block of virtual machine instructions. +// +// A big switch seems to be the best way of doing this for now. I also tried +// an array of functions (https://github.com/benhoyt/goawk/commit/8e04b069b621ff9b9456de57a35ff2fe335cf201) +// and it was ever so slightly faster, but the code was harder to work with +// and it won't be improved when Go gets faster switches via jump tables +// (https://go-review.googlesource.com/c/go/+/357330/). +// +// Additionally, I've made this version faster since the above test by +// reducing the number of opcodes (replacing a couple dozen Call* opcodes with +// a single CallBuiltin -- that probably pushed it below a switch binary tree +// branch threshold). 
+func (p *interp) execute(code []compiler.Opcode) error { + for ip := 0; ip < len(code); { + op := code[ip] + ip++ + + if p.checkCtx { + err := p.checkContext() + if err != nil { + return err + } + } + + switch op { + case compiler.Num: + index := code[ip] + ip++ + p.push(num(p.nums[index])) + + case compiler.Str: + index := code[ip] + ip++ + p.push(str(p.strs[index])) + + case compiler.Dupe: + v := p.peekTop() + p.push(v) + + case compiler.Drop: + p.pop() + + case compiler.Swap: + l, r := p.peekTwo() + p.replaceTwo(r, l) + + case compiler.Field: + index := p.peekTop() + v := p.getField(int(index.num())) + p.replaceTop(v) + + case compiler.FieldInt: + index := code[ip] + ip++ + v := p.getField(int(index)) + p.push(v) + + case compiler.FieldByName: + fieldName := p.peekTop() + field, err := p.getFieldByName(p.toString(fieldName)) + if err != nil { + return err + } + p.replaceTop(field) + + case compiler.FieldByNameStr: + index := code[ip] + fieldName := p.strs[index] + ip++ + field, err := p.getFieldByName(fieldName) + if err != nil { + return err + } + p.push(field) + + case compiler.Global: + index := code[ip] + ip++ + p.push(p.globals[index]) + + case compiler.Local: + index := code[ip] + ip++ + p.push(p.frame[index]) + + case compiler.Special: + index := code[ip] + ip++ + p.push(p.getSpecial(int(index))) + + case compiler.ArrayGlobal: + arrayIndex := code[ip] + ip++ + array := p.arrays[arrayIndex] + index := p.toString(p.peekTop()) + v := arrayGet(array, index) + p.replaceTop(v) + + case compiler.ArrayLocal: + arrayIndex := code[ip] + ip++ + array := p.localArray(int(arrayIndex)) + index := p.toString(p.peekTop()) + v := arrayGet(array, index) + p.replaceTop(v) + + case compiler.InGlobal: + arrayIndex := code[ip] + ip++ + array := p.arrays[arrayIndex] + index := p.toString(p.peekTop()) + _, ok := array[index] + p.replaceTop(boolean(ok)) + + case compiler.InLocal: + arrayIndex := code[ip] + ip++ + array := p.localArray(int(arrayIndex)) + index := 
p.toString(p.peekTop()) + _, ok := array[index] + p.replaceTop(boolean(ok)) + + case compiler.AssignField: + right, index := p.popTwo() + err := p.setField(int(index.num()), p.toString(right)) + if err != nil { + return err + } + + case compiler.AssignGlobal: + index := code[ip] + ip++ + p.globals[index] = p.pop() + + case compiler.AssignLocal: + index := code[ip] + ip++ + p.frame[index] = p.pop() + + case compiler.AssignSpecial: + index := code[ip] + ip++ + err := p.setSpecial(int(index), p.pop()) + if err != nil { + return err + } + + case compiler.AssignArrayGlobal: + arrayIndex := code[ip] + ip++ + array := p.arrays[arrayIndex] + v, index := p.popTwo() + array[p.toString(index)] = v + + case compiler.AssignArrayLocal: + arrayIndex := code[ip] + ip++ + array := p.localArray(int(arrayIndex)) + v, index := p.popTwo() + array[p.toString(index)] = v + + case compiler.Delete: + arrayScope := code[ip] + arrayIndex := code[ip+1] + ip += 2 + array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + index := p.toString(p.pop()) + delete(array, index) + + case compiler.DeleteAll: + arrayScope := code[ip] + arrayIndex := code[ip+1] + ip += 2 + array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + for k := range array { + delete(array, k) + } + + case compiler.IncrField: + amount := code[ip] + ip++ + index := int(p.pop().num()) + v := p.getField(index) + err := p.setField(index, p.toString(num(v.num()+float64(amount)))) + if err != nil { + return err + } + + case compiler.IncrGlobal: + amount := code[ip] + index := code[ip+1] + ip += 2 + p.globals[index] = num(p.globals[index].num() + float64(amount)) + + case compiler.IncrLocal: + amount := code[ip] + index := code[ip+1] + ip += 2 + p.frame[index] = num(p.frame[index].num() + float64(amount)) + + case compiler.IncrSpecial: + amount := code[ip] + index := int(code[ip+1]) + ip += 2 + v := p.getSpecial(index) + err := p.setSpecial(index, num(v.num()+float64(amount))) + if err != nil { + return err + } + + case 
compiler.IncrArrayGlobal: + amount := code[ip] + arrayIndex := code[ip+1] + ip += 2 + array := p.arrays[arrayIndex] + index := p.toString(p.pop()) + array[index] = num(array[index].num() + float64(amount)) + + case compiler.IncrArrayLocal: + amount := code[ip] + arrayIndex := code[ip+1] + ip += 2 + array := p.localArray(int(arrayIndex)) + index := p.toString(p.pop()) + array[index] = num(array[index].num() + float64(amount)) + + case compiler.AugAssignField: + operation := compiler.AugOp(code[ip]) + ip++ + right, indexVal := p.popTwo() + index := int(indexVal.num()) + field := p.getField(index) + v, err := p.augAssignOp(operation, field, right) + if err != nil { + return err + } + err = p.setField(index, p.toString(v)) + if err != nil { + return err + } + + case compiler.AugAssignGlobal: + operation := compiler.AugOp(code[ip]) + index := code[ip+1] + ip += 2 + v, err := p.augAssignOp(operation, p.globals[index], p.pop()) + if err != nil { + return err + } + p.globals[index] = v + + case compiler.AugAssignLocal: + operation := compiler.AugOp(code[ip]) + index := code[ip+1] + ip += 2 + v, err := p.augAssignOp(operation, p.frame[index], p.pop()) + if err != nil { + return err + } + p.frame[index] = v + + case compiler.AugAssignSpecial: + operation := compiler.AugOp(code[ip]) + index := int(code[ip+1]) + ip += 2 + v, err := p.augAssignOp(operation, p.getSpecial(index), p.pop()) + if err != nil { + return err + } + err = p.setSpecial(index, v) + if err != nil { + return err + } + + case compiler.AugAssignArrayGlobal: + operation := compiler.AugOp(code[ip]) + arrayIndex := code[ip+1] + ip += 2 + array := p.arrays[arrayIndex] + index := p.toString(p.pop()) + v, err := p.augAssignOp(operation, array[index], p.pop()) + if err != nil { + return err + } + array[index] = v + + case compiler.AugAssignArrayLocal: + operation := compiler.AugOp(code[ip]) + arrayIndex := code[ip+1] + ip += 2 + array := p.localArray(int(arrayIndex)) + right, indexVal := p.popTwo() + index := 
p.toString(indexVal) + v, err := p.augAssignOp(operation, array[index], right) + if err != nil { + return err + } + array[index] = v + + case compiler.Regex: + // Stand-alone /regex/ is equivalent to: $0 ~ /regex/ + index := code[ip] + ip++ + re := p.regexes[index] + p.push(boolean(re.MatchString(p.line))) + + case compiler.IndexMulti: + numValues := int(code[ip]) + ip++ + values := p.popSlice(numValues) + indices := make([]string, 0, 3) // up to 3-dimensional indices won't require heap allocation + for _, v := range values { + indices = append(indices, p.toString(v)) + } + p.push(str(strings.Join(indices, p.subscriptSep))) + + case compiler.Add: + l, r := p.peekPop() + p.replaceTop(num(l.num() + r.num())) + + case compiler.Subtract: + l, r := p.peekPop() + p.replaceTop(num(l.num() - r.num())) + + case compiler.Multiply: + l, r := p.peekPop() + p.replaceTop(num(l.num() * r.num())) + + case compiler.Divide: + l, r := p.peekPop() + rf := r.num() + if rf == 0.0 { + return newError("division by zero") + } + p.replaceTop(num(l.num() / rf)) + + case compiler.Power: + l, r := p.peekPop() + p.replaceTop(num(math.Pow(l.num(), r.num()))) + + case compiler.Modulo: + l, r := p.peekPop() + rf := r.num() + if rf == 0.0 { + return newError("division by zero in mod") + } + p.replaceTop(num(math.Mod(l.num(), rf))) + + case compiler.Equals: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) == p.toString(r))) + } else { + p.replaceTop(boolean(ln == rn)) + } + + case compiler.NotEquals: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) != p.toString(r))) + } else { + p.replaceTop(boolean(ln != rn)) + } + + case compiler.Less: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) < p.toString(r))) + } else { + 
p.replaceTop(boolean(ln < rn)) + } + + case compiler.Greater: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) > p.toString(r))) + } else { + p.replaceTop(boolean(ln > rn)) + } + + case compiler.LessOrEqual: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) <= p.toString(r))) + } else { + p.replaceTop(boolean(ln <= rn)) + } + + case compiler.GreaterOrEqual: + l, r := p.peekPop() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + if lIsStr || rIsStr { + p.replaceTop(boolean(p.toString(l) >= p.toString(r))) + } else { + p.replaceTop(boolean(ln >= rn)) + } + + case compiler.Concat: + l, r := p.peekPop() + p.replaceTop(str(p.toString(l) + p.toString(r))) + + case compiler.ConcatMulti: + numValues := int(code[ip]) + ip++ + values := p.popSlice(numValues) + var sb strings.Builder + + for _, v := range values { + sb.WriteString(p.toString(v)) + } + p.push(str(sb.String())) + + case compiler.Match: + l, r := p.peekPop() + re, err := p.compileRegex(p.toString(r)) + if err != nil { + return err + } + matched := re.MatchString(p.toString(l)) + p.replaceTop(boolean(matched)) + + case compiler.NotMatch: + l, r := p.peekPop() + re, err := p.compileRegex(p.toString(r)) + if err != nil { + return err + } + matched := re.MatchString(p.toString(l)) + p.replaceTop(boolean(!matched)) + + case compiler.Not: + p.replaceTop(boolean(!p.peekTop().boolean())) + + case compiler.UnaryMinus: + p.replaceTop(num(-p.peekTop().num())) + + case compiler.UnaryPlus: + p.replaceTop(num(p.peekTop().num())) + + case compiler.Boolean: + p.replaceTop(boolean(p.peekTop().boolean())) + + case compiler.Jump: + offset := code[ip] + ip += 1 + int(offset) + + case compiler.JumpFalse: + offset := code[ip] + ip++ + v := p.pop() + if !v.boolean() { + ip += int(offset) + } + + case compiler.JumpTrue: + offset := code[ip] 
+ ip++ + v := p.pop() + if v.boolean() { + ip += int(offset) + } + + case compiler.JumpEquals: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) == p.toString(r) + } else { + b = ln == rn + } + if b { + ip += int(offset) + } + + case compiler.JumpNotEquals: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) != p.toString(r) + } else { + b = ln != rn + } + if b { + ip += int(offset) + } + + case compiler.JumpLess: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) < p.toString(r) + } else { + b = ln < rn + } + if b { + ip += int(offset) + } + + case compiler.JumpGreater: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) > p.toString(r) + } else { + b = ln > rn + } + if b { + ip += int(offset) + } + + case compiler.JumpLessOrEqual: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) <= p.toString(r) + } else { + b = ln <= rn + } + if b { + ip += int(offset) + } + + case compiler.JumpGreaterOrEqual: + offset := code[ip] + ip++ + l, r := p.popTwo() + ln, lIsStr := l.isTrueStr() + rn, rIsStr := r.isTrueStr() + var b bool + if lIsStr || rIsStr { + b = p.toString(l) >= p.toString(r) + } else { + b = ln >= rn + } + if b { + ip += int(offset) + } + + case compiler.Next: + return errNext + + case compiler.Exit: + p.exitStatus = int(p.pop().num()) + // Return special errExit value "caught" by top-level executor + return errExit + + case compiler.ForIn: + varScope := code[ip] + varIndex := code[ip+1] + 
arrayScope := code[ip+2] + arrayIndex := code[ip+3] + offset := code[ip+4] + ip += 5 + array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + loopCode := code[ip : ip+int(offset)] + for index := range array { + switch ast.VarScope(varScope) { + case ast.ScopeGlobal: + p.globals[varIndex] = str(index) + case ast.ScopeLocal: + p.frame[varIndex] = str(index) + default: // ScopeSpecial + err := p.setSpecial(int(varIndex), str(index)) + if err != nil { + return err + } + } + err := p.execute(loopCode) + if err == errBreak { + break + } + if err != nil { + return err + } + } + ip += int(offset) + + case compiler.BreakForIn: + return errBreak + + case compiler.CallBuiltin: + builtinOp := compiler.BuiltinOp(code[ip]) + ip++ + err := p.callBuiltin(builtinOp) + if err != nil { + return err + } + + case compiler.CallSplit: + arrayScope := code[ip] + arrayIndex := code[ip+1] + ip += 2 + s := p.toString(p.peekTop()) + n, err := p.split(s, ast.VarScope(arrayScope), int(arrayIndex), p.fieldSep) + if err != nil { + return err + } + p.replaceTop(num(float64(n))) + + case compiler.CallSplitSep: + arrayScope := code[ip] + arrayIndex := code[ip+1] + ip += 2 + s, fieldSep := p.peekPop() + n, err := p.split(p.toString(s), ast.VarScope(arrayScope), int(arrayIndex), p.toString(fieldSep)) + if err != nil { + return err + } + p.replaceTop(num(float64(n))) + + case compiler.CallSprintf: + numArgs := code[ip] + ip++ + args := p.popSlice(int(numArgs)) + s, err := p.sprintf(p.toString(args[0]), args[1:]) + if err != nil { + return err + } + p.push(str(s)) + + case compiler.CallUser: + funcIndex := code[ip] + numArrayArgs := int(code[ip+1]) + ip += 2 + + f := p.program.Compiled.Functions[funcIndex] + if p.callDepth >= maxCallDepth { + return newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth) + } + + // Set up frame for scalar arguments + oldFrame := p.frame + p.frame = p.peekSlice(f.NumScalars) + + // Handle array arguments + var arrays []int + for j := 0; j < 
numArrayArgs; j++ { + arrayScope := ast.VarScope(code[ip]) + arrayIndex := int(code[ip+1]) + ip += 2 + arrays = append(arrays, p.arrayIndex(arrayScope, arrayIndex)) + } + oldArraysLen := len(p.arrays) + for j := numArrayArgs; j < f.NumArrays; j++ { + arrays = append(arrays, len(p.arrays)) + p.arrays = append(p.arrays, make(map[string]value)) + } + p.localArrays = append(p.localArrays, arrays) + + // Execute the function! + p.callDepth++ + err := p.execute(f.Body) + p.callDepth-- + + // Pop the locals off the stack + p.popSlice(f.NumScalars) + p.frame = oldFrame + p.localArrays = p.localArrays[:len(p.localArrays)-1] + p.arrays = p.arrays[:oldArraysLen] + + if r, ok := err.(returnValue); ok { + p.push(r.Value) + } else if err != nil { + return err + } else { + p.push(null()) + } + + case compiler.CallNative: + funcIndex := int(code[ip]) + numArgs := int(code[ip+1]) + ip += 2 + + args := p.popSlice(numArgs) + r, err := p.callNative(funcIndex, args) + if err != nil { + return err + } + p.push(r) + + case compiler.Return: + v := p.pop() + return returnValue{v} + + case compiler.ReturnNull: + return returnValue{null()} + + case compiler.Nulls: + numNulls := int(code[ip]) + ip++ + p.pushNulls(numNulls) + + case compiler.Print: + numArgs := code[ip] + redirect := lexer.Token(code[ip+1]) + ip += 2 + + args := p.popSlice(int(numArgs)) + + // Determine what output stream to write to. + output := p.output + if redirect != lexer.ILLEGAL { + var err error + dest := p.pop() + output, err = p.getOutputStream(redirect, dest) + if err != nil { + return err + } + } + + if numArgs > 0 { + err := p.printArgs(output, args) + if err != nil { + return err + } + } else { + // "print" with no arguments prints the raw value of $0, + // regardless of output mode. 
+ err := p.printLine(output, p.line) + if err != nil { + return err + } + } + + case compiler.Printf: + numArgs := code[ip] + redirect := lexer.Token(code[ip+1]) + ip += 2 + + args := p.popSlice(int(numArgs)) + s, err := p.sprintf(p.toString(args[0]), args[1:]) + if err != nil { + return err + } + + output := p.output + if redirect != lexer.ILLEGAL { + dest := p.pop() + output, err = p.getOutputStream(redirect, dest) + if err != nil { + return err + } + } + err = writeOutput(output, s) + if err != nil { + return err + } + + case compiler.Getline: + redirect := lexer.Token(code[ip]) + ip++ + + ret, line, err := p.getline(redirect) + if err != nil { + return err + } + if ret == 1 { + p.setLine(line, false) + } + p.push(num(ret)) + + case compiler.GetlineField: + redirect := lexer.Token(code[ip]) + ip++ + + ret, line, err := p.getline(redirect) + if err != nil { + return err + } + if ret == 1 { + err := p.setField(0, line) + if err != nil { + return err + } + } + p.push(num(ret)) + + case compiler.GetlineGlobal: + redirect := lexer.Token(code[ip]) + index := code[ip+1] + ip += 2 + + ret, line, err := p.getline(redirect) + if err != nil { + return err + } + if ret == 1 { + p.globals[index] = numStr(line) + } + p.push(num(ret)) + + case compiler.GetlineLocal: + redirect := lexer.Token(code[ip]) + index := code[ip+1] + ip += 2 + + ret, line, err := p.getline(redirect) + if err != nil { + return err + } + if ret == 1 { + p.frame[index] = numStr(line) + } + p.push(num(ret)) + + case compiler.GetlineSpecial: + redirect := lexer.Token(code[ip]) + index := code[ip+1] + ip += 2 + + ret, line, err := p.getline(redirect) + if err != nil { + return err + } + if ret == 1 { + err := p.setSpecial(int(index), numStr(line)) + if err != nil { + return err + } + } + p.push(num(ret)) + + case compiler.GetlineArray: + redirect := lexer.Token(code[ip]) + arrayScope := code[ip+1] + arrayIndex := code[ip+2] + ip += 3 + + ret, line, err := p.getline(redirect) + if err != nil { + return err + 
} + index := p.toString(p.peekTop()) + if ret == 1 { + array := p.array(ast.VarScope(arrayScope), int(arrayIndex)) + array[index] = numStr(line) + } + p.replaceTop(num(ret)) + } + } + + return nil +} + +func (p *interp) callBuiltin(builtinOp compiler.BuiltinOp) error { + switch builtinOp { + case compiler.BuiltinAtan2: + y, x := p.peekPop() + p.replaceTop(num(math.Atan2(y.num(), x.num()))) + + case compiler.BuiltinClose: + name := p.toString(p.peekTop()) + var c io.Closer = p.inputStreams[name] + if c != nil { + // Close input stream + delete(p.inputStreams, name) + err := c.Close() + if err != nil { + p.replaceTop(num(-1)) + } else { + p.replaceTop(num(0)) + } + } else { + c = p.outputStreams[name] + if c != nil { + // Close output stream + delete(p.outputStreams, name) + err := c.Close() + if err != nil { + p.replaceTop(num(-1)) + } else { + p.replaceTop(num(0)) + } + } else { + // Nothing to close + p.replaceTop(num(-1)) + } + } + + case compiler.BuiltinCos: + p.replaceTop(num(math.Cos(p.peekTop().num()))) + + case compiler.BuiltinExp: + p.replaceTop(num(math.Exp(p.peekTop().num()))) + + case compiler.BuiltinFflush: + name := p.toString(p.peekTop()) + var ok bool + if name != "" { + // Flush a single, named output stream + ok = p.flushStream(name) + } else { + // fflush() or fflush("") flushes all output streams + ok = p.flushAll() + } + if !ok { + p.replaceTop(num(-1)) + } else { + p.replaceTop(num(0)) + } + + case compiler.BuiltinFflushAll: + ok := p.flushAll() + if !ok { + p.push(num(-1)) + } else { + p.push(num(0)) + } + + case compiler.BuiltinGsub: + regex, repl, in := p.peekPeekPop() + out, n, err := p.sub(p.toString(regex), p.toString(repl), p.toString(in), true) + if err != nil { + return err + } + p.replaceTwo(num(float64(n)), str(out)) + + case compiler.BuiltinIndex: + sValue, substr := p.peekPop() + s := p.toString(sValue) + index := strings.Index(s, p.toString(substr)) + p.replaceTop(num(float64(index + 1))) + + case compiler.BuiltinInt: + 
p.replaceTop(num(float64(int(p.peekTop().num())))) + + case compiler.BuiltinLength: + p.push(num(float64(len(p.line)))) + + case compiler.BuiltinLengthArg: + s := p.toString(p.peekTop()) + p.replaceTop(num(float64(len(s)))) + + case compiler.BuiltinLog: + p.replaceTop(num(math.Log(p.peekTop().num()))) + + case compiler.BuiltinMatch: + sValue, regex := p.peekPop() + s := p.toString(sValue) + re, err := p.compileRegex(p.toString(regex)) + if err != nil { + return err + } + loc := re.FindStringIndex(s) + if loc == nil { + p.matchStart = 0 + p.matchLength = -1 + p.replaceTop(num(0)) + } else { + p.matchStart = loc[0] + 1 + p.matchLength = loc[1] - loc[0] + p.replaceTop(num(float64(p.matchStart))) + } + + case compiler.BuiltinRand: + p.push(num(p.random.Float64())) + + case compiler.BuiltinSin: + p.replaceTop(num(math.Sin(p.peekTop().num()))) + + case compiler.BuiltinSqrt: + p.replaceTop(num(math.Sqrt(p.peekTop().num()))) + + case compiler.BuiltinSrand: + prevSeed := p.randSeed + p.random.Seed(time.Now().UnixNano()) + p.push(num(prevSeed)) + + case compiler.BuiltinSrandSeed: + prevSeed := p.randSeed + p.randSeed = p.peekTop().num() + p.random.Seed(int64(math.Float64bits(p.randSeed))) + p.replaceTop(num(prevSeed)) + + case compiler.BuiltinSub: + regex, repl, in := p.peekPeekPop() + out, n, err := p.sub(p.toString(regex), p.toString(repl), p.toString(in), false) + if err != nil { + return err + } + p.replaceTwo(num(float64(n)), str(out)) + + case compiler.BuiltinSubstr: + sValue, posValue := p.peekPop() + pos := int(posValue.num()) + s := p.toString(sValue) + if pos > len(s) { + pos = len(s) + 1 + } + if pos < 1 { + pos = 1 + } + length := len(s) - pos + 1 + p.replaceTop(str(s[pos-1 : pos-1+length])) + + case compiler.BuiltinSubstrLength: + posValue, lengthValue := p.popTwo() + length := int(lengthValue.num()) + pos := int(posValue.num()) + s := p.toString(p.peekTop()) + if pos > len(s) { + pos = len(s) + 1 + } + if pos < 1 { + pos = 1 + } + maxLength := len(s) - pos + 1 
+ if length < 0 { + length = 0 + } + if length > maxLength { + length = maxLength + } + p.replaceTop(str(s[pos-1 : pos-1+length])) + + case compiler.BuiltinSystem: + if p.noExec { + return newError("can't call system() due to NoExec") + } + cmdline := p.toString(p.peekTop()) + cmd := p.execShell(cmdline) + cmd.Stdin = p.stdin + cmd.Stdout = p.output + cmd.Stderr = p.errorOutput + _ = p.flushAll() // ensure synchronization + err := cmd.Run() + ret := 0.0 + if err != nil { + if p.checkCtx && p.ctx.Err() != nil { + return p.ctx.Err() + } + if exitErr, ok := err.(*exec.ExitError); ok { + ret = float64(exitErr.ProcessState.ExitCode()) + } else { + p.printErrorf("%v\n", err) + ret = -1 + } + } + p.replaceTop(num(ret)) + + case compiler.BuiltinTolower: + p.replaceTop(str(strings.ToLower(p.toString(p.peekTop())))) + + case compiler.BuiltinToupper: + p.replaceTop(str(strings.ToUpper(p.toString(p.peekTop())))) + } + + return nil +} + +// Fetch the value at the given index from array. This handles the strange +// POSIX behavior of creating a null entry for non-existent array elements. +// Per the POSIX spec, "Any other reference to a nonexistent array element +// [apart from "in" expressions] shall automatically create it." +func arrayGet(array map[string]value, index string) value { + v, ok := array[index] + if !ok { + array[index] = v + } + return v +} + +// Stack operations follow. These should be inlined. Instead of just push and +// pop, for efficiency we have custom operations for when we're replacing the +// top of stack without changing the stack pointer. Primarily this avoids the +// check for append in push. 
+func (p *interp) push(v value) { + sp := p.sp + if sp >= len(p.stack) { + p.stack = append(p.stack, null()) + } + p.stack[sp] = v + sp++ + p.sp = sp +} + +func (p *interp) pushNulls(num int) { + sp := p.sp + for p.sp+num-1 >= len(p.stack) { + p.stack = append(p.stack, null()) + } + for i := 0; i < num; i++ { + p.stack[sp] = null() + sp++ + } + p.sp = sp +} + +func (p *interp) pop() value { + p.sp-- + return p.stack[p.sp] +} + +func (p *interp) popTwo() (value, value) { + p.sp -= 2 + return p.stack[p.sp], p.stack[p.sp+1] +} + +func (p *interp) peekTop() value { + return p.stack[p.sp-1] +} + +func (p *interp) peekTwo() (value, value) { + return p.stack[p.sp-2], p.stack[p.sp-1] +} + +func (p *interp) peekPop() (value, value) { + p.sp-- + return p.stack[p.sp-1], p.stack[p.sp] +} + +func (p *interp) peekPeekPop() (value, value, value) { + p.sp-- + return p.stack[p.sp-2], p.stack[p.sp-1], p.stack[p.sp] +} + +func (p *interp) replaceTop(v value) { + p.stack[p.sp-1] = v +} + +func (p *interp) replaceTwo(l, r value) { + p.stack[p.sp-2] = l + p.stack[p.sp-1] = r +} + +func (p *interp) popSlice(n int) []value { + p.sp -= n + return p.stack[p.sp : p.sp+n] +} + +func (p *interp) peekSlice(n int) []value { + return p.stack[p.sp-n:] +} + +// Helper for getline operations. This performs the (possibly redirected) read +// of a line, and returns the result. If the result is 1 (success in AWK), the +// caller will set the target to the returned string. 
+func (p *interp) getline(redirect lexer.Token) (float64, string, error) { + switch redirect { + case lexer.PIPE: // redirect from command + name := p.toString(p.pop()) + scanner, err := p.getInputScannerPipe(name) + if err != nil { + return 0, "", err + } + if !scanner.Scan() { + if err := scanner.Err(); err != nil { + return -1, "", nil + } + return 0, "", nil + } + return 1, scanner.Text(), nil + + case lexer.LESS: // redirect from file + name := p.toString(p.pop()) + scanner, err := p.getInputScannerFile(name) + if err != nil { + if _, ok := err.(*os.PathError); ok { + // File not found is not a hard error, getline just returns -1. + // See: https://github.com/benhoyt/goawk/issues/41 + return -1, "", nil + } + return 0, "", err + } + if !scanner.Scan() { + if err := scanner.Err(); err != nil { + return -1, "", nil + } + return 0, "", nil + } + return 1, scanner.Text(), nil + + default: // no redirect + p.flushOutputAndError() // Flush output in case they've written a prompt + var err error + line, err := p.nextLine() + if err == io.EOF { + return 0, "", nil + } + if err != nil { + return -1, "", nil + } + return 1, line, nil + } +} + +// Perform augmented assignment operation. 
+func (p *interp) augAssignOp(op compiler.AugOp, l, r value) (value, error) { + switch op { + case compiler.AugOpAdd: + return num(l.num() + r.num()), nil + case compiler.AugOpSub: + return num(l.num() - r.num()), nil + case compiler.AugOpMul: + return num(l.num() * r.num()), nil + case compiler.AugOpDiv: + rf := r.num() + if rf == 0.0 { + return null(), newError("division by zero") + } + return num(l.num() / rf), nil + case compiler.AugOpPow: + return num(math.Pow(l.num(), r.num())), nil + default: // AugOpMod + rf := r.num() + if rf == 0.0 { + return null(), newError("division by zero in mod") + } + return num(math.Mod(l.num(), rf)), nil + } +} diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go index 30147e9..dc3a48d 100644 --- a/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go +++ b/play/vendor/github.com/benhoyt/goawk/lexer/lexer.go @@ -5,11 +5,10 @@ // // To tokenize some source, create a new lexer with NewLexer(src) and // then call Scan() until the token type is EOF or ILLEGAL. -// package lexer import ( - "fmt" + "errors" ) // Lexer tokenizes a byte string of AWK source code. Use NewLexer to @@ -120,6 +119,8 @@ func (l *Lexer) scan() (Position, Token, string) { switch ch { case '$': tok = DOLLAR + case '@': + tok = AT case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.': // Avoid make/append and use l.offset directly for performance start := l.offset - 2 @@ -184,80 +185,18 @@ func (l *Lexer) scan() (Position, Token, string) { } case '"', '\'': // Note: POSIX awk spec doesn't allow single-quoted strings, - // but this helps without quoting, especially on Windows + // but this helps with quoting, especially on Windows // where the shell quote character is " (double quote). 
- chars := make([]byte, 0, 32) // most won't require heap allocation - for l.ch != ch { - c := l.ch - if c == 0 { - return l.pos, ILLEGAL, "didn't find end quote in string" - } - if c == '\r' || c == '\n' { - return l.pos, ILLEGAL, "can't have newline in string" - } - if c != '\\' { - // Normal, non-escaped character - chars = append(chars, c) - l.next() - continue - } - // Escape sequence, skip over \ and process - l.next() - switch l.ch { - case 'n': - c = '\n' - l.next() - case 't': - c = '\t' - l.next() - case 'r': - c = '\r' - l.next() - case 'a': - c = '\a' - l.next() - case 'b': - c = '\b' - l.next() - case 'f': - c = '\f' - l.next() - case 'v': - c = '\v' - l.next() - case 'x': - // Hex byte of one of two hex digits - l.next() - digit := hexDigit(l.ch) - if digit < 0 { - return l.pos, ILLEGAL, "1 or 2 hex digits expected" - } - c = byte(digit) - l.next() - digit = hexDigit(l.ch) - if digit >= 0 { - c = c*16 + byte(digit) - l.next() - } - case '0', '1', '2', '3', '4', '5', '6', '7': - // Octal byte of 1-3 octal digits - c = l.ch - '0' - l.next() - for i := 0; i < 2 && l.ch >= '0' && l.ch <= '7'; i++ { - c = c*8 + l.ch - '0' - l.next() - } - default: - // Any other escape character is just the char - // itself, eg: "\z" is just "z" - c = l.ch - l.next() - } - chars = append(chars, c) + s, err := parseString(ch, func() byte { return l.ch }, l.next) + if err != nil { + return l.pos, ILLEGAL, err.Error() + } + if l.ch != ch { + return l.pos, ILLEGAL, "didn't find end quote in string" } l.next() tok = STRING - val = string(chars) + val = s case '(': tok = LPAREN case ')': @@ -366,7 +305,7 @@ func (l *Lexer) scanRegex() (Position, Token, string) { pos.Column -= 2 chars = append(chars, '=') default: - return l.pos, ILLEGAL, fmt.Sprintf("unexpected %s preceding regex", l.lastTok) + panic("ScanRegex should only be called after DIV or DIV_ASSIGN token") } for l.ch != '/' { c := l.ch @@ -424,7 +363,7 @@ func (l *Lexer) unread() { } func isNameStart(ch byte) bool { - 
return ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') + return ch == '_' || ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' } func isDigit(ch byte) bool { @@ -459,3 +398,102 @@ func (l *Lexer) choice(ch byte, one, two Token) Token { func (l *Lexer) PeekByte() byte { return l.ch } + +// Unescape unescapes the backslash escapes in s (which shouldn't include the +// surrounding quotes) and returns the unquoted string. It's intended for use +// when unescaping command line var=value assignments, as required by the +// POSIX AWK spec. +func Unescape(s string) (string, error) { + i := 0 + ch := func() byte { + if i >= len(s) { + return 0 + } + return s[i] + } + next := func() { + i++ + } + return parseString(0, ch, next) +} + +// Parses a string ending with given quote character (not parsed). The ch +// function returns the current character (or 0 at the end); the next function +// moves forward one character. +func parseString(quote byte, ch func() byte, next func()) (string, error) { + chars := make([]byte, 0, 32) // most strings won't require heap allocation + for { + c := ch() + if c == quote || c == 0 { + break + } + if c == '\r' || c == '\n' { + return "", errors.New("can't have newline in string") + } + if c != '\\' { + // Normal, non-escaped character + chars = append(chars, c) + next() + continue + } + // Escape sequence, skip over \ and process + next() + switch ch() { + case 'n': + c = '\n' + next() + case 't': + c = '\t' + next() + case 'r': + c = '\r' + next() + case 'a': + c = '\a' + next() + case 'b': + c = '\b' + next() + case 'f': + c = '\f' + next() + case 'v': + c = '\v' + next() + case 'x': + // Hex byte of one of two hex digits + next() + digit := hexDigit(ch()) + if digit < 0 { + return "", errors.New("1 or 2 hex digits expected") + } + c = byte(digit) + next() + digit = hexDigit(ch()) + if digit >= 0 { + c = c*16 + byte(digit) + next() + } + case '0', '1', '2', '3', '4', '5', '6', '7': + // Octal byte of 1-3 octal digits + c = 
ch() - '0' + next() + for i := 0; i < 2 && ch() >= '0' && ch() <= '7'; i++ { + c = c*8 + ch() - '0' + next() + } + default: + // Any other escape character is just the char + // itself, eg: "\z" is just "z". + c = ch() + if c == 0 { + // Expect backslash right at the end of the string, which is + // interpreted as a literal backslash (only for Unescape). + c = '\\' + } + next() + } + chars = append(chars, c) + } + return string(chars), nil +} diff --git a/play/vendor/github.com/benhoyt/goawk/lexer/token.go b/play/vendor/github.com/benhoyt/goawk/lexer/token.go index 6780816..b3be569 100644 --- a/play/vendor/github.com/benhoyt/goawk/lexer/token.go +++ b/play/vendor/github.com/benhoyt/goawk/lexer/token.go @@ -18,6 +18,7 @@ const ( AND APPEND ASSIGN + AT COLON COMMA DECR @@ -172,6 +173,7 @@ var tokenNames = map[Token]string{ AND: "&&", APPEND: ">>", ASSIGN: "=", + AT: "@", COLON: ":", COMMA: ",", DECR: "--", diff --git a/play/vendor/github.com/benhoyt/goawk/parser/parser.go b/play/vendor/github.com/benhoyt/goawk/parser/parser.go index d8ff2c6..491b346 100644 --- a/play/vendor/github.com/benhoyt/goawk/parser/parser.go +++ b/play/vendor/github.com/benhoyt/goawk/parser/parser.go @@ -1,8 +1,7 @@ // Package parser is an AWK parser and abstract syntax tree. // -// Use the ParseProgram function to parse an AWK program, and then -// give the result to one of the interp.Exec* functions to execute it. -// +// Use the ParseProgram function to parse an AWK program, and then give the +// result to interp.Exec, interp.ExecProgram, or interp.New to execute it. package parser import ( @@ -12,7 +11,8 @@ import ( "strconv" "strings" - . "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/compiler" . 
"github.com/benhoyt/goawk/lexer" ) @@ -68,40 +68,52 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) { } p.initResolve() p.next() // initialize p.tok - return p.program(), nil + + // Parse into abstract syntax tree + prog = p.program() + + // Compile to virtual machine code + prog.Compiled, err = compiler.Compile(prog.toAST()) + return prog, err } -// Program is the abstract syntax tree for an entire AWK program. +// Program is the parsed and compiled representation of an entire AWK program. type Program struct { // These fields aren't intended to be used or modified directly, // but are exported for the interpreter (Program itself needs to // be exported in package "parser", otherwise these could live in // "internal/ast".) - Begin []Stmts - Actions []Action - End []Stmts - Functions []Function + Begin []ast.Stmts + Actions []ast.Action + End []ast.Stmts + Functions []ast.Function Scalars map[string]int Arrays map[string]int + Compiled *compiler.Program } // String returns an indented, pretty-printed version of the parsed // program. func (p *Program) String() string { - parts := []string{} - for _, ss := range p.Begin { - parts = append(parts, "BEGIN {\n"+ss.String()+"}") + return p.toAST().String() +} + +// Disassemble writes a human-readable form of the program's virtual machine +// instructions to writer. +func (p *Program) Disassemble(writer io.Writer) error { + return p.Compiled.Disassemble(writer) +} + +// toAST converts the *Program to an *ast.Program. 
+func (p *Program) toAST() *ast.Program { + return &ast.Program{ + Begin: p.Begin, + Actions: p.Actions, + End: p.End, + Functions: p.Functions, + Scalars: p.Scalars, + Arrays: p.Arrays, } - for _, a := range p.Actions { - parts = append(parts, a.String()) - } - for _, ss := range p.End { - parts = append(parts, "END {\n"+ss.String()+"}") - } - for _, function := range p.Functions { - parts = append(parts, function.String()) - } - return strings.Join(parts, "\n\n") } // Parser state @@ -123,7 +135,7 @@ type parser struct { varTypes map[string]map[string]typeInfo // map of func name to var name to type varRefs []varRef // all variable references (usually scalars) arrayRefs []arrayRef // all array references - multiExprs map[*MultiExpr]Position // tracks comma-separated expressions + multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions // Function tracking functions map[string]int // map of function name to index @@ -154,7 +166,7 @@ func (p *parser) program() *Program { default: p.inAction = true // Allow empty pattern, normal pattern, or range pattern - pattern := []Expr{} + pattern := []ast.Expr{} if !p.matches(LBRACE, EOF) { pattern = append(pattern, p.expr()) } @@ -163,7 +175,7 @@ func (p *parser) program() *Program { pattern = append(pattern, p.expr()) } // Or an empty action (equivalent to { print $0 }) - action := Action{pattern, nil} + action := ast.Action{pattern, nil} if p.tok == LBRACE { action.Stmts = p.stmtsBrace() } @@ -181,7 +193,7 @@ func (p *parser) program() *Program { } // Parse a list of statements. -func (p *parser) stmts() Stmts { +func (p *parser) stmts() ast.Stmts { switch p.tok { case SEMICOLON: // This is so things like this parse correctly: @@ -191,15 +203,15 @@ func (p *parser) stmts() Stmts { case LBRACE: return p.stmtsBrace() default: - return []Stmt{p.stmt()} + return []ast.Stmt{p.stmt()} } } // Parse a list of statements surrounded in {...} braces. 
-func (p *parser) stmtsBrace() Stmts { +func (p *parser) stmtsBrace() ast.Stmts { p.expect(LBRACE) p.optionalNewlines() - ss := []Stmt{} + ss := []ast.Stmt{} for p.tok != RBRACE && p.tok != EOF { ss = append(ss, p.stmt()) } @@ -211,7 +223,7 @@ func (p *parser) stmtsBrace() Stmts { } // Parse a "simple" statement (eg: allowed in a for loop init clause). -func (p *parser) simpleStmt() Stmt { +func (p *parser) simpleStmt() ast.Stmt { switch p.tok { case PRINT, PRINTF: op := p.tok @@ -219,31 +231,31 @@ func (p *parser) simpleStmt() Stmt { args := p.exprList(p.printExpr) if len(args) == 1 { // This allows parens around all the print args - if m, ok := args[0].(*MultiExpr); ok { + if m, ok := args[0].(*ast.MultiExpr); ok { args = m.Exprs p.useMultiExpr(m) } } redirect := ILLEGAL - var dest Expr + var dest ast.Expr if p.matches(GREATER, APPEND, PIPE) { redirect = p.tok p.next() dest = p.expr() } if op == PRINT { - return &PrintStmt{args, redirect, dest} + return &ast.PrintStmt{args, redirect, dest} } else { if len(args) == 0 { panic(p.errorf("expected printf args, got none")) } - return &PrintfStmt{args, redirect, dest} + return &ast.PrintfStmt{args, redirect, dest} } case DELETE: p.next() ref := p.arrayRef(p.val, p.pos) p.expect(NAME) - var index []Expr + var index []ast.Expr if p.tok == LBRACKET { p.next() index = p.exprList(p.expr) @@ -252,20 +264,20 @@ func (p *parser) simpleStmt() Stmt { } p.expect(RBRACKET) } - return &DeleteStmt{ref, index} + return &ast.DeleteStmt{ref, index} case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN: panic(p.errorf("expected print/printf, delete, or expression")) default: - return &ExprStmt{p.expr()} + return &ast.ExprStmt{p.expr()} } } // Parse any top-level statement. 
-func (p *parser) stmt() Stmt { +func (p *parser) stmt() ast.Stmt { for p.matches(SEMICOLON, NEWLINE) { p.next() } - var s Stmt + var s ast.Stmt switch p.tok { case IF: p.next() @@ -275,13 +287,13 @@ func (p *parser) stmt() Stmt { p.optionalNewlines() body := p.stmts() p.optionalNewlines() - var elseBody Stmts + var elseBody ast.Stmts if p.tok == ELSE { p.next() p.optionalNewlines() elseBody = p.stmts() } - s = &IfStmt{cond, body, elseBody} + s = &ast.IfStmt{cond, body, elseBody} case FOR: // Parse for statement, either "for in" or C-like for loop. // @@ -292,7 +304,7 @@ func (p *parser) stmt() Stmt { // p.next() p.expect(LPAREN) - var pre Stmt + var pre ast.Stmt if p.tok != SEMICOLON { pre = p.simpleStmt() } @@ -300,41 +312,41 @@ func (p *parser) stmt() Stmt { // Match: for (var in array) body p.next() p.optionalNewlines() - exprStmt, ok := pre.(*ExprStmt) + exprStmt, ok := pre.(*ast.ExprStmt) if !ok { panic(p.errorf("expected 'for (var in array) ...'")) } - inExpr, ok := (exprStmt.Expr).(*InExpr) + inExpr, ok := exprStmt.Expr.(*ast.InExpr) if !ok { panic(p.errorf("expected 'for (var in array) ...'")) } if len(inExpr.Index) != 1 { panic(p.errorf("expected 'for (var in array) ...'")) } - varExpr, ok := (inExpr.Index[0]).(*VarExpr) + varExpr, ok := inExpr.Index[0].(*ast.VarExpr) if !ok { panic(p.errorf("expected 'for (var in array) ...'")) } body := p.loopStmts() - s = &ForInStmt{varExpr, inExpr.Array, body} + s = &ast.ForInStmt{varExpr, inExpr.Array, body} } else { // Match: for ([pre]; [cond]; [post]) body p.expect(SEMICOLON) p.optionalNewlines() - var cond Expr + var cond ast.Expr if p.tok != SEMICOLON { cond = p.expr() } p.expect(SEMICOLON) p.optionalNewlines() - var post Stmt + var post ast.Stmt if p.tok != RPAREN { post = p.simpleStmt() } p.expect(RPAREN) p.optionalNewlines() body := p.loopStmts() - s = &ForStmt{pre, cond, post, body} + s = &ast.ForStmt{pre, cond, post, body} } case WHILE: p.next() @@ -343,7 +355,7 @@ func (p *parser) stmt() Stmt { 
p.expect(RPAREN) p.optionalNewlines() body := p.loopStmts() - s = &WhileStmt{cond, body} + s = &ast.WhileStmt{cond, body} case DO: p.next() p.optionalNewlines() @@ -352,45 +364,45 @@ func (p *parser) stmt() Stmt { p.expect(LPAREN) cond := p.expr() p.expect(RPAREN) - s = &DoWhileStmt{body, cond} + s = &ast.DoWhileStmt{body, cond} case BREAK: if p.loopDepth == 0 { panic(p.errorf("break must be inside a loop body")) } p.next() - s = &BreakStmt{} + s = &ast.BreakStmt{} case CONTINUE: if p.loopDepth == 0 { panic(p.errorf("continue must be inside a loop body")) } p.next() - s = &ContinueStmt{} + s = &ast.ContinueStmt{} case NEXT: if !p.inAction && p.funcName == "" { panic(p.errorf("next can't be inside BEGIN or END")) } p.next() - s = &NextStmt{} + s = &ast.NextStmt{} case EXIT: p.next() - var status Expr + var status ast.Expr if !p.matches(NEWLINE, SEMICOLON, RBRACE) { status = p.expr() } - s = &ExitStmt{status} + s = &ast.ExitStmt{status} case RETURN: if p.funcName == "" { panic(p.errorf("return must be inside a function")) } p.next() - var value Expr + var value ast.Expr if !p.matches(NEWLINE, SEMICOLON, RBRACE) { value = p.expr() } - s = &ReturnStmt{value} + s = &ast.ReturnStmt{value} case LBRACE: body := p.stmtsBrace() - s = &BlockStmt{body} + s = &ast.BlockStmt{body} default: s = p.simpleStmt() } @@ -407,7 +419,7 @@ func (p *parser) stmt() Stmt { // Same as stmts(), but tracks that we're in a loop (as break and // continue can only occur inside a loop). -func (p *parser) loopStmts() Stmts { +func (p *parser) loopStmts() ast.Stmts { p.loopDepth++ ss := p.stmts() p.loopDepth-- @@ -417,7 +429,7 @@ func (p *parser) loopStmts() Stmts { // Parse a function definition and body. As it goes, this resolves // the local variable indexes and tracks which parameters are array // parameters. 
-func (p *parser) function() Function { +func (p *parser) function() ast.Function { if p.funcName != "" { // Should never actually get here (FUNCTION token is only // handled at the top level), but just in case. @@ -458,13 +470,13 @@ func (p *parser) function() Function { p.stopFunction() p.locals = nil - return Function{name, params, nil, body} + return ast.Function{name, params, nil, body} } // Parse expressions separated by commas: args to print[f] or user // function call, or multi-dimensional index. -func (p *parser) exprList(parse func() Expr) []Expr { - exprs := []Expr{} +func (p *parser) exprList(parse func() ast.Expr) []ast.Expr { + exprs := []ast.Expr{} first := true for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) { if !first { @@ -484,41 +496,43 @@ func (p *parser) exprList(parse func() Expr) []Expr { // which skips PIPE GETLINE and GREATER expressions. // Parse a single expression. -func (p *parser) expr() Expr { return p.getLine() } -func (p *parser) printExpr() Expr { return p._assign(p.printCond) } +func (p *parser) expr() ast.Expr { return p.getLine() } +func (p *parser) printExpr() ast.Expr { return p._assign(p.printCond) } // Parse an "expr | getline [lvalue]" expression: // -// assign [PIPE GETLINE [lvalue]] -// -func (p *parser) getLine() Expr { +// assign [PIPE GETLINE [lvalue]] +func (p *parser) getLine() ast.Expr { expr := p._assign(p.cond) if p.tok == PIPE { p.next() p.expect(GETLINE) target := p.optionalLValue() - return &GetlineExpr{expr, target, nil} + return &ast.GetlineExpr{expr, target, nil} } return expr } // Parse an = assignment expression: // -// lvalue [assign_op assign] +// lvalue [assign_op assign] // // An lvalue is a variable name, an array[expr] index expression, or // an $expr field expression. 
-// -func (p *parser) _assign(higher func() Expr) Expr { +func (p *parser) _assign(higher func() ast.Expr) ast.Expr { expr := higher() - if IsLValue(expr) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, + _, isNamedField := expr.(*ast.NamedFieldExpr) + if (isNamedField || ast.IsLValue(expr)) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) { + if isNamedField { + panic(p.errorf("assigning @ expression not supported")) + } op := p.tok p.next() right := p._assign(higher) switch op { case ASSIGN: - return &AssignExpr{expr, right} + return &ast.AssignExpr{expr, right} case ADD_ASSIGN: op = ADD case DIV_ASSIGN: @@ -532,19 +546,18 @@ func (p *parser) _assign(higher func() Expr) Expr { case SUB_ASSIGN: op = SUB } - return &AugAssignExpr{expr, op, right} + return &ast.AugAssignExpr{expr, op, right} } return expr } // Parse a ?: conditional expression: // -// or [QUESTION NEWLINE* cond COLON NEWLINE* cond] -// -func (p *parser) cond() Expr { return p._cond(p.or) } -func (p *parser) printCond() Expr { return p._cond(p.printOr) } +// or [QUESTION NEWLINE* cond COLON NEWLINE* cond] +func (p *parser) cond() ast.Expr { return p._cond(p.or) } +func (p *parser) printCond() ast.Expr { return p._cond(p.printOr) } -func (p *parser) _cond(higher func() Expr) Expr { +func (p *parser) _cond(higher func() ast.Expr) ast.Expr { expr := higher() if p.tok == QUESTION { p.next() @@ -553,156 +566,154 @@ func (p *parser) _cond(higher func() Expr) Expr { p.expect(COLON) p.optionalNewlines() f := p.expr() - return &CondExpr{expr, t, f} + return &ast.CondExpr{expr, t, f} } return expr } // Parse an || or expression: // -// and [OR NEWLINE* and] [OR NEWLINE* and] ... -// -func (p *parser) or() Expr { return p.binaryLeft(p.and, true, OR) } -func (p *parser) printOr() Expr { return p.binaryLeft(p.printAnd, true, OR) } +// and [OR NEWLINE* and] [OR NEWLINE* and] ... 
+func (p *parser) or() ast.Expr { return p.binaryLeft(p.and, true, OR) } +func (p *parser) printOr() ast.Expr { return p.binaryLeft(p.printAnd, true, OR) } // Parse an && and expression: // -// in [AND NEWLINE* in] [AND NEWLINE* in] ... -// -func (p *parser) and() Expr { return p.binaryLeft(p.in, true, AND) } -func (p *parser) printAnd() Expr { return p.binaryLeft(p.printIn, true, AND) } +// in [AND NEWLINE* in] [AND NEWLINE* in] ... +func (p *parser) and() ast.Expr { return p.binaryLeft(p.in, true, AND) } +func (p *parser) printAnd() ast.Expr { return p.binaryLeft(p.printIn, true, AND) } // Parse an "in" expression: // -// match [IN NAME] [IN NAME] ... -// -func (p *parser) in() Expr { return p._in(p.match) } -func (p *parser) printIn() Expr { return p._in(p.printMatch) } +// match [IN NAME] [IN NAME] ... +func (p *parser) in() ast.Expr { return p._in(p.match) } +func (p *parser) printIn() ast.Expr { return p._in(p.printMatch) } -func (p *parser) _in(higher func() Expr) Expr { +func (p *parser) _in(higher func() ast.Expr) ast.Expr { expr := higher() for p.tok == IN { p.next() ref := p.arrayRef(p.val, p.pos) p.expect(NAME) - expr = &InExpr{[]Expr{expr}, ref} + expr = &ast.InExpr{[]ast.Expr{expr}, ref} } return expr } // Parse a ~ match expression: // -// compare [MATCH|NOT_MATCH compare] -// -func (p *parser) match() Expr { return p._match(p.compare) } -func (p *parser) printMatch() Expr { return p._match(p.printCompare) } +// compare [MATCH|NOT_MATCH compare] +func (p *parser) match() ast.Expr { return p._match(p.compare) } +func (p *parser) printMatch() ast.Expr { return p._match(p.printCompare) } -func (p *parser) _match(higher func() Expr) Expr { +func (p *parser) _match(higher func() ast.Expr) ast.Expr { expr := higher() if p.matches(MATCH, NOT_MATCH) { op := p.tok p.next() right := p.regexStr(higher) // Not match() as these aren't associative - return &BinaryExpr{expr, op, right} + return &ast.BinaryExpr{expr, op, right} } return expr } // Parse a comparison 
expression: // -// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat] -// -func (p *parser) compare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) } -func (p *parser) printCompare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) } +// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat] +func (p *parser) compare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) } +func (p *parser) printCompare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) } -func (p *parser) _compare(ops ...Token) Expr { +func (p *parser) _compare(ops ...Token) ast.Expr { expr := p.concat() if p.matches(ops...) { op := p.tok p.next() right := p.concat() // Not compare() as these aren't associative - return &BinaryExpr{expr, op, right} + return &ast.BinaryExpr{expr, op, right} } return expr } -func (p *parser) concat() Expr { +func (p *parser) concat() ast.Expr { expr := p.add() - for p.matches(DOLLAR, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) || - (p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) { + for p.matches(DOLLAR, AT, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) || + p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC { right := p.add() - expr = &BinaryExpr{expr, CONCAT, right} + expr = &ast.BinaryExpr{expr, CONCAT, right} } return expr } -func (p *parser) add() Expr { +func (p *parser) add() ast.Expr { return p.binaryLeft(p.mul, false, ADD, SUB) } -func (p *parser) mul() Expr { +func (p *parser) mul() ast.Expr { return p.binaryLeft(p.pow, false, MUL, DIV, MOD) } -func (p *parser) pow() Expr { +func (p *parser) pow() ast.Expr { // Note that pow (expr ^ expr) is right-associative expr := p.preIncr() if p.tok == POW { p.next() right := p.pow() - return &BinaryExpr{expr, POW, right} + return &ast.BinaryExpr{expr, POW, right} } return expr } -func (p *parser) preIncr() Expr { +func (p *parser) preIncr() ast.Expr { if p.tok == INCR || p.tok == DECR { op := p.tok p.next() exprPos := p.pos expr := p.preIncr() - if 
!IsLValue(expr) { + if !ast.IsLValue(expr) { panic(p.posErrorf(exprPos, "expected lvalue after ++ or --")) } - return &IncrExpr{expr, op, true} + return &ast.IncrExpr{expr, op, true} } return p.postIncr() } -func (p *parser) postIncr() Expr { +func (p *parser) postIncr() ast.Expr { expr := p.primary() - if (p.tok == INCR || p.tok == DECR) && IsLValue(expr) { + if (p.tok == INCR || p.tok == DECR) && ast.IsLValue(expr) { op := p.tok p.next() - return &IncrExpr{expr, op, false} + return &ast.IncrExpr{expr, op, false} } return expr } -func (p *parser) primary() Expr { +func (p *parser) primary() ast.Expr { switch p.tok { case NUMBER: // AWK allows forms like "1.5e", but ParseFloat doesn't s := strings.TrimRight(p.val, "eE") n, _ := strconv.ParseFloat(s, 64) p.next() - return &NumExpr{n} + return &ast.NumExpr{n} case STRING: s := p.val p.next() - return &StrExpr{s} + return &ast.StrExpr{s} case DIV, DIV_ASSIGN: // If we get to DIV or DIV_ASSIGN as a primary expression, // it's actually a regex. 
regex := p.nextRegex() - return &RegExpr{regex} + return &ast.RegExpr{regex} case DOLLAR: p.next() - return &FieldExpr{p.primary()} + return &ast.FieldExpr{p.primary()} + case AT: + p.next() + return &ast.NamedFieldExpr{p.primary()} case NOT, ADD, SUB: op := p.tok p.next() - return &UnaryExpr{op, p.pow()} + return &ast.UnaryExpr{op, p.pow()} case NAME: name := p.val namePos := p.pos @@ -715,7 +726,7 @@ func (p *parser) primary() Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{p.arrayRef(name, namePos), index} } else if p.tok == LPAREN && !p.lexer.HadSpace() { if p.locals[name] { panic(p.errorf("can't call local variable %q as function", name)) @@ -743,7 +754,7 @@ func (p *parser) primary() Expr { p.next() ref := p.arrayRef(p.val, p.pos) p.expect(NAME) - return &InExpr{exprs, ref} + return &ast.InExpr{exprs, ref} } // MultiExpr is used as a pseudo-expression for print[f] parsing. return p.multiExpr(exprs, parenPos) @@ -751,12 +762,12 @@ func (p *parser) primary() Expr { case GETLINE: p.next() target := p.optionalLValue() - var file Expr + var file ast.Expr if p.tok == LESS { p.next() file = p.primary() } - return &GetlineExpr{nil, target, file} + return &ast.GetlineExpr{nil, target, file} // Below is the parsing of all the builtin function calls. We // could unify these but several of them have special handling // (array/lvalue/regex params, optional arguments, and so on). 
@@ -768,18 +779,18 @@ func (p *parser) primary() Expr { regex := p.regexStr(p.expr) p.commaNewlines() repl := p.expr() - args := []Expr{regex, repl} + args := []ast.Expr{regex, repl} if p.tok == COMMA { p.commaNewlines() inPos := p.pos in := p.expr() - if !IsLValue(in) { + if !ast.IsLValue(in) { panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue")) } args = append(args, in) } p.expect(RPAREN) - return &CallExpr{op, args} + return &ast.CallExpr{op, args} case F_SPLIT: p.next() p.expect(LPAREN) @@ -787,13 +798,13 @@ func (p *parser) primary() Expr { p.commaNewlines() ref := p.arrayRef(p.val, p.pos) p.expect(NAME) - args := []Expr{str, ref} + args := []ast.Expr{str, ref} if p.tok == COMMA { p.commaNewlines() args = append(args, p.regexStr(p.expr)) } p.expect(RPAREN) - return &CallExpr{F_SPLIT, args} + return &ast.CallExpr{F_SPLIT, args} case F_MATCH: p.next() p.expect(LPAREN) @@ -801,24 +812,24 @@ func (p *parser) primary() Expr { p.commaNewlines() regex := p.regexStr(p.expr) p.expect(RPAREN) - return &CallExpr{F_MATCH, []Expr{str, regex}} + return &ast.CallExpr{F_MATCH, []ast.Expr{str, regex}} case F_RAND: p.next() p.expect(LPAREN) p.expect(RPAREN) - return &CallExpr{F_RAND, nil} + return &ast.CallExpr{F_RAND, nil} case F_SRAND: p.next() p.expect(LPAREN) - var args []Expr + var args []ast.Expr if p.tok != RPAREN { args = append(args, p.expr()) } p.expect(RPAREN) - return &CallExpr{F_SRAND, args} + return &ast.CallExpr{F_SRAND, args} case F_LENGTH: p.next() - var args []Expr + var args []ast.Expr // AWK quirk: "length" is allowed to be called without parens if p.tok == LPAREN { p.next() @@ -827,39 +838,39 @@ func (p *parser) primary() Expr { } p.expect(RPAREN) } - return &CallExpr{F_LENGTH, args} + return &ast.CallExpr{F_LENGTH, args} case F_SUBSTR: p.next() p.expect(LPAREN) str := p.expr() p.commaNewlines() start := p.expr() - args := []Expr{str, start} + args := []ast.Expr{str, start} if p.tok == COMMA { p.commaNewlines() args = append(args, p.expr()) } 
p.expect(RPAREN) - return &CallExpr{F_SUBSTR, args} + return &ast.CallExpr{F_SUBSTR, args} case F_SPRINTF: p.next() p.expect(LPAREN) - args := []Expr{p.expr()} + args := []ast.Expr{p.expr()} for p.tok == COMMA { p.commaNewlines() args = append(args, p.expr()) } p.expect(RPAREN) - return &CallExpr{F_SPRINTF, args} + return &ast.CallExpr{F_SPRINTF, args} case F_FFLUSH: p.next() p.expect(LPAREN) - var args []Expr + var args []ast.Expr if p.tok != RPAREN { args = append(args, p.expr()) } p.expect(RPAREN) - return &CallExpr{F_FFLUSH, args} + return &ast.CallExpr{F_FFLUSH, args} case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE: // Simple 1-argument functions op := p.tok @@ -867,7 +878,7 @@ func (p *parser) primary() Expr { p.expect(LPAREN) arg := p.expr() p.expect(RPAREN) - return &CallExpr{op, []Expr{arg}} + return &ast.CallExpr{op, []ast.Expr{arg}} case F_ATAN2, F_INDEX: // Simple 2-argument functions op := p.tok @@ -877,14 +888,14 @@ func (p *parser) primary() Expr { p.commaNewlines() arg2 := p.expr() p.expect(RPAREN) - return &CallExpr{op, []Expr{arg1, arg2}} + return &ast.CallExpr{op, []ast.Expr{arg1, arg2}} default: panic(p.errorf("expected expression instead of %s", p.tok)) } } // Parse an optional lvalue -func (p *parser) optionalLValue() Expr { +func (p *parser) optionalLValue() ast.Expr { switch p.tok { case NAME: if p.lexer.PeekByte() == '(' { @@ -902,12 +913,12 @@ func (p *parser) optionalLValue() Expr { panic(p.errorf("expected expression instead of ]")) } p.expect(RBRACKET) - return &IndexExpr{p.arrayRef(name, namePos), index} + return &ast.IndexExpr{p.arrayRef(name, namePos), index} } return p.varRef(name, namePos) case DOLLAR: p.next() - return &FieldExpr{p.primary()} + return &ast.FieldExpr{p.primary()} default: return nil } @@ -915,12 +926,11 @@ func (p *parser) optionalLValue() Expr { // Parse /.../ regex or generic expression: // -// REGEX | expr -// -func (p *parser) regexStr(parse func() Expr) Expr { +// REGEX 
| expr +func (p *parser) regexStr(parse func() ast.Expr) ast.Expr { if p.matches(DIV, DIV_ASSIGN) { regex := p.nextRegex() - return &StrExpr{regex} + return &ast.StrExpr{regex} } return parse() } @@ -928,9 +938,8 @@ func (p *parser) regexStr(parse func() Expr) Expr { // Parse left-associative binary operator. Allow newlines after // operator if allowNewline is true. // -// parse [op parse] [op parse] ... -// -func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) Expr { +// parse [op parse] [op parse] ... +func (p *parser) binaryLeft(higher func() ast.Expr, allowNewline bool, ops ...Token) ast.Expr { expr := higher() for p.matches(ops...) { op := p.tok @@ -939,15 +948,14 @@ func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) p.optionalNewlines() } right := higher() - expr = &BinaryExpr{expr, op, right} + expr = &ast.BinaryExpr{expr, op, right} } return expr } // Parse comma followed by optional newlines: // -// COMMA NEWLINE* -// +// COMMA NEWLINE* func (p *parser) commaNewlines() { p.expect(COMMA) p.optionalNewlines() @@ -955,8 +963,7 @@ func (p *parser) commaNewlines() { // Parse zero or more optional newlines: // -// [NEWLINE] [NEWLINE] ... -// +// [NEWLINE] [NEWLINE] ... func (p *parser) optionalNewlines() { for p.tok == NEWLINE { p.next() @@ -1021,9 +1028,9 @@ func (p *parser) posErrorf(pos Position, format string, args ...interface{}) err // Parse call to a user-defined function (and record call site for // resolving later). 
-func (p *parser) userCall(name string, pos Position) *UserCallExpr { +func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr { p.expect(LPAREN) - args := []Expr{} + args := []ast.Expr{} i := 0 for !p.matches(NEWLINE, RPAREN) { if i > 0 { @@ -1035,7 +1042,7 @@ func (p *parser) userCall(name string, pos Position) *UserCallExpr { i++ } p.expect(RPAREN) - call := &UserCallExpr{false, -1, name, args} // index is resolved later + call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later p.recordUserCall(call, pos) return call } diff --git a/play/vendor/github.com/benhoyt/goawk/parser/resolve.go b/play/vendor/github.com/benhoyt/goawk/parser/resolve.go index 6543633..a2ed08d 100644 --- a/play/vendor/github.com/benhoyt/goawk/parser/resolve.go +++ b/play/vendor/github.com/benhoyt/goawk/parser/resolve.go @@ -7,7 +7,7 @@ import ( "reflect" "sort" - . "github.com/benhoyt/goawk/internal/ast" + "github.com/benhoyt/goawk/internal/ast" . "github.com/benhoyt/goawk/lexer" ) @@ -33,8 +33,8 @@ func (t varType) String() string { // typeInfo records type information for a single variable type typeInfo struct { typ varType - ref *VarExpr - scope VarScope + ref *ast.VarExpr + scope ast.VarScope index int callName string argIndex int @@ -44,9 +44,9 @@ type typeInfo struct { func (t typeInfo) String() string { var scope string switch t.scope { - case ScopeGlobal: + case ast.ScopeGlobal: scope = "Global" - case ScopeLocal: + case ast.ScopeLocal: scope = "Local" default: scope = "Special" @@ -58,7 +58,7 @@ func (t typeInfo) String() string { // A single variable reference (normally scalar) type varRef struct { funcName string - ref *VarExpr + ref *ast.VarExpr isArg bool pos Position } @@ -66,7 +66,7 @@ type varRef struct { // A single array reference type arrayRef struct { funcName string - ref *ArrayExpr + ref *ast.ArrayExpr pos Position } @@ -76,8 +76,9 @@ func (p *parser) initResolve() { p.varTypes[""] = make(map[string]typeInfo) // globals p.functions = 
make(map[string]int) p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present - p.arrayRef("ENVIRON", Position{1, 1}) // and ENVIRON - p.multiExprs = make(map[*MultiExpr]Position, 3) + p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays + p.arrayRef("FIELDS", Position{1, 1}) + p.multiExprs = make(map[*ast.MultiExpr]Position, 3) } // Signal the start of a function @@ -98,13 +99,13 @@ func (p *parser) addFunction(name string, index int) { // Records a call to a user function (for resolving indexes later) type userCall struct { - call *UserCallExpr + call *ast.UserCallExpr pos Position inFunc string } // Record a user call site -func (p *parser) recordUserCall(call *UserCallExpr, pos Position) { +func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) { p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName}) } @@ -149,8 +150,8 @@ func (p *parser) resolveUserCalls(prog *Program) { // For arguments that are variable references, we don't know the // type based on context, so mark the types for these as unknown. 
-func (p *parser) processUserCallArg(funcName string, arg Expr, index int) { - if varExpr, ok := arg.(*VarExpr); ok { +func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) { + if varExpr, ok := arg.(*ast.VarExpr); ok { scope, varFuncName := p.getScope(varExpr.Name) ref := p.varTypes[varFuncName][varExpr.Name].ref if ref == varExpr { @@ -166,22 +167,22 @@ func (p *parser) processUserCallArg(funcName string, arg Expr, index int) { // Determine scope of given variable reference (and funcName if it's // a local, otherwise empty string) -func (p *parser) getScope(name string) (VarScope, string) { +func (p *parser) getScope(name string) (ast.VarScope, string) { switch { case p.locals[name]: - return ScopeLocal, p.funcName - case SpecialVarIndex(name) > 0: - return ScopeSpecial, "" + return ast.ScopeLocal, p.funcName + case ast.SpecialVarIndex(name) > 0: + return ast.ScopeSpecial, "" default: - return ScopeGlobal, "" + return ast.ScopeGlobal, "" } } // Record a variable (scalar) reference and return the *VarExpr (but // VarExpr.Index won't be set till later) -func (p *parser) varRef(name string, pos Position) *VarExpr { +func (p *parser) varRef(name string, pos Position) *ast.VarExpr { scope, funcName := p.getScope(name) - expr := &VarExpr{scope, 0, name} + expr := &ast.VarExpr{scope, 0, name} p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos}) info := p.varTypes[funcName][name] if info.typ == typeUnknown { @@ -192,12 +193,12 @@ func (p *parser) varRef(name string, pos Position) *VarExpr { // Record an array reference and return the *ArrayExpr (but // ArrayExpr.Index won't be set till later) -func (p *parser) arrayRef(name string, pos Position) *ArrayExpr { +func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr { scope, funcName := p.getScope(name) - if scope == ScopeSpecial { + if scope == ast.ScopeSpecial { panic(p.errorf("can't use scalar %q as array", name)) } - expr := &ArrayExpr{scope, 0, name} + expr := 
&ast.ArrayExpr{scope, 0, name} p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos}) info := p.varTypes[funcName][name] if info.typ == typeUnknown { @@ -233,52 +234,45 @@ func (p *parser) printVarTypes(prog *Program) { } } -// If we can't finish resolving after this many iterations, give up -const maxResolveIterations = 10000 - // Resolve unknown variables types and generate variable indexes and // name-to-index mappings for interpreter func (p *parser) resolveVars(prog *Program) { // First go through all unknown types and try to determine the - // type from the parameter type in that function definition. May - // need multiple passes depending on the order of functions. This - // is not particularly efficient, but on realistic programs it's - // not an issue. - for i := 0; ; i++ { - progressed := false - for funcName, infos := range p.varTypes { - for name, info := range infos { - if info.scope == ScopeSpecial || info.typ != typeUnknown { - // It's a special var or type is already known - continue - } - funcIndex, ok := p.functions[info.callName] - if !ok { - // Function being called is a native function - continue - } - // Determine var type based on type of this parameter - // in the called function (if we know that) - paramName := prog.Functions[funcIndex].Params[info.argIndex] - typ := p.varTypes[info.callName][paramName].typ - if typ != typeUnknown { - if p.debugTypes { - fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", - funcName, name, typ) - } - info.typ = typ - p.varTypes[funcName][name] = info - progressed = true - } + // type from the parameter type in that function definition. + // Iterate through functions in topological order, for example + // if f() calls g(), process g first, then f. 
+ callGraph := make(map[string]map[string]struct{}) + for _, call := range p.userCalls { + if _, ok := callGraph[call.inFunc]; !ok { + callGraph[call.inFunc] = make(map[string]struct{}) + } + callGraph[call.inFunc][call.call.Name] = struct{}{} + } + sortedFuncs := topoSort(callGraph) + for _, funcName := range sortedFuncs { + infos := p.varTypes[funcName] + for name, info := range infos { + if info.scope == ast.ScopeSpecial || info.typ != typeUnknown { + // It's a special var or type is already known + continue + } + funcIndex, ok := p.functions[info.callName] + if !ok { + // Function being called is a native function + continue + } + // Determine var type based on type of this parameter + // in the called function (if we know that) + paramName := prog.Functions[funcIndex].Params[info.argIndex] + typ := p.varTypes[info.callName][paramName].typ + if typ != typeUnknown { + if p.debugTypes { + fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n", + funcName, name, typ) + } + info.typ = typ + p.varTypes[funcName][name] = info } - } - if !progressed { - // If we didn't progress we're done (or trying again is - // not going to help) - break - } - if i >= maxResolveIterations { - panic(p.errorf("too many iterations trying to resolve variable types")) } } @@ -293,8 +287,8 @@ func (p *parser) resolveVars(prog *Program) { panic(p.errorf("global var %q can't also be a function", name)) } var index int - if info.scope == ScopeSpecial { - index = SpecialVarIndex(name) + if info.scope == ast.ScopeSpecial { + index = ast.SpecialVarIndex(name) } else if info.typ == typeArray { index = len(prog.Arrays) prog.Arrays[name] = index @@ -317,7 +311,7 @@ func (p *parser) resolveVars(prog *Program) { } function := prog.Functions[c.call.Index] for i, arg := range c.call.Args { - varExpr, ok := arg.(*VarExpr) + varExpr, ok := arg.(*ast.VarExpr) if !ok { continue } @@ -368,7 +362,7 @@ func (p *parser) resolveVars(prog *Program) { // Check native function calls if c.call.Native { for _, arg := 
range c.call.Args { - varExpr, ok := arg.(*VarExpr) + varExpr, ok := arg.(*ast.VarExpr) if !ok { // Non-variable expression, must be scalar continue @@ -385,7 +379,7 @@ func (p *parser) resolveVars(prog *Program) { // Check AWK function calls function := prog.Functions[c.call.Index] for i, arg := range c.call.Args { - varExpr, ok := arg.(*VarExpr) + varExpr, ok := arg.(*ast.VarExpr) if !ok { if function.Arrays[i] { panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg)) @@ -441,14 +435,14 @@ func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string { // Record a "multi expression" (comma-separated pseudo-expression // used to allow commas around print/printf arguments). -func (p *parser) multiExpr(exprs []Expr, pos Position) Expr { - expr := &MultiExpr{exprs} +func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr { + expr := &ast.MultiExpr{exprs} p.multiExprs[expr] = pos return expr } // Mark the multi expression as used (by a print/printf statement). 
-func (p *parser) useMultiExpr(expr *MultiExpr) { +func (p *parser) useMultiExpr(expr *ast.MultiExpr) { delete(p.multiExprs, expr) } @@ -460,7 +454,7 @@ func (p *parser) checkMultiExprs() { // Show error on first comma-separated expression min := Position{1000000000, 1000000000} for _, pos := range p.multiExprs { - if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) { + if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column { min = pos } } diff --git a/play/vendor/github.com/benhoyt/goawk/parser/toposort.go b/play/vendor/github.com/benhoyt/goawk/parser/toposort.go new file mode 100644 index 0000000..90b71fa --- /dev/null +++ b/play/vendor/github.com/benhoyt/goawk/parser/toposort.go @@ -0,0 +1,72 @@ +// Topological sorting + +package parser + +/* +This algorithm is taken from: +https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search + +L ← Empty list that will contain the sorted nodes +while exists nodes without a permanent mark do + select an unmarked node n + visit(n) + +function visit(node n) + if n has a permanent mark then + return + if n has a temporary mark then + stop (not a DAG) + + mark n with a temporary mark + + for each node m with an edge from n to m do + visit(m) + + remove temporary mark from n + mark n with a permanent mark + add n to head of L +*/ + +// Perform a topological sort on the given graph. 
+func topoSort(graph map[string]map[string]struct{}) []string { + if len(graph) == 0 { + return nil + } + + unmarked := make(map[string]struct{}) + for node := range graph { + unmarked[node] = struct{}{} + } + permMarks := make(map[string]struct{}) + tempMarks := make(map[string]struct{}) + var sorted []string + + var visit func(string) + visit = func(n string) { + if _, ok := permMarks[n]; ok { + return + } + if _, ok := tempMarks[n]; ok { + return + } + tempMarks[n] = struct{}{} + for m := range graph[n] { + visit(m) + } + delete(tempMarks, n) + permMarks[n] = struct{}{} + delete(unmarked, n) + sorted = append(sorted, n) + return + } + + for len(unmarked) > 0 { + var n string + for n = range unmarked { + break + } + visit(n) + } + + return sorted +} diff --git a/play/vendor/modules.txt b/play/vendor/modules.txt index ab9da8b..774cc0d 100644 --- a/play/vendor/modules.txt +++ b/play/vendor/modules.txt @@ -1,6 +1,7 @@ -# github.com/benhoyt/goawk v1.13.0 -## explicit; go 1.13 +# github.com/benhoyt/goawk v1.20.0 +## explicit; go 1.14 github.com/benhoyt/goawk/internal/ast +github.com/benhoyt/goawk/internal/compiler github.com/benhoyt/goawk/interp github.com/benhoyt/goawk/lexer github.com/benhoyt/goawk/parser