play: Update Go, use `embed` for static content

This updates to the latest Go version for building the default OCI
container for the Grawkit playground, and switches to using the `embed`
package for serving static content.
This commit is contained in:
Alex Palaistras 2022-10-20 17:40:31 +01:00
parent 3b8c5eca21
commit e1208d67bd
26 changed files with 5048 additions and 1745 deletions

16
play/Containerfile Normal file
View File

@ -0,0 +1,16 @@
FROM docker.io/golang:1.19 AS builder
WORKDIR /src
COPY play/ /src/
RUN go build -o /play play.go
FROM docker.io/debian:stable-slim
WORKDIR /play
COPY --from=builder /play .
COPY grawkit .
USER nobody
EXPOSE 8080
CMD ["/play/play", "-script-path", "/play/grawkit", "-listen-address", ":8080"]

View File

@ -1,17 +0,0 @@
FROM golang:1.13 AS builder
WORKDIR /mnt
COPY play/go.mod play/go.sum play/play.go /mnt/
RUN go build -o play play.go
FROM debian:stable-slim
WORKDIR /play
COPY --from=builder /mnt/play .
COPY grawkit .
COPY play/static static
USER nobody
EXPOSE 8080
CMD ["/play/play", "-script-path", "grawkit", "-listen-address", ":8080"]

View File

@ -1,5 +1,5 @@
module github.com/deuill/grawkit/play
go 1.17
go 1.19
require github.com/benhoyt/goawk v1.13.0
require github.com/benhoyt/goawk v1.20.0

View File

@ -1,2 +1,2 @@
github.com/benhoyt/goawk v1.13.0 h1:/Iu42ErHsT5vHrpWyewpI98hB2PHBk66o+oLZs4drPs=
github.com/benhoyt/goawk v1.13.0/go.mod h1:UKzPyqDh9O7HZ/ftnU33MYlAP2rPbXdwQ+OVlEOPsjM=
github.com/benhoyt/goawk v1.20.0 h1:oz81agTfP/8Z7afMvmOwX4Ms9qTtGhZxPEzHCycIFds=
github.com/benhoyt/goawk v1.20.0/go.mod h1:Dp3jBsApuiItYR9atsCm//q/70OnqjihLh5WkU6eW7U=

View File

@ -5,7 +5,6 @@ import (
"bytes"
"errors"
"flag"
"io/ioutil"
"log"
"net"
"net/http"
@ -17,6 +16,9 @@ import (
"text/template"
"time"
// Internal packages.
"github.com/deuill/grawkit/play/static"
// Third-party packages
"github.com/benhoyt/goawk/interp"
"github.com/benhoyt/goawk/parser"
@ -33,21 +35,13 @@ const (
)
var (
// Command-line flags to parse.
scriptPath = flag.String("script-path", "../grawkit", "The path to the Grawkit script")
staticDir = flag.String("static-dir", "static", "The directory under which static files can be found")
listenAddress = flag.String("listen-address", "localhost:8080", "The default address to listen on")
index *template.Template // The base template to render.
program *parser.Program // The parsed version of the Grawkit script.
)
type templateData struct {
Content string
Preview string
Error string
}
// ParseContent accepts un-filtered POST form content, and returns the content to render as a string.
// An error is returned if the content is missing or otherwise invalid.
func parseContent(form url.Values) (string, error) {
@ -70,7 +64,11 @@ func parseContent(form url.Values) (string, error) {
func handleRequest(w http.ResponseWriter, r *http.Request) {
// Handle template rendering on root path.
if r.URL.Path == "/" {
var data templateData
var data struct {
Content string
Preview string
Error string
}
var outbuf, errbuf bytes.Buffer
switch r.Method {
@ -117,22 +115,8 @@ func handleRequest(w http.ResponseWriter, r *http.Request) {
return
}
// Get sanitized filename for request path given.
name := path.Join(*staticDir, path.Clean(r.URL.Path))
// Check if a file exists for the path requested.
stat, err := os.Stat(name)
if os.IsNotExist(err) || stat != nil && stat.IsDir() {
http.NotFound(w, r)
return
} else if err != nil {
code := http.StatusInternalServerError
http.Error(w, http.StatusText(code), code)
return
}
// Serve file as fallback.
http.ServeFile(w, r, name)
http.FileServer(http.FS(static.FS)).ServeHTTP(w, r)
}
// Setup reads configuration flags and initializes global state for the service, returning an error
@ -144,17 +128,17 @@ func setup() error {
// Set up and parse known template files.
var err error
var files = []string{
path.Join(*staticDir, "template", "index.template"),
path.Join(*staticDir, "template", "default-content.template"),
path.Join(*staticDir, "template", "default-preview.template"),
path.Join("template", "index.template"),
path.Join("template", "default-content.template"),
path.Join("template", "default-preview.template"),
}
if index, err = template.ParseFiles(files...); err != nil {
if index, err = template.ParseFS(static.FS, files...); err != nil {
return err
}
// Parse Grawkit script into concrete representation.
if script, err := ioutil.ReadFile(*scriptPath); err != nil {
if script, err := os.ReadFile(*scriptPath); err != nil {
return err
} else if program, err = parser.ParseProgram(script, nil); err != nil {
return err

View File

@ -51,10 +51,8 @@ a:hover, a:active {
border: 0.2rem solid #333;
border-radius: 0;
color: #fefefe;
font-family: monospace;
font-weight: bold;
padding: 0 1rem;
text-transform: none;
transition: background 0.2s ease, border 0.2s ease, color 0.2s ease;
}

9
play/static/static.go Normal file
View File

@ -0,0 +1,9 @@
package static
import "embed"
// FS is an [fs.FS] implementation containing all static files needed for serving the Grawkit
// playground.
//
// NOTE(review): the `*` pattern matches every entry in this directory, which includes this Go
// source file, so static.go is itself embedded alongside the real assets — confirm that this is
// acceptable for anything that exposes FS directly (e.g. an HTTP file server).
//
//go:embed *
var FS embed.FS

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019 Ben Hoyt
Copyright (c) 2022 Ben Hoyt
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -10,6 +10,35 @@ import (
. "github.com/benhoyt/goawk/lexer"
)
// Program is an entire AWK program.
type Program struct {
Begin []Stmts
Actions []Action
End []Stmts
Functions []Function
Scalars map[string]int
Arrays map[string]int
}
// String renders the parsed program back to text. Every BEGIN block comes
// first, followed by the pattern-action rules, the END blocks, and finally
// the function definitions; consecutive sections are separated by a blank
// line.
func (p *Program) String() string {
	total := len(p.Begin) + len(p.Actions) + len(p.End) + len(p.Functions)
	sections := make([]string, 0, total)

	for _, body := range p.Begin {
		sections = append(sections, "BEGIN {\n"+body.String()+"}")
	}
	for _, action := range p.Actions {
		sections = append(sections, action.String())
	}
	for _, body := range p.End {
		sections = append(sections, "END {\n"+body.String()+"}")
	}
	for _, fn := range p.Functions {
		sections = append(sections, fn.String())
	}

	return strings.Join(sections, "\n\n")
}
// Stmts is a block containing multiple statements.
type Stmts []Stmt
@ -53,24 +82,25 @@ type Expr interface {
}
// All these types implement the Expr interface.
func (e *FieldExpr) expr() {}
func (e *UnaryExpr) expr() {}
func (e *BinaryExpr) expr() {}
func (e *ArrayExpr) expr() {}
func (e *InExpr) expr() {}
func (e *CondExpr) expr() {}
func (e *NumExpr) expr() {}
func (e *StrExpr) expr() {}
func (e *RegExpr) expr() {}
func (e *VarExpr) expr() {}
func (e *IndexExpr) expr() {}
func (e *AssignExpr) expr() {}
func (e *AugAssignExpr) expr() {}
func (e *IncrExpr) expr() {}
func (e *CallExpr) expr() {}
func (e *UserCallExpr) expr() {}
func (e *MultiExpr) expr() {}
func (e *GetlineExpr) expr() {}
func (e *FieldExpr) expr() {}
func (e *NamedFieldExpr) expr() {}
func (e *UnaryExpr) expr() {}
func (e *BinaryExpr) expr() {}
func (e *ArrayExpr) expr() {}
func (e *InExpr) expr() {}
func (e *CondExpr) expr() {}
func (e *NumExpr) expr() {}
func (e *StrExpr) expr() {}
func (e *RegExpr) expr() {}
func (e *VarExpr) expr() {}
func (e *IndexExpr) expr() {}
func (e *AssignExpr) expr() {}
func (e *AugAssignExpr) expr() {}
func (e *IncrExpr) expr() {}
func (e *CallExpr) expr() {}
func (e *UserCallExpr) expr() {}
func (e *MultiExpr) expr() {}
func (e *GetlineExpr) expr() {}
// FieldExpr is an expression like $0.
type FieldExpr struct {
@ -81,6 +111,15 @@ func (e *FieldExpr) String() string {
return "$" + e.Index.String()
}
// NamedFieldExpr is an expression like @"name".
type NamedFieldExpr struct {
Field Expr
}
// String returns the expression in source form: an "@" followed by the
// string form of the field-name expression.
func (e *NamedFieldExpr) String() string {
	return "@" + e.Field.String()
}
// UnaryExpr is an expression like -1234.
type UnaryExpr struct {
Op Token
@ -155,7 +194,11 @@ type NumExpr struct {
}
func (e *NumExpr) String() string {
return fmt.Sprintf("%.6g", e.Value)
if e.Value == float64(int(e.Value)) {
return strconv.Itoa(int(e.Value))
} else {
return fmt.Sprintf("%.6g", e.Value)
}
}
// StrExpr is a literal string like "foo".

View File

@ -2,6 +2,10 @@
package ast
import (
"fmt"
)
const (
V_ILLEGAL = iota
V_ARGC
@ -9,11 +13,13 @@ const (
V_FILENAME
V_FNR
V_FS
V_INPUTMODE
V_NF
V_NR
V_OFMT
V_OFS
V_ORS
V_OUTPUTMODE
V_RLENGTH
V_RS
V_RSTART
@ -24,21 +30,23 @@ const (
)
var specialVars = map[string]int{
"ARGC": V_ARGC,
"CONVFMT": V_CONVFMT,
"FILENAME": V_FILENAME,
"FNR": V_FNR,
"FS": V_FS,
"NF": V_NF,
"NR": V_NR,
"OFMT": V_OFMT,
"OFS": V_OFS,
"ORS": V_ORS,
"RLENGTH": V_RLENGTH,
"RS": V_RS,
"RSTART": V_RSTART,
"RT": V_RT,
"SUBSEP": V_SUBSEP,
"ARGC": V_ARGC,
"CONVFMT": V_CONVFMT,
"FILENAME": V_FILENAME,
"FNR": V_FNR,
"FS": V_FS,
"INPUTMODE": V_INPUTMODE,
"NF": V_NF,
"NR": V_NR,
"OFMT": V_OFMT,
"OFS": V_OFS,
"ORS": V_ORS,
"OUTPUTMODE": V_OUTPUTMODE,
"RLENGTH": V_RLENGTH,
"RS": V_RS,
"RSTART": V_RSTART,
"RT": V_RT,
"SUBSEP": V_SUBSEP,
}
// SpecialVarIndex returns the "index" of the special variable, or 0
@ -46,3 +54,47 @@ var specialVars = map[string]int{
func SpecialVarIndex(name string) int {
return specialVars[name]
}
// SpecialVarName maps a special-variable index (one of the V_* constants)
// back to the variable's name. V_ILLEGAL yields "ILLEGAL"; any index that is
// not a known constant yields a "<unknown special var N>" placeholder.
func SpecialVarName(index int) string {
	var name string
	switch index {
	case V_ILLEGAL:
		name = "ILLEGAL"
	case V_ARGC:
		name = "ARGC"
	case V_CONVFMT:
		name = "CONVFMT"
	case V_FILENAME:
		name = "FILENAME"
	case V_FNR:
		name = "FNR"
	case V_FS:
		name = "FS"
	case V_INPUTMODE:
		name = "INPUTMODE"
	case V_NF:
		name = "NF"
	case V_NR:
		name = "NR"
	case V_OFMT:
		name = "OFMT"
	case V_OFS:
		name = "OFS"
	case V_ORS:
		name = "ORS"
	case V_OUTPUTMODE:
		name = "OUTPUTMODE"
	case V_RLENGTH:
		name = "RLENGTH"
	case V_RS:
		name = "RS"
	case V_RSTART:
		name = "RSTART"
	case V_RT:
		name = "RT"
	case V_SUBSEP:
		name = "SUBSEP"
	default:
		name = fmt.Sprintf("<unknown special var %d>", index)
	}
	return name
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,495 @@
// Disassembles compiled program to text assembly instructions
package compiler
import (
"fmt"
"io"
"strings"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/lexer"
)
// Disassemble writes a human-readable listing of the program's virtual
// machine instructions to writer. Sections are emitted in this order: the
// BEGIN code (if any), then for each action its pattern code (one block for a
// single pattern, "start" and "stop" blocks for a two-part pattern) and its
// body (if non-empty), then the END code (if any), and finally every
// function. The first error returned by a section aborts the listing and is
// returned to the caller.
func (p *Program) Disassemble(writer io.Writer) error {
	// section disassembles one block of code under the given heading.
	// funcIndex selects the function used to resolve local names.
	section := func(heading string, code []Opcode, funcIndex int) error {
		d := &disassembler{
			program:         p,
			writer:          writer,
			code:            code,
			nativeFuncNames: p.nativeFuncNames,
			funcIndex:       funcIndex,
		}
		return d.disassemble(heading)
	}

	if p.Begin != nil {
		if err := section("BEGIN", p.Begin, 0); err != nil {
			return err
		}
	}

	for _, action := range p.Actions {
		// Actions with no pattern have nothing to list before the body.
		switch len(action.Pattern) {
		case 1:
			if err := section("pattern", action.Pattern[0], 0); err != nil {
				return err
			}
		case 2:
			if err := section("start", action.Pattern[0], 0); err != nil {
				return err
			}
			if err := section("stop", action.Pattern[1], 0); err != nil {
				return err
			}
		}
		if len(action.Body) > 0 {
			if err := section("{ body }", action.Body, 0); err != nil {
				return err
			}
		}
	}

	if p.End != nil {
		if err := section("END", p.End, 0); err != nil {
			return err
		}
	}

	for i, f := range p.Functions {
		if err := section("function "+f.Name, f.Body, i); err != nil {
			return err
		}
	}

	return nil
}
// disassembler holds the state needed to disassemble a single block of
// opcodes and write its text form to an io.Writer.
type disassembler struct {
	// program is the compiled program the block belongs to; its constant
	// tables, variable names and function list are used to decode operands.
	program *Program
	// writer receives the disassembly output.
	writer io.Writer
	// code is the block of opcodes being disassembled.
	code []Opcode
	// nativeFuncNames supplies the names printed for CallNative operands.
	nativeFuncNames []string
	// funcIndex is the index into program.Functions of the function whose
	// parameters are used to resolve local variable and array names.
	funcIndex int
	// ip is the index in code of the next opcode that fetch will return.
	ip int
	// opAddr is the index in code of the opcode that starts the instruction
	// currently being decoded; writeOpf prints it as the instruction address.
	opAddr int
	// err is the first write error encountered; once set, further writes
	// are skipped and disassemble stops.
	err error
}
// disassemble writes the text form of every instruction in d.code to
// d.writer. When prefix is non-empty a "// prefix" heading line is written
// first. Each instruction is then written on its own line as the address of
// its opcode, the opcode name, and its decoded operands; a final newline is
// written after the last instruction.
//
// Decoding stops as soon as a write fails, and that first write error is
// returned (nil if every write succeeded).
func (d *disassembler) disassemble(prefix string) error {
	if prefix != "" {
		d.writef(" // %s\n", prefix)
	}

	for d.ip < len(d.code) && d.err == nil {
		// Remember where this instruction starts; the fetches below advance
		// d.ip past the operands.
		d.opAddr = d.ip
		op := d.fetch()

		switch op {
		case Num:
			index := d.fetch()
			num := d.program.Nums[index]
			// Integral constants are shown without a fractional part.
			if num == float64(int(num)) {
				d.writeOpf("Num %d (%d)", int(num), index)
			} else {
				d.writeOpf("Num %.6g (%d)", num, index)
			}

		case Str:
			index := d.fetch()
			d.writeOpf("Str %q (%d)", d.program.Strs[index], index)

		case FieldInt:
			index := d.fetch()
			d.writeOpf("FieldInt %d", index)

		case FieldByNameStr:
			index := d.fetch()
			d.writeOpf("FieldByNameStr %q (%d)", d.program.Strs[index], index)

		case Global:
			index := d.fetch()
			d.writeOpf("Global %s", d.program.scalarNames[index])

		case Local:
			index := int(d.fetch())
			d.writeOpf("Local %s", d.localName(index))

		case Special:
			index := d.fetch()
			d.writeOpf("Special %s", ast.SpecialVarName(int(index)))

		case ArrayGlobal:
			arrayIndex := d.fetch()
			d.writeOpf("ArrayGlobal %s", d.program.arrayNames[arrayIndex])

		case ArrayLocal:
			arrayIndex := d.fetch()
			d.writeOpf("ArrayLocal %s", d.localArrayName(int(arrayIndex)))

		case InGlobal:
			arrayIndex := d.fetch()
			d.writeOpf("InGlobal %s", d.program.arrayNames[arrayIndex])

		case InLocal:
			arrayIndex := int(d.fetch())
			d.writeOpf("InLocal %s", d.localArrayName(arrayIndex))

		case AssignGlobal:
			index := d.fetch()
			d.writeOpf("AssignGlobal %s", d.program.scalarNames[index])

		case AssignLocal:
			index := int(d.fetch())
			d.writeOpf("AssignLocal %s", d.localName(index))

		case AssignSpecial:
			index := d.fetch()
			d.writeOpf("AssignSpecial %s", ast.SpecialVarName(int(index)))

		case AssignArrayGlobal:
			arrayIndex := d.fetch()
			d.writeOpf("AssignArrayGlobal %s", d.program.arrayNames[arrayIndex])

		case AssignArrayLocal:
			arrayIndex := int(d.fetch())
			d.writeOpf("AssignArrayLocal %s", d.localArrayName(arrayIndex))

		case Delete:
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("Delete %s", d.arrayName(arrayScope, arrayIndex))

		case DeleteAll:
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("DeleteAll %s", d.arrayName(arrayScope, arrayIndex))

		case IncrField:
			amount := d.fetch()
			d.writeOpf("IncrField %d", amount)

		case IncrGlobal:
			amount := d.fetch()
			index := d.fetch()
			d.writeOpf("IncrGlobal %d %s", amount, d.program.scalarNames[index])

		case IncrLocal:
			amount := d.fetch()
			index := int(d.fetch())
			d.writeOpf("IncrLocal %d %s", amount, d.localName(index))

		case IncrSpecial:
			amount := d.fetch()
			index := d.fetch()
			d.writeOpf("IncrSpecial %d %s", amount, ast.SpecialVarName(int(index)))

		case IncrArrayGlobal:
			amount := d.fetch()
			arrayIndex := d.fetch()
			d.writeOpf("IncrArrayGlobal %d %s", amount, d.program.arrayNames[arrayIndex])

		case IncrArrayLocal:
			amount := d.fetch()
			arrayIndex := int(d.fetch())
			d.writeOpf("IncrArrayLocal %d %s", amount, d.localArrayName(arrayIndex))

		case AugAssignField:
			operation := AugOp(d.fetch())
			d.writeOpf("AugAssignField %s", operation)

		case AugAssignGlobal:
			operation := AugOp(d.fetch())
			index := d.fetch()
			d.writeOpf("AugAssignGlobal %s %s", operation, d.program.scalarNames[index])

		case AugAssignLocal:
			operation := AugOp(d.fetch())
			index := int(d.fetch())
			d.writeOpf("AugAssignLocal %s %s", operation, d.localName(index))

		case AugAssignSpecial:
			operation := AugOp(d.fetch())
			index := d.fetch()
			// SpecialVarName returns a string, so it must be formatted with
			// %s; %d would print "%!d(string=...)" instead of the name.
			d.writeOpf("AugAssignSpecial %s %s", operation, ast.SpecialVarName(int(index)))

		case AugAssignArrayGlobal:
			operation := AugOp(d.fetch())
			arrayIndex := d.fetch()
			d.writeOpf("AugAssignArrayGlobal %s %s", operation, d.program.arrayNames[arrayIndex])

		case AugAssignArrayLocal:
			operation := AugOp(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("AugAssignArrayLocal %s %s", operation, d.localArrayName(arrayIndex))

		case Regex:
			regexIndex := d.fetch()
			d.writeOpf("Regex %q (%d)", d.program.Regexes[regexIndex], regexIndex)

		case IndexMulti:
			num := d.fetch()
			d.writeOpf("IndexMulti %d", num)

		case ConcatMulti:
			num := d.fetch()
			d.writeOpf("ConcatMulti %d", num)

		// For all jump instructions the printed target is the offset operand
		// added to d.ip as it stands after the operand has been fetched.
		case Jump:
			offset := d.fetch()
			d.writeOpf("Jump 0x%04x", d.ip+int(offset))

		case JumpFalse:
			offset := d.fetch()
			d.writeOpf("JumpFalse 0x%04x", d.ip+int(offset))

		case JumpTrue:
			offset := d.fetch()
			d.writeOpf("JumpTrue 0x%04x", d.ip+int(offset))

		case JumpEquals:
			offset := d.fetch()
			d.writeOpf("JumpEquals 0x%04x", d.ip+int(offset))

		case JumpNotEquals:
			offset := d.fetch()
			d.writeOpf("JumpNotEquals 0x%04x", d.ip+int(offset))

		case JumpLess:
			offset := d.fetch()
			d.writeOpf("JumpLess 0x%04x", d.ip+int(offset))

		case JumpGreater:
			offset := d.fetch()
			d.writeOpf("JumpGreater 0x%04x", d.ip+int(offset))

		case JumpLessOrEqual:
			offset := d.fetch()
			d.writeOpf("JumpLessOrEqual 0x%04x", d.ip+int(offset))

		case JumpGreaterOrEqual:
			offset := d.fetch()
			d.writeOpf("JumpGreaterOrEqual 0x%04x", d.ip+int(offset))

		case ForIn:
			varScope := ast.VarScope(d.fetch())
			varIndex := int(d.fetch())
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			offset := d.fetch()
			d.writeOpf("ForIn %s %s 0x%04x", d.varName(varScope, varIndex), d.arrayName(arrayScope, arrayIndex), d.ip+int(offset))

		case CallBuiltin:
			builtinOp := BuiltinOp(d.fetch())
			d.writeOpf("CallBuiltin %s", builtinOp)

		case CallSplit:
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("CallSplit %s", d.arrayName(arrayScope, arrayIndex))

		case CallSplitSep:
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("CallSplitSep %s", d.arrayName(arrayScope, arrayIndex))

		case CallSprintf:
			numArgs := d.fetch()
			d.writeOpf("CallSprintf %d", numArgs)

		case CallUser:
			funcIndex := d.fetch()
			numArrayArgs := int(d.fetch())
			// Each array argument is encoded as a (scope, index) pair.
			var arrayArgs []string
			for i := 0; i < numArrayArgs; i++ {
				arrayScope := ast.VarScope(d.fetch())
				arrayIndex := int(d.fetch())
				arrayArgs = append(arrayArgs, d.arrayName(arrayScope, arrayIndex))
			}
			d.writeOpf("CallUser %s [%s]", d.program.Functions[funcIndex].Name, strings.Join(arrayArgs, ", "))

		case CallNative:
			funcIndex := d.fetch()
			numArgs := d.fetch()
			d.writeOpf("CallNative %s %d", d.nativeFuncNames[funcIndex], numArgs)

		case Nulls:
			numNulls := d.fetch()
			d.writeOpf("Nulls %d", numNulls)

		case Print:
			numArgs := d.fetch()
			redirect := lexer.Token(d.fetch())
			// An ILLEGAL redirect token is omitted from the listing.
			if redirect == lexer.ILLEGAL {
				d.writeOpf("Print %d", numArgs)
			} else {
				d.writeOpf("Print %d %s", numArgs, redirect)
			}

		case Printf:
			numArgs := d.fetch()
			redirect := lexer.Token(d.fetch())
			// An ILLEGAL redirect token is omitted from the listing.
			if redirect == lexer.ILLEGAL {
				d.writeOpf("Printf %d", numArgs)
			} else {
				d.writeOpf("Printf %d %s", numArgs, redirect)
			}

		case Getline:
			redirect := lexer.Token(d.fetch())
			d.writeOpf("Getline %s", redirect)

		case GetlineField:
			redirect := lexer.Token(d.fetch())
			d.writeOpf("GetlineField %s", redirect)

		case GetlineGlobal:
			redirect := lexer.Token(d.fetch())
			index := d.fetch()
			d.writeOpf("GetlineGlobal %s %s", redirect, d.program.scalarNames[index])

		case GetlineLocal:
			redirect := lexer.Token(d.fetch())
			index := int(d.fetch())
			d.writeOpf("GetlineLocal %s %s", redirect, d.localName(index))

		case GetlineSpecial:
			redirect := lexer.Token(d.fetch())
			index := d.fetch()
			d.writeOpf("GetlineSpecial %s %s", redirect, ast.SpecialVarName(int(index)))

		case GetlineArray:
			redirect := lexer.Token(d.fetch())
			arrayScope := ast.VarScope(d.fetch())
			arrayIndex := int(d.fetch())
			d.writeOpf("GetlineArray %s %s", redirect, d.arrayName(arrayScope, arrayIndex))

		default:
			// Handles all other opcodes with no arguments
			d.writeOpf("%s", op)
		}
	}

	d.writef("\n")
	return d.err
}
// fetch returns the opcode at the current instruction pointer and then
// advances the pointer by one.
func (d *disassembler) fetch() Opcode {
	pos := d.ip
	next := d.code[pos]
	d.ip = pos + 1
	return next
}
// writef formats its arguments and writes the result to the disassembly
// output. It does nothing once an earlier write has failed; a failure of
// this write is recorded in d.err.
func (d *disassembler) writef(format string, args ...interface{}) {
	if d.err == nil {
		_, d.err = fmt.Fprintf(d.writer, format, args...)
	}
}
// writeOpf writes one instruction line to the disassembly output: the
// address of the current instruction as four hex digits, a space, the
// formatted text, and a newline. It does nothing once an earlier write has
// failed; a failure of this write is recorded in d.err.
func (d *disassembler) writeOpf(format string, args ...interface{}) {
	if d.err == nil {
		line := fmt.Sprintf("%04x", d.opAddr) + " " + format + "\n"
		_, d.err = fmt.Fprintf(d.writer, line, args...)
	}
}
// varName resolves a scalar variable reference, given as a scope and an
// index within that scope, to the variable's name.
func (d *disassembler) varName(scope ast.VarScope, index int) string {
	if scope == ast.ScopeGlobal {
		return d.program.scalarNames[index]
	}
	if scope == ast.ScopeLocal {
		return d.localName(index)
	}
	// Every remaining scope is treated as a special variable.
	return ast.SpecialVarName(index)
}
// localName returns the name of the index'th scalar (non-array) parameter of
// the function currently being disassembled. It panics if the function has
// no such parameter.
func (d *disassembler) localName(index int) string {
	fn := d.program.Functions[d.funcIndex]
	// Count down over the scalar parameters only; array parameters do not
	// take part in scalar numbering.
	remaining := index
	for i, name := range fn.Params {
		if fn.Arrays[i] {
			continue
		}
		if remaining == 0 {
			return name
		}
		remaining--
	}
	panic(fmt.Sprintf("unexpected local variable index %d", index))
}
// arrayName resolves an array reference, described by a scope and an index
// within that scope, to the array's name. Any scope other than local is
// looked up among the program's array names.
func (d *disassembler) arrayName(scope ast.VarScope, index int) string {
	if scope != ast.ScopeLocal {
		return d.program.arrayNames[index]
	}
	return d.localArrayName(index)
}
// localArrayName returns the name of the index'th array parameter of the
// function currently being disassembled. It panics if the function has no
// such parameter.
func (d *disassembler) localArrayName(index int) string {
	fn := d.program.Functions[d.funcIndex]
	// seen counts the array parameters passed so far; scalar parameters do
	// not take part in array numbering.
	seen := 0
	for i := range fn.Params {
		if fn.Arrays[i] {
			if seen == index {
				return fn.Params[i]
			}
			seen++
		}
	}
	panic(fmt.Sprintf("unexpected local array index %d", index))
}

View File

@ -0,0 +1,174 @@
// Code generated by "stringer -type=Opcode,AugOp,BuiltinOp"; DO NOT EDIT.
package compiler
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[Nop-0]
_ = x[Num-1]
_ = x[Str-2]
_ = x[Dupe-3]
_ = x[Drop-4]
_ = x[Swap-5]
_ = x[Field-6]
_ = x[FieldInt-7]
_ = x[FieldByName-8]
_ = x[FieldByNameStr-9]
_ = x[Global-10]
_ = x[Local-11]
_ = x[Special-12]
_ = x[ArrayGlobal-13]
_ = x[ArrayLocal-14]
_ = x[InGlobal-15]
_ = x[InLocal-16]
_ = x[AssignField-17]
_ = x[AssignGlobal-18]
_ = x[AssignLocal-19]
_ = x[AssignSpecial-20]
_ = x[AssignArrayGlobal-21]
_ = x[AssignArrayLocal-22]
_ = x[Delete-23]
_ = x[DeleteAll-24]
_ = x[IncrField-25]
_ = x[IncrGlobal-26]
_ = x[IncrLocal-27]
_ = x[IncrSpecial-28]
_ = x[IncrArrayGlobal-29]
_ = x[IncrArrayLocal-30]
_ = x[AugAssignField-31]
_ = x[AugAssignGlobal-32]
_ = x[AugAssignLocal-33]
_ = x[AugAssignSpecial-34]
_ = x[AugAssignArrayGlobal-35]
_ = x[AugAssignArrayLocal-36]
_ = x[Regex-37]
_ = x[IndexMulti-38]
_ = x[ConcatMulti-39]
_ = x[Add-40]
_ = x[Subtract-41]
_ = x[Multiply-42]
_ = x[Divide-43]
_ = x[Power-44]
_ = x[Modulo-45]
_ = x[Equals-46]
_ = x[NotEquals-47]
_ = x[Less-48]
_ = x[Greater-49]
_ = x[LessOrEqual-50]
_ = x[GreaterOrEqual-51]
_ = x[Concat-52]
_ = x[Match-53]
_ = x[NotMatch-54]
_ = x[Not-55]
_ = x[UnaryMinus-56]
_ = x[UnaryPlus-57]
_ = x[Boolean-58]
_ = x[Jump-59]
_ = x[JumpFalse-60]
_ = x[JumpTrue-61]
_ = x[JumpEquals-62]
_ = x[JumpNotEquals-63]
_ = x[JumpLess-64]
_ = x[JumpGreater-65]
_ = x[JumpLessOrEqual-66]
_ = x[JumpGreaterOrEqual-67]
_ = x[Next-68]
_ = x[Exit-69]
_ = x[ForIn-70]
_ = x[BreakForIn-71]
_ = x[CallBuiltin-72]
_ = x[CallSplit-73]
_ = x[CallSplitSep-74]
_ = x[CallSprintf-75]
_ = x[CallUser-76]
_ = x[CallNative-77]
_ = x[Return-78]
_ = x[ReturnNull-79]
_ = x[Nulls-80]
_ = x[Print-81]
_ = x[Printf-82]
_ = x[Getline-83]
_ = x[GetlineField-84]
_ = x[GetlineGlobal-85]
_ = x[GetlineLocal-86]
_ = x[GetlineSpecial-87]
_ = x[GetlineArray-88]
_ = x[EndOpcode-89]
}
const _Opcode_name = "NopNumStrDupeDropSwapFieldFieldIntFieldByNameFieldByNameStrGlobalLocalSpecialArrayGlobalArrayLocalInGlobalInLocalAssignFieldAssignGlobalAssignLocalAssignSpecialAssignArrayGlobalAssignArrayLocalDeleteDeleteAllIncrFieldIncrGlobalIncrLocalIncrSpecialIncrArrayGlobalIncrArrayLocalAugAssignFieldAugAssignGlobalAugAssignLocalAugAssignSpecialAugAssignArrayGlobalAugAssignArrayLocalRegexIndexMultiConcatMultiAddSubtractMultiplyDividePowerModuloEqualsNotEqualsLessGreaterLessOrEqualGreaterOrEqualConcatMatchNotMatchNotUnaryMinusUnaryPlusBooleanJumpJumpFalseJumpTrueJumpEqualsJumpNotEqualsJumpLessJumpGreaterJumpLessOrEqualJumpGreaterOrEqualNextExitForInBreakForInCallBuiltinCallSplitCallSplitSepCallSprintfCallUserCallNativeReturnReturnNullNullsPrintPrintfGetlineGetlineFieldGetlineGlobalGetlineLocalGetlineSpecialGetlineArrayEndOpcode"
var _Opcode_index = [...]uint16{0, 3, 6, 9, 13, 17, 21, 26, 34, 45, 59, 65, 70, 77, 88, 98, 106, 113, 124, 136, 147, 160, 177, 193, 199, 208, 217, 227, 236, 247, 262, 276, 290, 305, 319, 335, 355, 374, 379, 389, 400, 403, 411, 419, 425, 430, 436, 442, 451, 455, 462, 473, 487, 493, 498, 506, 509, 519, 528, 535, 539, 548, 556, 566, 579, 587, 598, 613, 631, 635, 639, 644, 654, 665, 674, 686, 697, 705, 715, 721, 731, 736, 741, 747, 754, 766, 779, 791, 805, 817, 826}
func (i Opcode) String() string {
if i < 0 || i >= Opcode(len(_Opcode_index)-1) {
return "Opcode(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Opcode_name[_Opcode_index[i]:_Opcode_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[AugOpAdd-0]
_ = x[AugOpSub-1]
_ = x[AugOpMul-2]
_ = x[AugOpDiv-3]
_ = x[AugOpPow-4]
_ = x[AugOpMod-5]
}
const _AugOp_name = "AugOpAddAugOpSubAugOpMulAugOpDivAugOpPowAugOpMod"
var _AugOp_index = [...]uint8{0, 8, 16, 24, 32, 40, 48}
func (i AugOp) String() string {
if i < 0 || i >= AugOp(len(_AugOp_index)-1) {
return "AugOp(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _AugOp_name[_AugOp_index[i]:_AugOp_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[BuiltinAtan2-0]
_ = x[BuiltinClose-1]
_ = x[BuiltinCos-2]
_ = x[BuiltinExp-3]
_ = x[BuiltinFflush-4]
_ = x[BuiltinFflushAll-5]
_ = x[BuiltinGsub-6]
_ = x[BuiltinIndex-7]
_ = x[BuiltinInt-8]
_ = x[BuiltinLength-9]
_ = x[BuiltinLengthArg-10]
_ = x[BuiltinLog-11]
_ = x[BuiltinMatch-12]
_ = x[BuiltinRand-13]
_ = x[BuiltinSin-14]
_ = x[BuiltinSqrt-15]
_ = x[BuiltinSrand-16]
_ = x[BuiltinSrandSeed-17]
_ = x[BuiltinSub-18]
_ = x[BuiltinSubstr-19]
_ = x[BuiltinSubstrLength-20]
_ = x[BuiltinSystem-21]
_ = x[BuiltinTolower-22]
_ = x[BuiltinToupper-23]
}
const _BuiltinOp_name = "BuiltinAtan2BuiltinCloseBuiltinCosBuiltinExpBuiltinFflushBuiltinFflushAllBuiltinGsubBuiltinIndexBuiltinIntBuiltinLengthBuiltinLengthArgBuiltinLogBuiltinMatchBuiltinRandBuiltinSinBuiltinSqrtBuiltinSrandBuiltinSrandSeedBuiltinSubBuiltinSubstrBuiltinSubstrLengthBuiltinSystemBuiltinTolowerBuiltinToupper"
var _BuiltinOp_index = [...]uint16{0, 12, 24, 34, 44, 57, 73, 84, 96, 106, 119, 135, 145, 157, 168, 178, 189, 201, 217, 227, 240, 259, 272, 286, 300}
func (i BuiltinOp) String() string {
if i < 0 || i >= BuiltinOp(len(_BuiltinOp_index)-1) {
return "BuiltinOp(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _BuiltinOp_name[_BuiltinOp_index[i]:_BuiltinOp_index[i+1]]
}

View File

@ -0,0 +1,180 @@
package compiler
//go:generate go run golang.org/x/tools/cmd/[email protected] -type=Opcode,AugOp,BuiltinOp
// Opcode represents a single virtual machine instruction (or argument). The
// comments beside each opcode show any arguments that instruction consumes.
//
// Normally this is called "bytecode", but I've avoided that term here as each
// opcode is a 32-bit word, not an 8-bit byte.
//
// I tested various bit widths, and I believe 32 bit was the fastest, but also
// means we don't have to worry about jump offsets overflowing. That's tested
// in the compiler, but who's going to have an AWK program bigger than 2GB?
type Opcode int32
const (
Nop Opcode = iota
// Stack operations
Num // numIndex
Str // strIndex
Dupe
Drop
Swap
// Fetch a field, variable, or array item
Field
FieldInt // index
FieldByName
FieldByNameStr // strIndex
Global // index
Local // index
Special // index
ArrayGlobal // arrayIndex
ArrayLocal // arrayIndex
InGlobal // arrayIndex
InLocal // arrayIndex
// Assign a field, variable, or array item
AssignField
AssignGlobal // index
AssignLocal // index
AssignSpecial // index
AssignArrayGlobal // arrayIndex
AssignArrayLocal // arrayIndex
// Delete statement
Delete // arrayScope arrayIndex
DeleteAll // arrayScope arrayIndex
// Post-increment and post-decrement
IncrField // amount
IncrGlobal // amount index
IncrLocal // amount index
IncrSpecial // amount index
IncrArrayGlobal // amount arrayIndex
IncrArrayLocal // amount arrayIndex
// Augmented assignment (also used for pre-increment and pre-decrement)
AugAssignField // augOp
AugAssignGlobal // augOp index
AugAssignLocal // augOp index
AugAssignSpecial // augOp index
AugAssignArrayGlobal // augOp arrayIndex
AugAssignArrayLocal // augOp arrayIndex
// Stand-alone regex expression /foo/
Regex // regexIndex
// Multi-index concatenation
IndexMulti // num
// Multi-value concatenation
ConcatMulti // num
// Binary operators
Add
Subtract
Multiply
Divide
Power
Modulo
Equals
NotEquals
Less
Greater
LessOrEqual
GreaterOrEqual
Concat
Match
NotMatch
// Unary operators
Not
UnaryMinus
UnaryPlus
Boolean
// Control flow
Jump // offset
JumpFalse // offset
JumpTrue // offset
JumpEquals // offset
JumpNotEquals // offset
JumpLess // offset
JumpGreater // offset
JumpLessOrEqual // offset
JumpGreaterOrEqual // offset
Next
Exit
ForIn // varScope varIndex arrayScope arrayIndex offset
BreakForIn
// Builtin functions
CallBuiltin // builtinOp
CallSplit // arrayScope arrayIndex
CallSplitSep // arrayScope arrayIndex
CallSprintf // numArgs
// User and native functions
CallUser // funcIndex numArrayArgs [arrayScope1 arrayIndex1 ...]
CallNative // funcIndex numArgs
Return
ReturnNull
Nulls // numNulls
// Print, printf, and getline
Print // numArgs redirect
Printf // numArgs redirect
Getline // redirect
GetlineField // redirect
GetlineGlobal // redirect index
GetlineLocal // redirect index
GetlineSpecial // redirect index
GetlineArray // redirect arrayScope arrayIndex
EndOpcode
)
// AugOp represents an augmented assignment operation.
type AugOp Opcode
const (
AugOpAdd AugOp = iota
AugOpSub
AugOpMul
AugOpDiv
AugOpPow
AugOpMod
)
// BuiltinOp represents a builtin function call.
type BuiltinOp Opcode
const (
BuiltinAtan2 BuiltinOp = iota
BuiltinClose
BuiltinCos
BuiltinExp
BuiltinFflush
BuiltinFflushAll
BuiltinGsub
BuiltinIndex
BuiltinInt
BuiltinLength
BuiltinLengthArg
BuiltinLog
BuiltinMatch
BuiltinRand
BuiltinSin
BuiltinSqrt
BuiltinSrand
BuiltinSrandSeed
BuiltinSub
BuiltinSubstr
BuiltinSubstrLength
BuiltinSystem
BuiltinTolower
BuiltinToupper
)

View File

@ -1,4 +1,4 @@
// Evaluate builtin and user-defined function calls
// Call native Go functions; helpers for some builtin function calls.
package interp
@ -6,391 +6,19 @@ import (
"bytes"
"errors"
"fmt"
"io"
"math"
"os/exec"
"reflect"
"sort"
"strconv"
"strings"
"time"
"unicode/utf8"
. "github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
// callBuiltin calls the builtin function identified by the token "op" with
// the given (unevaluated) argument expressions and returns its result.
//
// split() and [g]sub() are handled first, because one of their arguments is
// an array or an lvalue rather than an ordinary value. For every other
// builtin all arguments are evaluated up front, and the function is then
// selected by a second switch. An unrecognised op causes a panic.
func (p *interp) callBuiltin(op Token, argExprs []Expr) (value, error) {
// split() has an array arg (not evaluated) and [g]sub() have an
// lvalue arg, so handle them as special cases
switch op {
case F_SPLIT:
strValue, err := p.eval(argExprs[0])
if err != nil {
return null(), err
}
str := p.toString(strValue)
// Use the explicit separator (third arg) if given, otherwise the
// interpreter's current field separator.
var fieldSep string
if len(argExprs) == 3 {
sepValue, err := p.eval(argExprs[2])
if err != nil {
return null(), err
}
fieldSep = p.toString(sepValue)
} else {
fieldSep = p.fieldSep
}
arrayExpr := argExprs[1].(*ArrayExpr)
n, err := p.split(str, arrayExpr.Scope, arrayExpr.Index, fieldSep)
if err != nil {
return null(), err
}
return num(float64(n)), nil
case F_SUB, F_GSUB:
regexValue, err := p.eval(argExprs[0])
if err != nil {
return null(), err
}
regex := p.toString(regexValue)
replValue, err := p.eval(argExprs[1])
if err != nil {
return null(), err
}
repl := p.toString(replValue)
// The target is the third arg if given, otherwise the current line.
var in string
if len(argExprs) == 3 {
inValue, err := p.eval(argExprs[2])
if err != nil {
return null(), err
}
in = p.toString(inValue)
} else {
in = p.line
}
out, n, err := p.sub(regex, repl, in, op == F_GSUB)
if err != nil {
return null(), err
}
// Store the result back where the input came from: the lvalue
// argument, or the current line.
if len(argExprs) == 3 {
err := p.assign(argExprs[2], str(out))
if err != nil {
return null(), err
}
} else {
p.setLine(out, true)
}
return num(float64(n)), nil
}
// Now evaluate the argExprs (calls with up to 7 args don't
// require heap allocation)
args := make([]value, 0, 7)
for _, a := range argExprs {
arg, err := p.eval(a)
if err != nil {
return null(), err
}
args = append(args, arg)
}
// Then switch on the function for the ordinary functions
switch op {
case F_LENGTH:
// With no argument, length() measures the current line.
var s string
if len(args) > 0 {
s = p.toString(args[0])
} else {
s = p.line
}
// Length is counted in bytes when p.bytes is set, otherwise in runes.
var n int
if p.bytes {
n = len(s)
} else {
n = utf8.RuneCountInString(s)
}
return num(float64(n)), nil
case F_MATCH:
re, err := p.compileRegex(p.toString(args[1]))
if err != nil {
return null(), err
}
s := p.toString(args[0])
loc := re.FindStringIndex(s)
if loc == nil {
// No match: start 0, length -1, and return 0.
p.matchStart = 0
p.matchLength = -1
return num(0), nil
}
// Record the 1-based start and the length of the match, in bytes or
// runes depending on p.bytes.
if p.bytes {
p.matchStart = loc[0] + 1
p.matchLength = loc[1] - loc[0]
} else {
p.matchStart = utf8.RuneCountInString(s[:loc[0]]) + 1
p.matchLength = utf8.RuneCountInString(s[loc[0]:loc[1]])
}
return num(float64(p.matchStart)), nil
case F_SUBSTR:
s := p.toString(args[0])
pos := int(args[1].num())
if p.bytes {
// Byte mode: clamp pos into [1, len(s)+1], then clamp the
// optional length into [0, maxLength].
if pos > len(s) {
pos = len(s) + 1
}
if pos < 1 {
pos = 1
}
maxLength := len(s) - pos + 1
length := maxLength
if len(args) == 3 {
length = int(args[2].num())
if length < 0 {
length = 0
}
if length > maxLength {
length = maxLength
}
}
return str(s[pos-1 : pos-1+length]), nil
} else {
// Count characters till we get to pos.
chars := 1
start := 0
for start = range s {
chars++
if chars > pos {
break
}
}
// pos is past the last character: start at the end of s.
if pos >= chars {
start = len(s)
}
// Count characters from start till we reach length.
var end int
if len(args) == 3 {
length := int(args[2].num())
chars = 0
for end = range s[start:] {
chars++
if chars > length {
break
}
}
if length >= chars {
end = len(s)
} else {
// end is an offset within s[start:]; make it an offset in s.
end += start
}
} else {
end = len(s)
}
return str(s[start:end]), nil
}
case F_SPRINTF:
s, err := p.sprintf(p.toString(args[0]), args[1:])
if err != nil {
return null(), err
}
return str(s), nil
case F_INDEX:
s := p.toString(args[0])
substr := p.toString(args[1])
index := strings.Index(s, substr)
if p.bytes {
// strings.Index returns -1 when not found, so this yields 0.
return num(float64(index + 1)), nil
} else {
if index < 0 {
return num(float64(0)), nil
}
// Convert the byte offset to a rune offset.
index = utf8.RuneCountInString(s[:index])
return num(float64(index + 1)), nil
}
case F_TOLOWER:
return str(strings.ToLower(p.toString(args[0]))), nil
case F_TOUPPER:
return str(strings.ToUpper(p.toString(args[0]))), nil
case F_ATAN2:
return num(math.Atan2(args[0].num(), args[1].num())), nil
case F_COS:
return num(math.Cos(args[0].num())), nil
case F_EXP:
return num(math.Exp(args[0].num())), nil
case F_INT:
// Truncates by converting through Go's int type.
return num(float64(int(args[0].num()))), nil
case F_LOG:
return num(math.Log(args[0].num())), nil
case F_SQRT:
return num(math.Sqrt(args[0].num())), nil
case F_RAND:
return num(p.random.Float64()), nil
case F_SIN:
return num(math.Sin(args[0].num())), nil
case F_SRAND:
// Returns the previously stored seed. Note that with no argument the
// generator is seeded from the clock but p.randSeed is left unchanged.
prevSeed := p.randSeed
switch len(args) {
case 0:
p.random.Seed(time.Now().UnixNano())
case 1:
p.randSeed = args[0].num()
p.random.Seed(int64(math.Float64bits(p.randSeed)))
}
return num(prevSeed), nil
case F_SYSTEM:
if p.noExec {
return null(), newError("can't call system() due to NoExec")
}
cmdline := p.toString(args[0])
cmd := p.execShell(cmdline)
cmd.Stdout = p.output
cmd.Stderr = p.errorOutput
_ = p.flushAll() // ensure synchronization
// Failure to start or an unexpected wait error is reported via
// printErrorf and yields -1; a non-zero exit yields the exit code.
err := cmd.Start()
if err != nil {
p.printErrorf("%s\n", err)
return num(-1), nil
}
err = cmd.Wait()
if err != nil {
if exitErr, ok := err.(*exec.ExitError); ok {
code := exitErr.ProcessState.ExitCode()
return num(float64(code)), nil
} else {
p.printErrorf("unexpected error running command %q: %v\n", cmdline, err)
return num(-1), nil
}
}
return num(0), nil
case F_CLOSE:
// Look the name up among input streams first, then output streams.
// Returns 0 on a successful close and -1 on a close error or when no
// stream with that name exists.
name := p.toString(args[0])
var c io.Closer = p.inputStreams[name]
if c != nil {
// Close input stream
delete(p.inputStreams, name)
err := c.Close()
if err != nil {
return num(-1), nil
}
return num(0), nil
}
c = p.outputStreams[name]
if c != nil {
// Close output stream
delete(p.outputStreams, name)
err := c.Close()
if err != nil {
return num(-1), nil
}
return num(0), nil
}
// Nothing to close
return num(-1), nil
case F_FFLUSH:
var name string
if len(args) > 0 {
name = p.toString(args[0])
}
var ok bool
if name != "" {
// Flush a single, named output stream
ok = p.flushStream(name)
} else {
// fflush() or fflush("") flushes all output streams
ok = p.flushAll()
}
if !ok {
return num(-1), nil
}
return num(0), nil
default:
// Shouldn't happen
panic(fmt.Sprintf("unexpected function: %s", op))
}
}
// execShell builds (but does not start) a command that runs the given code
// through the configured system shell: the first element of p.shellCommand
// is the executable, the remaining elements are its leading arguments, and
// code is passed as the final argument.
func (p *interp) execShell(code string) *exec.Cmd {
	shell := p.shellCommand[0]
	return exec.Command(shell, append(p.shellCommand[1:], code)...)
}
// Call user-defined function with given index and arguments, return
// its return value (or null value if it doesn't return anything).
//
// index selects the function in p.program.Functions and args are the
// unevaluated argument expressions. An error is returned if the call depth
// limit (maxCallDepth) has been reached, if evaluating an argument fails, or
// if executing the body fails with anything other than a returnValue.
func (p *interp) callUser(index int, args []Expr) (value, error) {
f := p.program.Functions[index]
if p.callDepth >= maxCallDepth {
return null(), newError("calling %q exceeded maximum call depth of %d", f.Name, maxCallDepth)
}
// Evaluate the arguments and push them onto the locals stack
oldFrame := p.frame
newFrameStart := len(p.stack)
var arrays []int
for i, arg := range args {
if f.Arrays[i] {
// Array parameters are not evaluated; record the index of the
// array the caller passed.
a := arg.(*VarExpr)
arrays = append(arrays, p.getArrayIndex(a.Scope, a.Index))
} else {
argValue, err := p.eval(arg)
if err != nil {
return null(), err
}
p.stack = append(p.stack, argValue)
}
}
// Push zero value for any additional parameters (it's valid to
// call a function with fewer arguments than it has parameters)
oldArraysLen := len(p.arrays)
for i := len(args); i < len(f.Params); i++ {
if f.Arrays[i] {
// A missing array parameter gets a fresh empty array appended to
// p.arrays; it is dropped again after the call.
arrays = append(arrays, len(p.arrays))
p.arrays = append(p.arrays, make(map[string]value))
} else {
p.stack = append(p.stack, null())
}
}
// The new frame is the part of the stack pushed for this call.
p.frame = p.stack[newFrameStart:]
p.localArrays = append(p.localArrays, arrays)
// Execute the function!
p.callDepth++
err := p.executes(f.Body)
p.callDepth--
// Pop the locals off the stack
p.stack = p.stack[:newFrameStart]
p.frame = oldFrame
p.localArrays = p.localArrays[:len(p.localArrays)-1]
p.arrays = p.arrays[:oldArraysLen]
// A "return" statement surfaces from executes as a returnValue error
// carrying the value; unwrap it here.
if r, ok := err.(returnValue); ok {
return r.Value, nil
}
if err != nil {
return null(), err
}
return null(), nil
}
// Call native-defined function with given name and arguments, return
// its return value (or null value if it doesn't return anything).
func (p *interp) callNative(index int, args []Expr) (value, error) {
func (p *interp) callNative(index int, args []value) (value, error) {
f := p.nativeFuncs[index]
minIn := len(f.in) // Minimum number of args we should pass
var variadicType reflect.Type
@ -401,11 +29,7 @@ func (p *interp) callNative(index int, args []Expr) (value, error) {
// Build list of args to pass to function
values := make([]reflect.Value, 0, 7) // up to 7 args won't require heap allocation
for i, arg := range args {
a, err := p.eval(arg)
if err != nil {
return null(), err
}
for i, a := range args {
var argType reflect.Type
if !f.isVariadic || i < len(f.in)-1 {
argType = f.in[i]
@ -618,12 +242,12 @@ func validNativeType(typ reflect.Type) bool {
}
// Guts of the split() function
func (p *interp) split(s string, scope VarScope, index int, fs string) (int, error) {
func (p *interp) split(s string, scope ast.VarScope, index int, fs string) (int, error) {
var parts []string
if fs == " " {
parts = strings.Fields(s)
} else if s == "" {
// NF should be 0 on empty line
// Leave parts 0 length on empty string
} else if utf8.RuneCountInString(fs) <= 1 {
parts = strings.Split(s, fs)
} else {
@ -637,7 +261,7 @@ func (p *interp) split(s string, scope VarScope, index int, fs string) (int, err
for i, part := range parts {
array[strconv.Itoa(i+1)] = numStr(part)
}
p.arrays[p.getArrayIndex(scope, index)] = array
p.arrays[p.arrayIndex(scope, index)] = array
return len(array), nil
}
@ -753,7 +377,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) {
if len(types) > len(args) {
return "", newError("format error: got %d args, expected %d", len(args), len(types))
}
converted := make([]interface{}, len(types))
converted := make([]interface{}, 0, 7) // up to 7 args won't require heap allocation
for i, t := range types {
a := args[i]
var v interface{}
@ -765,7 +389,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) {
case 'f':
v = a.num()
case 'u':
v = uint32(a.num())
v = uint(a.num())
case 'c':
var c []byte
n, isStr := a.isTrueStr()
@ -783,7 +407,7 @@ func (p *interp) sprintf(format string, args []value) (string, error) {
}
v = c
}
converted[i] = v
converted = append(converted, v)
}
return fmt.Sprintf(format, converted...), nil
}

File diff suppressed because it is too large Load Diff

View File

@ -5,16 +5,19 @@ package interp
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"regexp"
"runtime"
"strconv"
"strings"
"unicode/utf8"
. "github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
@ -27,6 +30,73 @@ func (p *interp) printLine(writer io.Writer, line string) error {
return writeOutput(writer, p.outputRecordSep)
}
// printArgs writes the given values to writer as one output record (used
// for the "print" statement). In CSV or TSV output mode the values are
// written as a single CSV row; otherwise they are joined with the output
// field separator and terminated by the output record separator. Numbers
// are formatted with the output format.
func (p *interp) printArgs(writer io.Writer, args []value) error {
	if p.outputMode == CSVMode || p.outputMode == TSVMode {
		row := make([]string, 0, 7) // up to 7 args won't require a heap allocation
		for i := range args {
			row = append(row, args[i].str(p.outputFormat))
		}
		return p.writeCSV(writer, row)
	}

	// Default mode: OFS between args, then ORS (usually newline).
	for i := range args {
		if i != 0 {
			if err := writeOutput(writer, p.outputFieldSep); err != nil {
				return err
			}
		}
		if err := writeOutput(writer, args[i].str(p.outputFormat)); err != nil {
			return err
		}
	}
	return writeOutput(writer, p.outputRecordSep)
}
// writeCSV writes fields to output as a single CSV row, using the configured
// output separator (and CRLF line endings on Windows).
//
// When output is already a *bufio.Writer (the common case), csv.NewWriter
// ends up using it directly: csv.NewWriter calls bufio.NewWriter, which
// calls bufio.NewWriterSize with a 4KB buffer, and bufio.NewWriterSize is
// documented as returning the underlying bufio.Writer if it is passed a
// large enough one. This is not explicitly documented for csv.NewWriter.
// For any other kind of writer, a reusable buffered writer is wrapped around
// it and flushed after the row has been written.
func (p *interp) writeCSV(output io.Writer, fields []string) error {
	ownBuffer := false
	if _, buffered := output.(*bufio.Writer); !buffered {
		if p.csvOutput != nil {
			p.csvOutput.Reset(output)
		} else {
			p.csvOutput = bufio.NewWriterSize(output, 4096)
		}
		output = p.csvOutput
		ownBuffer = true
	}

	// Given the above, creating a new csv.Writer per row is cheap.
	w := csv.NewWriter(output)
	w.Comma = p.csvOutputConfig.Separator
	w.UseCRLF = runtime.GOOS == "windows"
	if err := w.Write(fields); err != nil {
		return err
	}
	if ownBuffer {
		return p.csvOutput.Flush()
	}
	return nil
}
// Implement a buffered version of WriteCloser so output is buffered
// when redirecting to a file (eg: print >"out")
type bufferedWriteCloser struct {
@ -49,16 +119,7 @@ func (wc *bufferedWriteCloser) Close() error {
// Determine the output stream for given redirect token and
// destination (file or pipe name)
func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) {
if redirect == ILLEGAL {
// Token "ILLEGAL" means send to standard output
return p.output, nil
}
destValue, err := p.eval(dest)
if err != nil {
return nil, err
}
func (p *interp) getOutputStream(redirect Token, destValue value) (io.Writer, error) {
name := p.toString(destValue)
if _, ok := p.inputStreams[name]; ok {
return nil, newError("can't write to reader stream")
@ -121,6 +182,18 @@ func (p *interp) getOutputStream(redirect Token, dest Expr) (io.Writer, error) {
}
}
// execShell builds (but does not start) a command that runs the given code
// through the configured system shell. The first element of p.shellCommand
// is the executable, the remaining elements are its leading arguments, and
// code is passed as the final argument. When context checking is enabled
// (p.checkCtx), the command is created with exec.CommandContext using p.ctx.
func (p *interp) execShell(code string) *exec.Cmd {
	executable := p.shellCommand[0]
	// Cap the sub-slice at its length so that append always copies, rather
	// than writing code into any spare capacity of p.shellCommand's backing
	// array (which may be shared with the caller's configuration).
	prefix := p.shellCommand[1:]
	args := append(prefix[:len(prefix):len(prefix)], code)
	if p.checkCtx {
		return exec.CommandContext(p.ctx, executable, args...)
	}
	return exec.Command(executable, args...)
}
// Get input Scanner to use for "getline" based on file name
func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
if _, ok := p.outputStreams[name]; ok {
@ -134,7 +207,7 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
if scanner, ok := p.scanners["-"]; ok {
return scanner, nil
}
scanner := p.newScanner(p.stdin)
scanner := p.newScanner(p.stdin, make([]byte, inputBufSize))
p.scanners[name] = scanner
return scanner, nil
}
@ -145,7 +218,7 @@ func (p *interp) getInputScannerFile(name string) (*bufio.Scanner, error) {
if err != nil {
return nil, err // *os.PathError is handled by caller (getline returns -1)
}
scanner := p.newScanner(r)
scanner := p.newScanner(r, make([]byte, inputBufSize))
p.scanners[name] = scanner
p.inputStreams[name] = r
return scanner, nil
@ -175,7 +248,7 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
p.printErrorf("%s\n", err)
return bufio.NewScanner(strings.NewReader("")), nil
}
scanner := p.newScanner(r)
scanner := p.newScanner(r, make([]byte, inputBufSize))
p.commands[name] = cmd
p.inputStreams[name] = r
p.scanners[name] = scanner
@ -183,28 +256,53 @@ func (p *interp) getInputScannerPipe(name string) (*bufio.Scanner, error) {
}
// Create a new buffered Scanner for reading input records
func (p *interp) newScanner(input io.Reader) *bufio.Scanner {
func (p *interp) newScanner(input io.Reader, buffer []byte) *bufio.Scanner {
scanner := bufio.NewScanner(input)
switch {
case p.inputMode == CSVMode || p.inputMode == TSVMode:
splitter := csvSplitter{
separator: p.csvInputConfig.Separator,
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
comment: p.csvInputConfig.Comment,
header: p.csvInputConfig.Header,
fields: &p.fields,
setFieldNames: p.setFieldNames,
}
scanner.Split(splitter.scan)
case p.recordSep == "\n":
// Scanner default is to split on newlines
case p.recordSep == "":
// Empty string for RS means split on \n\n (blank lines)
splitter := blankLineSplitter{&p.recordTerminator}
splitter := blankLineSplitter{terminator: &p.recordTerminator}
scanner.Split(splitter.scan)
case len(p.recordSep) == 1:
splitter := byteSplitter{p.recordSep[0]}
splitter := byteSplitter{sep: p.recordSep[0]}
scanner.Split(splitter.scan)
case utf8.RuneCountInString(p.recordSep) >= 1:
// Multi-byte and single char but multi-byte RS use regex
splitter := regexSplitter{p.recordSepRegex, &p.recordTerminator}
splitter := regexSplitter{re: p.recordSepRegex, terminator: &p.recordTerminator}
scanner.Split(splitter.scan)
}
buffer := make([]byte, inputBufSize)
scanner.Buffer(buffer, maxRecordLength)
return scanner
}
// setFieldNames records the header row's field names. It is passed to
// csvSplitter as a callback and invoked on the first row when the "header"
// option is specified. Besides storing the names, it clears the cached
// name-to-index mapping and rebuilds the global FIELDS array so that it maps
// 1-based field indexes to field names.
func (p *interp) setFieldNames(names []string) {
	p.fieldNames, p.fieldIndexes = names, nil

	fieldsArray := p.array(ast.ScopeGlobal, p.program.Arrays["FIELDS"])
	for key := range fieldsArray {
		delete(fieldsArray, key)
	}
	for idx := range names {
		fieldsArray[strconv.Itoa(idx+1)] = str(names[idx])
	}
}
// Copied from bufio/scan.go in the stdlib: I guess it's a bit more
// efficient than bytes.TrimSuffix(data, []byte("\r"))
func dropCR(data []byte) []byte {
@ -323,10 +421,222 @@ func (s regexSplitter) scan(data []byte, atEOF bool) (advance int, token []byte,
return 0, nil, nil
}
// csvSplitter splits input into records in CSV or TSV format. Its scan
// method has the signature of a bufio.SplitFunc.
type csvSplitter struct {
	separator rune // field separator character
	sepLen    int  // number of bytes separator occupies in the input
	comment   rune // lines starting with this rune are skipped (0 disables)
	header    bool // if true, the first row sets field names and is not returned

	recordBuffer []byte // scratch buffer holding the unquoted field data of the current record
	fieldIndexes []int  // end offset of each field within recordBuffer
	noBOMCheck   bool   // set once the first row has been parsed; no BOM is looked for after that

	fields        *[]string            // receives the parsed fields of each returned row
	setFieldNames func(names []string) // called with the header row's fields when header is set

	rowNum int // number of rows parsed so far (including a header row)
}

// scan is the split function: it parses one CSV record from data, stores its
// fields in *s.fields, and returns the raw text of the record (without the
// trailing newline) as the token. It returns (0, nil, nil) to request more
// data when data does not yet hold a complete record.
//
// A UTF-8 byte order mark at the start of the input is skipped. The BOM is
// only considered consumed once a complete first row has been parsed: if
// more data has to be requested first, the Scanner calls scan again with the
// BOM still at the start of data, so it must be detected (and skipped) again.
//
// The structure of this code is taken from the stdlib encoding/csv Reader
// code, which is licensed under a compatible BSD-style license.
//
// We don't support all encoding/csv features: FieldsPerRecord is not
// supported, LazyQuotes is always on, and TrimLeadingSpace is always off.
func (s *csvSplitter) scan(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Some CSV files are saved with a UTF-8 BOM at the start; skip it.
	// s.noBOMCheck is deliberately not set here (see the doc comment).
	bomLen := 0
	if !s.noBOMCheck && len(data) >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF {
		bomLen = 3
		data = data[bomLen:]
		advance = bomLen
	}

	origData := data
	if atEOF && len(data) == 0 {
		// No more data, tell Scanner to stop.
		return 0, nil, nil
	}

	readLine := func() []byte {
		newline := bytes.IndexByte(data, '\n')
		var line []byte
		switch {
		case newline >= 0:
			// Process a single line (including newline).
			line = data[:newline+1]
			data = data[newline+1:]
		case atEOF:
			// If at EOF, we have a final record without a newline.
			line = data
			data = data[len(data):]
		default:
			// Need more data
			return nil
		}

		// For backwards compatibility, drop trailing \r before EOF.
		if len(line) > 0 && atEOF && line[len(line)-1] == '\r' {
			line = line[:len(line)-1]
			advance++
		}

		return line
	}

	// Read line (automatically skipping past empty lines and any comments).
	skip := 0
	var line []byte
	for {
		line = readLine()
		if len(line) == 0 {
			return 0, nil, nil // Request more data
		}
		if s.comment != 0 && nextRune(line) == s.comment {
			advance += len(line)
			skip += len(line)
			continue // Skip comment lines
		}
		if len(line) == lenNewline(line) {
			advance += len(line)
			skip += len(line)
			continue // Skip empty lines
		}
		break
	}

	// Parse each field in the record.
	const quoteLen = len(`"`)
	tokenHasCR := false
	s.recordBuffer = s.recordBuffer[:0]
	s.fieldIndexes = s.fieldIndexes[:0]
parseField:
	for {
		if len(line) == 0 || line[0] != '"' {
			// Non-quoted string field
			i := bytes.IndexRune(line, s.separator)
			field := line
			if i >= 0 {
				advance += i + s.sepLen
				field = field[:i]
			} else {
				advance += len(field)
				field = field[:len(field)-lenNewline(field)]
			}
			s.recordBuffer = append(s.recordBuffer, field...)
			s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
			if i >= 0 {
				line = line[i+s.sepLen:]
				continue parseField
			}
			break parseField
		} else {
			// Quoted string field
			line = line[quoteLen:]
			advance += quoteLen
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					// Hit next quote.
					s.recordBuffer = append(s.recordBuffer, line[:i]...)
					line = line[i+quoteLen:]
					advance += i + quoteLen
					switch rn := nextRune(line); {
					case rn == '"':
						// `""` sequence (append quote).
						s.recordBuffer = append(s.recordBuffer, '"')
						line = line[quoteLen:]
						advance += quoteLen
					case rn == s.separator:
						// `",` sequence (end of field).
						line = line[s.sepLen:]
						s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
						advance += s.sepLen
						continue parseField
					case lenNewline(line) == len(line):
						// `"\n` sequence (end of line).
						s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
						advance += len(line)
						break parseField
					default:
						// `"` sequence (bare quote).
						s.recordBuffer = append(s.recordBuffer, '"')
					}
				} else if len(line) > 0 {
					// Hit end of line (copy all data so far).
					advance += len(line)
					newlineLen := lenNewline(line)
					if newlineLen == 2 {
						tokenHasCR = true
						s.recordBuffer = append(s.recordBuffer, line[:len(line)-2]...)
						s.recordBuffer = append(s.recordBuffer, '\n')
					} else {
						s.recordBuffer = append(s.recordBuffer, line...)
					}
					line = readLine()
					if line == nil {
						return 0, nil, nil // Request more data
					}
				} else {
					// Abrupt end of file.
					s.fieldIndexes = append(s.fieldIndexes, len(s.recordBuffer))
					advance += len(line)
					break parseField
				}
			}
		}
	}

	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	strBuf := string(s.recordBuffer) // Convert to string once to batch allocations
	fields := make([]string, len(s.fieldIndexes))
	preIdx := 0
	for i, idx := range s.fieldIndexes {
		fields[i] = strBuf[preIdx:idx]
		preIdx = idx
	}

	// A complete row has been parsed, so any BOM is now behind us.
	s.noBOMCheck = true

	if s.rowNum == 0 && s.header {
		// Set header field names and advance, but don't return a line (token).
		s.rowNum++
		s.setFieldNames(fields)
		return advance, nil, nil
	}

	// Normal row, set fields and return a line (token). advance counts from
	// the start of the caller's data (including any BOM), whereas origData
	// starts after the BOM, so subtract bomLen to index into origData.
	s.rowNum++
	*s.fields = fields
	token = origData[skip : advance-bomLen]
	token = token[:len(token)-lenNewline(token)]
	if tokenHasCR {
		token = bytes.ReplaceAll(token, []byte{'\r'}, nil)
	}
	return advance, token, nil
}

// lenNewline reports the number of bytes for the trailing \n: 2 if b ends in
// "\r\n", 1 if it ends in "\n", and 0 otherwise.
func lenNewline(b []byte) int {
	if len(b) > 0 && b[len(b)-1] == '\n' {
		if len(b) > 1 && b[len(b)-2] == '\r' {
			return 2
		}
		return 1
	}
	return 0
}

// nextRune returns the next rune in b or utf8.RuneError.
func nextRune(b []byte) rune {
	r, _ := utf8.DecodeRune(b)
	return r
}
// Setup for a new input file with given name. The name is stored as given
// (the value used for standard input is chosen by the caller), the per-file
// line counter is reset to zero, and the interpreter is marked as having
// seen an input file.
func (p *interp) setFile(filename string) {
p.filename = numStr(filename)
p.fileLineNum = 0
p.hadFiles = true
}
// Setup for a new input line (but don't parse it into fields till we
@ -335,6 +645,7 @@ func (p *interp) setLine(line string, isTrueStr bool) {
p.line = line
p.lineIsTrueStr = isTrueStr
p.haveFields = false
p.reparseCSV = true
}
// Ensure that the current line is parsed into fields, splitting it
@ -346,6 +657,23 @@ func (p *interp) ensureFields() {
p.haveFields = true
switch {
case p.inputMode == CSVMode || p.inputMode == TSVMode:
if p.reparseCSV {
scanner := bufio.NewScanner(strings.NewReader(p.line))
scanner.Buffer(nil, maxRecordLength)
splitter := csvSplitter{
separator: p.csvInputConfig.Separator,
sepLen: utf8.RuneLen(p.csvInputConfig.Separator),
comment: p.csvInputConfig.Comment,
fields: &p.fields,
}
scanner.Split(splitter.scan)
if !scanner.Scan() {
p.fields = nil
}
} else {
// Normally fields have already been parsed by csvSplitter
}
case p.fieldSep == " ":
// FS space (default) means split fields on any whitespace
p.fields = strings.Fields(p.line)
@ -362,7 +690,7 @@ func (p *interp) ensureFields() {
// Special case for when RS=="" and FS is single character,
// split on newline in addition to FS. See more here:
// https://www.gnu.org/software/gawk/manual/html_node/Multiple-Line.html
if p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
if p.inputMode == DefaultMode && p.recordSep == "" && utf8.RuneCountInString(p.fieldSep) == 1 {
fields := make([]string, 0, len(p.fields))
for _, field := range p.fields {
lines := strings.Split(field, "\n")
@ -374,7 +702,10 @@ func (p *interp) ensureFields() {
p.fields = fields
}
p.fieldsIsTrueStr = make([]bool, len(p.fields))
p.fieldsIsTrueStr = p.fieldsIsTrueStr[:0] // avoid allocation most of the time
for range p.fields {
p.fieldsIsTrueStr = append(p.fieldsIsTrueStr, false)
}
p.numFields = len(p.fields)
}
@ -391,8 +722,7 @@ func (p *interp) nextLine() (string, error) {
// Moved past number of ARGV args and haven't seen
// any files yet, use stdin
p.input = p.stdin
p.setFile("")
p.hadFiles = true
p.setFile("-")
} else {
if p.filenameIndex >= p.argc {
// Done with ARGV args, all done with input
@ -403,15 +733,24 @@ func (p *interp) nextLine() (string, error) {
// not present
index := strconv.Itoa(p.filenameIndex)
argvIndex := p.program.Arrays["ARGV"]
argvArray := p.arrays[p.getArrayIndex(ScopeGlobal, argvIndex)]
argvArray := p.array(ast.ScopeGlobal, argvIndex)
filename := p.toString(argvArray[index])
p.filenameIndex++
// Is it actually a var=value assignment?
matches := varRegex.FindStringSubmatch(filename)
var matches []string
if !p.noArgVars {
matches = varRegex.FindStringSubmatch(filename)
}
if len(matches) >= 3 {
// Yep, set variable to value and keep going
err := p.setVarByName(matches[1], matches[2])
name, val := matches[1], matches[2]
// Oddly, var=value args must interpret escapes (issue #129)
unescaped, err := Unescape(val)
if err == nil {
val = unescaped
}
err = p.setVarByName(name, val)
if err != nil {
return "", err
}
@ -423,7 +762,7 @@ func (p *interp) nextLine() (string, error) {
} else if filename == "-" {
// ARGV arg is "-" meaning stdin
p.input = p.stdin
p.setFile("")
p.setFile("-")
} else {
// A regular file name, open it
if p.noFileReads {
@ -435,10 +774,12 @@ func (p *interp) nextLine() (string, error) {
}
p.input = input
p.setFile(filename)
p.hadFiles = true
}
}
p.scanner = p.newScanner(p.input)
if p.inputBuffer == nil { // reuse buffer from last input file
p.inputBuffer = make([]byte, inputBufSize)
}
p.scanner = p.newScanner(p.input, p.inputBuffer)
}
p.recordTerminator = p.recordSep // will be overridden if RS is "" or multiple chars
if p.scanner.Scan() {

View File

@ -0,0 +1,176 @@
// The New...Execute API (allows you to efficiently execute the same program repeatedly).
package interp
import (
"context"
"math"
"github.com/benhoyt/goawk/parser"
)
const checkContextOps = 1000 // for efficiency, only check context every N instructions
// Interpreter is an interpreter for a specific program, allowing you to
// efficiently execute the same program over and over with different inputs.
// Use New to create an Interpreter.
//
// Most programs won't need reusable execution, and should use the simpler
// Exec or ExecProgram functions instead.
type Interpreter struct {
// interp is the internal interpreter state that is reset and reused by
// each call to Execute or ExecuteContext.
interp *interp
}
// New returns a reusable Interpreter for the given parsed program. As
// written, the returned error is always nil.
//
// Reusable execution is rarely needed; most programs should call the
// simpler Exec or ExecProgram functions instead.
func New(program *parser.Program) (*Interpreter, error) {
	reusable := &Interpreter{}
	reusable.interp = newInterp(program)
	return reusable, nil
}
// Execute runs the program using the supplied execution configuration
// (input, output, and variables) and returns the program's exit status code.
// Passing a nil config is valid and selects the defaults (zero values).
//
// Because internal memory allocations are reused, repeatedly calling Execute
// on one Interpreter is significantly more efficient than repeatedly calling
// ExecProgram.
//
// I/O state is reset before each run. Variables and the random number
// generator seed are not; call ResetVars and ResetRand to reset those.
//
// Prefer setting config.Environ to a non-nil slice, since otherwise Execute
// calls the relatively inefficient os.Environ on every run. Use []string{}
// if the script doesn't need environment variables, or call os.Environ once
// and pass that value each time.
//
// Note that config.Funcs must be the same value provided to
// parser.ParseProgram, and must not change between calls to Execute.
func (p *Interpreter) Execute(config *Config) (int, error) {
	ip := p.interp
	ip.resetCore()
	ip.checkCtx = false // plain Execute never checks a context

	if err := ip.setExecuteConfig(config); err != nil {
		return 0, err
	}
	return ip.executeAll()
}
// resetCore clears the per-run state of the interpreter so it can be reused
// for another execution: input/output bookkeeping, the call stack state, the
// current record and its fields, and the exit status. The maps are emptied
// in place rather than reallocated.
func (p *interp) resetCore() {
	// Input and output bookkeeping.
	p.scanner = nil
	p.input = nil
	for name := range p.scanners {
		delete(p.scanners, name)
	}
	for name := range p.inputStreams {
		delete(p.inputStreams, name)
	}
	for name := range p.outputStreams {
		delete(p.outputStreams, name)
	}
	for name := range p.commands {
		delete(p.commands, name)
	}

	// Call stack state.
	p.sp = 0
	p.callDepth = 0
	p.localArrays = p.localArrays[:0]

	// Current file, record and fields.
	p.filename = null()
	p.line, p.lineIsTrueStr = "", false
	p.lineNum, p.fileLineNum = 0, 0
	p.fields, p.fieldsIsTrueStr = nil, nil
	p.numFields = 0
	p.haveFields = false

	p.exitStatus = 0
}
// resetVars resets the program's variables: every global scalar becomes
// null, every array is emptied in place, and the special variables are set
// back to the default values assigned below.
func (p *interp) resetVars() {
	// Global scalars.
	for idx := range p.globals {
		p.globals[idx] = null()
	}

	// Arrays: remove every key but keep the maps themselves.
	for idx := range p.arrays {
		arr := p.arrays[idx]
		for key := range arr {
			delete(arr, key)
		}
	}

	// Number formats.
	p.convertFormat, p.outputFormat = "%.6g", "%.6g"

	// Input separators.
	p.fieldSep, p.fieldSepRegex = " ", nil
	p.recordSep, p.recordSepRegex = "\n", nil
	p.recordTerminator = ""

	// Output and subscript separators.
	p.outputFieldSep, p.outputRecordSep = " ", "\n"
	p.subscriptSep = "\x1c"

	// Result of the last match().
	p.matchLength, p.matchStart = 0, 0
}
// ResetVars resets this interpreter's variables, setting scalar variables to
// null, clearing arrays, and resetting special variables such as FS and RS to
// their defaults. It does not reset I/O state or the random number generator
// seed.
func (p *Interpreter) ResetVars() {
p.interp.resetVars()
}
// ResetRand restores this interpreter's random number generator to its
// initial seed, so that rand() yields the same sequence it would have right
// after calling New. This is a relatively CPU-intensive operation.
func (p *Interpreter) ResetRand() {
	ip := p.interp
	ip.randSeed = 1.0
	seed := int64(math.Float64bits(ip.randSeed))
	ip.random.Seed(seed)
}
// ExecuteContext behaves like Execute, but additionally takes a context so
// that the caller can set an execution timeout or cancel the execution. For
// efficiency, the context is only tested every 1000 virtual machine
// instructions. Context checking is skipped entirely when ctx is
// context.Background() or context.TODO().
//
// Context handling is not preemptive: currently long-running operations like
// system() won't be interrupted.
func (p *Interpreter) ExecuteContext(ctx context.Context, config *Config) (int, error) {
	ip := p.interp
	ip.resetCore()

	ip.ctx = ctx
	ip.ctxDone = ctx.Done()
	ip.ctxOps = 0
	ip.checkCtx = !(ctx == context.Background() || ctx == context.TODO())

	if err := ip.setExecuteConfig(config); err != nil {
		return 0, err
	}
	return ip.executeAll()
}
// checkContext counts one operation and, once checkContextOps operations
// have been counted, resets the counter and reports whether the context is
// done (via checkContextNow). On all other calls it returns nil without
// looking at the context.
func (p *interp) checkContext() error {
	p.ctxOps++
	if p.ctxOps >= checkContextOps {
		p.ctxOps = 0
		return p.checkContextNow()
	}
	return nil
}
// checkContextNow does a non-blocking check of the context's done channel,
// returning the context's error if it is done and nil otherwise.
func (p *interp) checkContextNow() error {
	select {
	case <-p.ctxDone:
	default:
		return nil
	}
	return p.ctx.Err()
}

View File

@ -53,6 +53,20 @@ func boolean(b bool) value {
return num(0)
}
// String renders v in a constructor-like form for debugging: str("..."),
// num(...), numStr("..."), or null() for any other type.
func (v value) String() string {
	if v.typ == typeStr {
		return fmt.Sprintf("str(%q)", v.s)
	}
	if v.typ == typeNum {
		return fmt.Sprintf("num(%s)", v.str("%.6g"))
	}
	if v.typ == typeNumStr {
		return fmt.Sprintf("numStr(%q)", v.s)
	}
	return "null()"
}
// Return true if value is a "true string" (a string or a "numeric string"
// from an input field that can't be converted to a number). If false,
// also return the (possibly converted) number.
@ -61,7 +75,7 @@ func (v value) isTrueStr() (float64, bool) {
case typeStr:
return 0, true
case typeNumStr:
f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64)
f, err := parseFloat(v.s)
if err != nil {
return 0, true
}
@ -79,7 +93,7 @@ func (v value) boolean() bool {
case typeStr:
return v.s != ""
case typeNumStr:
f, err := strconv.ParseFloat(strings.TrimSpace(v.s), 64)
f, err := parseFloat(v.s)
if err != nil {
return v.s != ""
}
@ -89,6 +103,30 @@ func (v value) boolean() bool {
}
}
// parseFloat is like strconv.ParseFloat (after trimming surrounding white
// space), with these differences:
//
//   - hexadecimal floating point is allowed without an exponent ("0x12"),
//   - "+nan" and "-nan" are allowed (both return math.NaN()),
//   - underscore digit separators are rejected with strconv.ErrSyntax.
//
// strconv.ParseFloat requires hexadecimal input to carry a binary exponent,
// so "p0" is appended when none is present. The exponent marker may be
// written as 'p' or 'P'; both are recognised so that input such as "0x1P3"
// is passed through unchanged instead of having a second exponent appended.
func parseFloat(s string) (float64, error) {
	s = strings.TrimSpace(s)
	if len(s) > 1 && (s[0] == '+' || s[0] == '-') {
		if len(s) == 4 && hasNaNPrefix(s[1:]) {
			// ParseFloat doesn't handle "nan" with sign prefix, so handle it here.
			return math.NaN(), nil
		}
		if len(s) > 3 && hasHexPrefix(s[1:]) && !strings.ContainsAny(s, "pP") {
			s += "p0"
		}
	} else if len(s) > 2 && hasHexPrefix(s) && !strings.ContainsAny(s, "pP") {
		s += "p0"
	}
	n, err := strconv.ParseFloat(s, 64)
	if err == nil && strings.IndexByte(s, '_') >= 0 {
		// Underscore separators aren't supported by AWK.
		return 0, strconv.ErrSyntax
	}
	return n, err
}
// Return value's string value, or convert to a string using given
// format if a number value. Integers are a special case and don't
// use floatFormat.
@ -106,6 +144,9 @@ func (v value) str(floatFormat string) string {
case v.n == float64(int(v.n)):
return strconv.Itoa(int(v.n))
default:
if floatFormat == "%.6g" {
return strconv.FormatFloat(v.n, 'g', 6, 64)
}
return fmt.Sprintf(floatFormat, v.n)
}
}
@ -137,20 +178,35 @@ func parseFloatPrefix(s string) float64 {
}
start := i
// Parse mantissa: optional sign, initial digit(s), optional '.',
// then more digits
gotDigit := false
// Parse optional sign and check for NaN and Inf.
if i < len(s) && (s[i] == '+' || s[i] == '-') {
i++
}
for i < len(s) && s[i] >= '0' && s[i] <= '9' {
if i+3 <= len(s) {
if hasNaNPrefix(s[i:]) {
return math.NaN()
}
if hasInfPrefix(s[i:]) {
if s[start] == '-' {
return math.Inf(-1)
}
return math.Inf(1)
}
}
// Parse mantissa: initial digit(s), optional '.', then more digits
if i+2 < len(s) && hasHexPrefix(s[i:]) {
return parseHexFloatPrefix(s, start, i+2)
}
gotDigit := false
for i < len(s) && isDigit(s[i]) {
gotDigit = true
i++
}
if i < len(s) && s[i] == '.' {
i++
}
for i < len(s) && s[i] >= '0' && s[i] <= '9' {
for i < len(s) && isDigit(s[i]) {
gotDigit = true
i++
}
@ -166,7 +222,7 @@ func parseFloatPrefix(s string) float64 {
if i < len(s) && (s[i] == '+' || s[i] == '-') {
i++
}
for i < len(s) && s[i] >= '0' && s[i] <= '9' {
for i < len(s) && isDigit(s[i]) {
i++
end = i
}
@ -176,3 +232,63 @@ func parseFloatPrefix(s string) float64 {
f, _ := strconv.ParseFloat(floatStr, 64)
return f // Returns infinity in case of "value out of range" error
}
// hasHexPrefix reports whether s begins with "0x" or "0X". Strings shorter
// than two bytes never match (rather than panicking on the index).
func hasHexPrefix(s string) bool {
	return len(s) >= 2 && s[0] == '0' && (s[1] == 'x' || s[1] == 'X')
}
// hasNaNPrefix reports whether s begins with "nan" in any mix of upper and
// lower case. Strings shorter than three bytes never match (rather than
// panicking on the index).
func hasNaNPrefix(s string) bool {
	return len(s) >= 3 &&
		(s[0] == 'n' || s[0] == 'N') &&
		(s[1] == 'a' || s[1] == 'A') &&
		(s[2] == 'n' || s[2] == 'N')
}
// hasInfPrefix reports whether s begins with "inf" in any mix of upper and
// lower case. Strings shorter than three bytes never match (rather than
// panicking on the index).
func hasInfPrefix(s string) bool {
	return len(s) >= 3 &&
		(s[0] == 'i' || s[0] == 'I') &&
		(s[1] == 'n' || s[1] == 'N') &&
		(s[2] == 'f' || s[2] == 'F')
}
// parseHexFloatPrefix is the hexadecimal branch of parseFloatPrefix. The
// number begins at s[start] (including any sign and the "0x" prefix) and i
// is the index of the first byte after the "0x". It returns the value of the
// longest valid hex floating point prefix, or 0 if no hex digit follows the
// prefix.
func parseHexFloatPrefix(s string, start, i int) float64 {
	n := len(s)

	// Mantissa: hex digits, an optional '.', then more hex digits.
	intStart := i
	for i < n && isHexDigit(s[i]) {
		i++
	}
	digitCount := i - intStart
	if i < n && s[i] == '.' {
		i++
	}
	fracStart := i
	for i < n && isHexDigit(s[i]) {
		i++
	}
	digitCount += i - fracStart
	if digitCount == 0 {
		return 0
	}

	// Exponent: 'p' or 'P', an optional sign, then decimal digits. It only
	// counts as part of the number if at least one digit is present.
	end := i
	hasExponent := false
	if i < n && (s[i] == 'p' || s[i] == 'P') {
		j := i + 1
		if j < n && (s[j] == '+' || s[j] == '-') {
			j++
		}
		expStart := j
		for j < n && isDigit(s[j]) {
			j++
		}
		if j > expStart {
			hasExponent = true
			end = j
		}
	}

	literal := s[start:end]
	if !hasExponent {
		literal += "p0" // AWK allows "0x12", ParseFloat requires "0x12p0"
	}
	// The error is deliberately ignored: ParseFloat still returns infinity
	// for a "value out of range" error, which is the result wanted here.
	f, _ := strconv.ParseFloat(literal, 64)
	return f
}
// isDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isDigit(c byte) bool {
	// Byte subtraction wraps around, so anything below '0' becomes a large
	// value and fails the single comparison.
	return c-'0' < 10
}
// isHexDigit reports whether c is an ASCII hexadecimal digit: '0'-'9',
// 'a'-'f' or 'A'-'F'.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}

1259
play/vendor/github.com/benhoyt/goawk/interp/vm.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -5,11 +5,10 @@
//
// To tokenize some source, create a new lexer with NewLexer(src) and
// then call Scan() until the token type is EOF or ILLEGAL.
//
package lexer
import (
"fmt"
"errors"
)
// Lexer tokenizes a byte string of AWK source code. Use NewLexer to
@ -120,6 +119,8 @@ func (l *Lexer) scan() (Position, Token, string) {
switch ch {
case '$':
tok = DOLLAR
case '@':
tok = AT
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
// Avoid make/append and use l.offset directly for performance
start := l.offset - 2
@ -184,80 +185,18 @@ func (l *Lexer) scan() (Position, Token, string) {
}
case '"', '\'':
// Note: POSIX awk spec doesn't allow single-quoted strings,
// but this helps without quoting, especially on Windows
// but this helps with quoting, especially on Windows
// where the shell quote character is " (double quote).
chars := make([]byte, 0, 32) // most won't require heap allocation
for l.ch != ch {
c := l.ch
if c == 0 {
return l.pos, ILLEGAL, "didn't find end quote in string"
}
if c == '\r' || c == '\n' {
return l.pos, ILLEGAL, "can't have newline in string"
}
if c != '\\' {
// Normal, non-escaped character
chars = append(chars, c)
l.next()
continue
}
// Escape sequence, skip over \ and process
l.next()
switch l.ch {
case 'n':
c = '\n'
l.next()
case 't':
c = '\t'
l.next()
case 'r':
c = '\r'
l.next()
case 'a':
c = '\a'
l.next()
case 'b':
c = '\b'
l.next()
case 'f':
c = '\f'
l.next()
case 'v':
c = '\v'
l.next()
case 'x':
// Hex byte of one of two hex digits
l.next()
digit := hexDigit(l.ch)
if digit < 0 {
return l.pos, ILLEGAL, "1 or 2 hex digits expected"
}
c = byte(digit)
l.next()
digit = hexDigit(l.ch)
if digit >= 0 {
c = c*16 + byte(digit)
l.next()
}
case '0', '1', '2', '3', '4', '5', '6', '7':
// Octal byte of 1-3 octal digits
c = l.ch - '0'
l.next()
for i := 0; i < 2 && l.ch >= '0' && l.ch <= '7'; i++ {
c = c*8 + l.ch - '0'
l.next()
}
default:
// Any other escape character is just the char
// itself, eg: "\z" is just "z"
c = l.ch
l.next()
}
chars = append(chars, c)
s, err := parseString(ch, func() byte { return l.ch }, l.next)
if err != nil {
return l.pos, ILLEGAL, err.Error()
}
if l.ch != ch {
return l.pos, ILLEGAL, "didn't find end quote in string"
}
l.next()
tok = STRING
val = string(chars)
val = s
case '(':
tok = LPAREN
case ')':
@ -366,7 +305,7 @@ func (l *Lexer) scanRegex() (Position, Token, string) {
pos.Column -= 2
chars = append(chars, '=')
default:
return l.pos, ILLEGAL, fmt.Sprintf("unexpected %s preceding regex", l.lastTok)
panic("ScanRegex should only be called after DIV or DIV_ASSIGN token")
}
for l.ch != '/' {
c := l.ch
@ -424,7 +363,7 @@ func (l *Lexer) unread() {
}
func isNameStart(ch byte) bool {
return ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
return ch == '_' || ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z'
}
func isDigit(ch byte) bool {
@ -459,3 +398,102 @@ func (l *Lexer) choice(ch byte, one, two Token) Token {
func (l *Lexer) PeekByte() byte {
return l.ch
}
// Unescape processes the backslash escape sequences in s and returns the
// resulting string. s must not include surrounding quotes. It exists for
// command line var=value assignments, whose values the POSIX AWK spec
// requires to be unescaped.
func Unescape(s string) (string, error) {
	pos := 0
	// peek yields the byte at the cursor, or 0 once the input is exhausted.
	peek := func() byte {
		if pos < len(s) {
			return s[pos]
		}
		return 0
	}
	advance := func() { pos++ }
	// A quote byte of 0 means only the end-of-input marker stops the parse.
	return parseString(0, peek, advance)
}
// parseString reads an AWK string body up to (but not including) the given
// quote character, translating backslash escapes as it goes. ch returns the
// current character (0 at end of input) and next advances by one character.
// Parsing also stops at end of input; the terminating character itself is
// never consumed.
func parseString(quote byte, ch func() byte, next func()) (string, error) {
	buf := make([]byte, 0, 32) // most strings won't require heap allocation
	for c := ch(); c != quote && c != 0; c = ch() {
		switch c {
		case '\r', '\n':
			return "", errors.New("can't have newline in string")
		case '\\':
			// Escape sequence, handled below.
		default:
			// Ordinary character: copy it through unchanged.
			buf = append(buf, c)
			next()
			continue
		}

		// Step over the backslash, then read and consume the escape
		// selector character.
		next()
		esc := ch()
		next()

		var b byte
		switch esc {
		case 'n':
			b = '\n'
		case 't':
			b = '\t'
		case 'r':
			b = '\r'
		case 'a':
			b = '\a'
		case 'b':
			b = '\b'
		case 'f':
			b = '\f'
		case 'v':
			b = '\v'
		case 'x':
			// Hex byte of one or two hex digits.
			hi := hexDigit(ch())
			if hi < 0 {
				return "", errors.New("1 or 2 hex digits expected")
			}
			b = byte(hi)
			next()
			if lo := hexDigit(ch()); lo >= 0 {
				b = b*16 + byte(lo)
				next()
			}
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal byte of one to three octal digits (the first has
			// already been consumed).
			b = esc - '0'
			for extra := 0; extra < 2; extra++ {
				d := ch()
				if d < '0' || d > '7' {
					break
				}
				b = b*8 + d - '0'
				next()
			}
		case 0:
			// Backslash right at the end of the input is taken as a
			// literal backslash (only reachable via Unescape).
			b = '\\'
		default:
			// Any other escaped character stands for itself, eg: "\z"
			// is just "z".
			b = esc
		}
		buf = append(buf, b)
	}
	return string(buf), nil
}

View File

@ -18,6 +18,7 @@ const (
AND
APPEND
ASSIGN
AT
COLON
COMMA
DECR
@ -172,6 +173,7 @@ var tokenNames = map[Token]string{
AND: "&&",
APPEND: ">>",
ASSIGN: "=",
AT: "@",
COLON: ":",
COMMA: ",",
DECR: "--",

View File

@ -1,8 +1,7 @@
// Package parser is an AWK parser and abstract syntax tree.
//
// Use the ParseProgram function to parse an AWK program, and then
// give the result to one of the interp.Exec* functions to execute it.
//
// Use the ParseProgram function to parse an AWK program, and then give the
// result to interp.Exec, interp.ExecProgram, or interp.New to execute it.
package parser
import (
@ -12,7 +11,8 @@ import (
"strconv"
"strings"
. "github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/compiler"
. "github.com/benhoyt/goawk/lexer"
)
@ -68,40 +68,52 @@ func ParseProgram(src []byte, config *ParserConfig) (prog *Program, err error) {
}
p.initResolve()
p.next() // initialize p.tok
return p.program(), nil
// Parse into abstract syntax tree
prog = p.program()
// Compile to virtual machine code
prog.Compiled, err = compiler.Compile(prog.toAST())
return prog, err
}
// Program is the abstract syntax tree for an entire AWK program.
// Program is the parsed and compiled representation of an entire AWK program.
type Program struct {
// These fields aren't intended to be used or modified directly,
// but are exported for the interpreter (Program itself needs to
// be exported in package "parser", otherwise these could live in
// "internal/ast".)
Begin []Stmts
Actions []Action
End []Stmts
Functions []Function
Begin []ast.Stmts
Actions []ast.Action
End []ast.Stmts
Functions []ast.Function
Scalars map[string]int
Arrays map[string]int
Compiled *compiler.Program
}
// String returns an indented, pretty-printed version of the parsed
// program.
func (p *Program) String() string {
parts := []string{}
for _, ss := range p.Begin {
parts = append(parts, "BEGIN {\n"+ss.String()+"}")
return p.toAST().String()
}
// Disassemble writes a human-readable form of the program's virtual machine
// instructions to writer.
//
// It delegates to p.Compiled.Disassemble and returns whatever error that
// call reports. ParseProgram populates Compiled before returning the
// Program. NOTE(review): a Program assembled by hand without Compiled set
// presumably cannot be disassembled — confirm how the compiler package
// treats a nil *compiler.Program.
func (p *Program) Disassemble(writer io.Writer) error {
return p.Compiled.Disassemble(writer)
}
// toAST converts the *Program to an *ast.Program.
func (p *Program) toAST() *ast.Program {
return &ast.Program{
Begin: p.Begin,
Actions: p.Actions,
End: p.End,
Functions: p.Functions,
Scalars: p.Scalars,
Arrays: p.Arrays,
}
for _, a := range p.Actions {
parts = append(parts, a.String())
}
for _, ss := range p.End {
parts = append(parts, "END {\n"+ss.String()+"}")
}
for _, function := range p.Functions {
parts = append(parts, function.String())
}
return strings.Join(parts, "\n\n")
}
// Parser state
@ -123,7 +135,7 @@ type parser struct {
varTypes map[string]map[string]typeInfo // map of func name to var name to type
varRefs []varRef // all variable references (usually scalars)
arrayRefs []arrayRef // all array references
multiExprs map[*MultiExpr]Position // tracks comma-separated expressions
multiExprs map[*ast.MultiExpr]Position // tracks comma-separated expressions
// Function tracking
functions map[string]int // map of function name to index
@ -154,7 +166,7 @@ func (p *parser) program() *Program {
default:
p.inAction = true
// Allow empty pattern, normal pattern, or range pattern
pattern := []Expr{}
pattern := []ast.Expr{}
if !p.matches(LBRACE, EOF) {
pattern = append(pattern, p.expr())
}
@ -163,7 +175,7 @@ func (p *parser) program() *Program {
pattern = append(pattern, p.expr())
}
// Or an empty action (equivalent to { print $0 })
action := Action{pattern, nil}
action := ast.Action{pattern, nil}
if p.tok == LBRACE {
action.Stmts = p.stmtsBrace()
}
@ -181,7 +193,7 @@ func (p *parser) program() *Program {
}
// Parse a list of statements.
func (p *parser) stmts() Stmts {
func (p *parser) stmts() ast.Stmts {
switch p.tok {
case SEMICOLON:
// This is so things like this parse correctly:
@ -191,15 +203,15 @@ func (p *parser) stmts() Stmts {
case LBRACE:
return p.stmtsBrace()
default:
return []Stmt{p.stmt()}
return []ast.Stmt{p.stmt()}
}
}
// Parse a list of statements surrounded in {...} braces.
func (p *parser) stmtsBrace() Stmts {
func (p *parser) stmtsBrace() ast.Stmts {
p.expect(LBRACE)
p.optionalNewlines()
ss := []Stmt{}
ss := []ast.Stmt{}
for p.tok != RBRACE && p.tok != EOF {
ss = append(ss, p.stmt())
}
@ -211,7 +223,7 @@ func (p *parser) stmtsBrace() Stmts {
}
// Parse a "simple" statement (eg: allowed in a for loop init clause).
func (p *parser) simpleStmt() Stmt {
func (p *parser) simpleStmt() ast.Stmt {
switch p.tok {
case PRINT, PRINTF:
op := p.tok
@ -219,31 +231,31 @@ func (p *parser) simpleStmt() Stmt {
args := p.exprList(p.printExpr)
if len(args) == 1 {
// This allows parens around all the print args
if m, ok := args[0].(*MultiExpr); ok {
if m, ok := args[0].(*ast.MultiExpr); ok {
args = m.Exprs
p.useMultiExpr(m)
}
}
redirect := ILLEGAL
var dest Expr
var dest ast.Expr
if p.matches(GREATER, APPEND, PIPE) {
redirect = p.tok
p.next()
dest = p.expr()
}
if op == PRINT {
return &PrintStmt{args, redirect, dest}
return &ast.PrintStmt{args, redirect, dest}
} else {
if len(args) == 0 {
panic(p.errorf("expected printf args, got none"))
}
return &PrintfStmt{args, redirect, dest}
return &ast.PrintfStmt{args, redirect, dest}
}
case DELETE:
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
var index []Expr
var index []ast.Expr
if p.tok == LBRACKET {
p.next()
index = p.exprList(p.expr)
@ -252,20 +264,20 @@ func (p *parser) simpleStmt() Stmt {
}
p.expect(RBRACKET)
}
return &DeleteStmt{ref, index}
return &ast.DeleteStmt{ref, index}
case IF, FOR, WHILE, DO, BREAK, CONTINUE, NEXT, EXIT, RETURN:
panic(p.errorf("expected print/printf, delete, or expression"))
default:
return &ExprStmt{p.expr()}
return &ast.ExprStmt{p.expr()}
}
}
// Parse any top-level statement.
func (p *parser) stmt() Stmt {
func (p *parser) stmt() ast.Stmt {
for p.matches(SEMICOLON, NEWLINE) {
p.next()
}
var s Stmt
var s ast.Stmt
switch p.tok {
case IF:
p.next()
@ -275,13 +287,13 @@ func (p *parser) stmt() Stmt {
p.optionalNewlines()
body := p.stmts()
p.optionalNewlines()
var elseBody Stmts
var elseBody ast.Stmts
if p.tok == ELSE {
p.next()
p.optionalNewlines()
elseBody = p.stmts()
}
s = &IfStmt{cond, body, elseBody}
s = &ast.IfStmt{cond, body, elseBody}
case FOR:
// Parse for statement, either "for in" or C-like for loop.
//
@ -292,7 +304,7 @@ func (p *parser) stmt() Stmt {
//
p.next()
p.expect(LPAREN)
var pre Stmt
var pre ast.Stmt
if p.tok != SEMICOLON {
pre = p.simpleStmt()
}
@ -300,41 +312,41 @@ func (p *parser) stmt() Stmt {
// Match: for (var in array) body
p.next()
p.optionalNewlines()
exprStmt, ok := pre.(*ExprStmt)
exprStmt, ok := pre.(*ast.ExprStmt)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
inExpr, ok := (exprStmt.Expr).(*InExpr)
inExpr, ok := exprStmt.Expr.(*ast.InExpr)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
if len(inExpr.Index) != 1 {
panic(p.errorf("expected 'for (var in array) ...'"))
}
varExpr, ok := (inExpr.Index[0]).(*VarExpr)
varExpr, ok := inExpr.Index[0].(*ast.VarExpr)
if !ok {
panic(p.errorf("expected 'for (var in array) ...'"))
}
body := p.loopStmts()
s = &ForInStmt{varExpr, inExpr.Array, body}
s = &ast.ForInStmt{varExpr, inExpr.Array, body}
} else {
// Match: for ([pre]; [cond]; [post]) body
p.expect(SEMICOLON)
p.optionalNewlines()
var cond Expr
var cond ast.Expr
if p.tok != SEMICOLON {
cond = p.expr()
}
p.expect(SEMICOLON)
p.optionalNewlines()
var post Stmt
var post ast.Stmt
if p.tok != RPAREN {
post = p.simpleStmt()
}
p.expect(RPAREN)
p.optionalNewlines()
body := p.loopStmts()
s = &ForStmt{pre, cond, post, body}
s = &ast.ForStmt{pre, cond, post, body}
}
case WHILE:
p.next()
@ -343,7 +355,7 @@ func (p *parser) stmt() Stmt {
p.expect(RPAREN)
p.optionalNewlines()
body := p.loopStmts()
s = &WhileStmt{cond, body}
s = &ast.WhileStmt{cond, body}
case DO:
p.next()
p.optionalNewlines()
@ -352,45 +364,45 @@ func (p *parser) stmt() Stmt {
p.expect(LPAREN)
cond := p.expr()
p.expect(RPAREN)
s = &DoWhileStmt{body, cond}
s = &ast.DoWhileStmt{body, cond}
case BREAK:
if p.loopDepth == 0 {
panic(p.errorf("break must be inside a loop body"))
}
p.next()
s = &BreakStmt{}
s = &ast.BreakStmt{}
case CONTINUE:
if p.loopDepth == 0 {
panic(p.errorf("continue must be inside a loop body"))
}
p.next()
s = &ContinueStmt{}
s = &ast.ContinueStmt{}
case NEXT:
if !p.inAction && p.funcName == "" {
panic(p.errorf("next can't be inside BEGIN or END"))
}
p.next()
s = &NextStmt{}
s = &ast.NextStmt{}
case EXIT:
p.next()
var status Expr
var status ast.Expr
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
status = p.expr()
}
s = &ExitStmt{status}
s = &ast.ExitStmt{status}
case RETURN:
if p.funcName == "" {
panic(p.errorf("return must be inside a function"))
}
p.next()
var value Expr
var value ast.Expr
if !p.matches(NEWLINE, SEMICOLON, RBRACE) {
value = p.expr()
}
s = &ReturnStmt{value}
s = &ast.ReturnStmt{value}
case LBRACE:
body := p.stmtsBrace()
s = &BlockStmt{body}
s = &ast.BlockStmt{body}
default:
s = p.simpleStmt()
}
@ -407,7 +419,7 @@ func (p *parser) stmt() Stmt {
// Same as stmts(), but tracks that we're in a loop (as break and
// continue can only occur inside a loop).
func (p *parser) loopStmts() Stmts {
func (p *parser) loopStmts() ast.Stmts {
p.loopDepth++
ss := p.stmts()
p.loopDepth--
@ -417,7 +429,7 @@ func (p *parser) loopStmts() Stmts {
// Parse a function definition and body. As it goes, this resolves
// the local variable indexes and tracks which parameters are array
// parameters.
func (p *parser) function() Function {
func (p *parser) function() ast.Function {
if p.funcName != "" {
// Should never actually get here (FUNCTION token is only
// handled at the top level), but just in case.
@ -458,13 +470,13 @@ func (p *parser) function() Function {
p.stopFunction()
p.locals = nil
return Function{name, params, nil, body}
return ast.Function{name, params, nil, body}
}
// Parse expressions separated by commas: args to print[f] or user
// function call, or multi-dimensional index.
func (p *parser) exprList(parse func() Expr) []Expr {
exprs := []Expr{}
func (p *parser) exprList(parse func() ast.Expr) []ast.Expr {
exprs := []ast.Expr{}
first := true
for !p.matches(NEWLINE, SEMICOLON, RBRACE, RBRACKET, RPAREN, GREATER, PIPE, APPEND) {
if !first {
@ -484,41 +496,43 @@ func (p *parser) exprList(parse func() Expr) []Expr {
// which skips PIPE GETLINE and GREATER expressions.
// Parse a single expression.
func (p *parser) expr() Expr { return p.getLine() }
func (p *parser) printExpr() Expr { return p._assign(p.printCond) }
func (p *parser) expr() ast.Expr { return p.getLine() }
func (p *parser) printExpr() ast.Expr { return p._assign(p.printCond) }
// Parse an "expr | getline [lvalue]" expression:
//
// assign [PIPE GETLINE [lvalue]]
//
func (p *parser) getLine() Expr {
// assign [PIPE GETLINE [lvalue]]
func (p *parser) getLine() ast.Expr {
expr := p._assign(p.cond)
if p.tok == PIPE {
p.next()
p.expect(GETLINE)
target := p.optionalLValue()
return &GetlineExpr{expr, target, nil}
return &ast.GetlineExpr{expr, target, nil}
}
return expr
}
// Parse an = assignment expression:
//
// lvalue [assign_op assign]
// lvalue [assign_op assign]
//
// An lvalue is a variable name, an array[expr] index expression, or
// an $expr field expression.
//
func (p *parser) _assign(higher func() Expr) Expr {
func (p *parser) _assign(higher func() ast.Expr) ast.Expr {
expr := higher()
if IsLValue(expr) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN,
_, isNamedField := expr.(*ast.NamedFieldExpr)
if (isNamedField || ast.IsLValue(expr)) && p.matches(ASSIGN, ADD_ASSIGN, DIV_ASSIGN,
MOD_ASSIGN, MUL_ASSIGN, POW_ASSIGN, SUB_ASSIGN) {
if isNamedField {
panic(p.errorf("assigning @ expression not supported"))
}
op := p.tok
p.next()
right := p._assign(higher)
switch op {
case ASSIGN:
return &AssignExpr{expr, right}
return &ast.AssignExpr{expr, right}
case ADD_ASSIGN:
op = ADD
case DIV_ASSIGN:
@ -532,19 +546,18 @@ func (p *parser) _assign(higher func() Expr) Expr {
case SUB_ASSIGN:
op = SUB
}
return &AugAssignExpr{expr, op, right}
return &ast.AugAssignExpr{expr, op, right}
}
return expr
}
// Parse a ?: conditional expression:
//
// or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
//
func (p *parser) cond() Expr { return p._cond(p.or) }
func (p *parser) printCond() Expr { return p._cond(p.printOr) }
// or [QUESTION NEWLINE* cond COLON NEWLINE* cond]
func (p *parser) cond() ast.Expr { return p._cond(p.or) }
func (p *parser) printCond() ast.Expr { return p._cond(p.printOr) }
func (p *parser) _cond(higher func() Expr) Expr {
func (p *parser) _cond(higher func() ast.Expr) ast.Expr {
expr := higher()
if p.tok == QUESTION {
p.next()
@ -553,156 +566,154 @@ func (p *parser) _cond(higher func() Expr) Expr {
p.expect(COLON)
p.optionalNewlines()
f := p.expr()
return &CondExpr{expr, t, f}
return &ast.CondExpr{expr, t, f}
}
return expr
}
// Parse an || or expression:
//
// and [OR NEWLINE* and] [OR NEWLINE* and] ...
//
func (p *parser) or() Expr { return p.binaryLeft(p.and, true, OR) }
func (p *parser) printOr() Expr { return p.binaryLeft(p.printAnd, true, OR) }
// and [OR NEWLINE* and] [OR NEWLINE* and] ...
func (p *parser) or() ast.Expr { return p.binaryLeft(p.and, true, OR) }
func (p *parser) printOr() ast.Expr { return p.binaryLeft(p.printAnd, true, OR) }
// Parse an && and expression:
//
// in [AND NEWLINE* in] [AND NEWLINE* in] ...
//
func (p *parser) and() Expr { return p.binaryLeft(p.in, true, AND) }
func (p *parser) printAnd() Expr { return p.binaryLeft(p.printIn, true, AND) }
// in [AND NEWLINE* in] [AND NEWLINE* in] ...
func (p *parser) and() ast.Expr { return p.binaryLeft(p.in, true, AND) }
func (p *parser) printAnd() ast.Expr { return p.binaryLeft(p.printIn, true, AND) }
// Parse an "in" expression:
//
// match [IN NAME] [IN NAME] ...
//
func (p *parser) in() Expr { return p._in(p.match) }
func (p *parser) printIn() Expr { return p._in(p.printMatch) }
// match [IN NAME] [IN NAME] ...
func (p *parser) in() ast.Expr { return p._in(p.match) }
func (p *parser) printIn() ast.Expr { return p._in(p.printMatch) }
func (p *parser) _in(higher func() Expr) Expr {
func (p *parser) _in(higher func() ast.Expr) ast.Expr {
expr := higher()
for p.tok == IN {
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
expr = &InExpr{[]Expr{expr}, ref}
expr = &ast.InExpr{[]ast.Expr{expr}, ref}
}
return expr
}
// Parse a ~ match expression:
//
// compare [MATCH|NOT_MATCH compare]
//
func (p *parser) match() Expr { return p._match(p.compare) }
func (p *parser) printMatch() Expr { return p._match(p.printCompare) }
// compare [MATCH|NOT_MATCH compare]
func (p *parser) match() ast.Expr { return p._match(p.compare) }
func (p *parser) printMatch() ast.Expr { return p._match(p.printCompare) }
func (p *parser) _match(higher func() Expr) Expr {
func (p *parser) _match(higher func() ast.Expr) ast.Expr {
expr := higher()
if p.matches(MATCH, NOT_MATCH) {
op := p.tok
p.next()
right := p.regexStr(higher) // Not match() as these aren't associative
return &BinaryExpr{expr, op, right}
return &ast.BinaryExpr{expr, op, right}
}
return expr
}
// Parse a comparison expression:
//
// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat]
//
func (p *parser) compare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) }
func (p *parser) printCompare() Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) }
// concat [EQUALS|NOT_EQUALS|LESS|LTE|GREATER|GTE concat]
func (p *parser) compare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE, GREATER) }
func (p *parser) printCompare() ast.Expr { return p._compare(EQUALS, NOT_EQUALS, LESS, LTE, GTE) }
func (p *parser) _compare(ops ...Token) Expr {
func (p *parser) _compare(ops ...Token) ast.Expr {
expr := p.concat()
if p.matches(ops...) {
op := p.tok
p.next()
right := p.concat() // Not compare() as these aren't associative
return &BinaryExpr{expr, op, right}
return &ast.BinaryExpr{expr, op, right}
}
return expr
}
func (p *parser) concat() Expr {
func (p *parser) concat() ast.Expr {
expr := p.add()
for p.matches(DOLLAR, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) ||
(p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC) {
for p.matches(DOLLAR, AT, NOT, NAME, NUMBER, STRING, LPAREN, INCR, DECR) ||
p.tok >= FIRST_FUNC && p.tok <= LAST_FUNC {
right := p.add()
expr = &BinaryExpr{expr, CONCAT, right}
expr = &ast.BinaryExpr{expr, CONCAT, right}
}
return expr
}
func (p *parser) add() Expr {
func (p *parser) add() ast.Expr {
return p.binaryLeft(p.mul, false, ADD, SUB)
}
func (p *parser) mul() Expr {
func (p *parser) mul() ast.Expr {
return p.binaryLeft(p.pow, false, MUL, DIV, MOD)
}
func (p *parser) pow() Expr {
func (p *parser) pow() ast.Expr {
// Note that pow (expr ^ expr) is right-associative
expr := p.preIncr()
if p.tok == POW {
p.next()
right := p.pow()
return &BinaryExpr{expr, POW, right}
return &ast.BinaryExpr{expr, POW, right}
}
return expr
}
func (p *parser) preIncr() Expr {
func (p *parser) preIncr() ast.Expr {
if p.tok == INCR || p.tok == DECR {
op := p.tok
p.next()
exprPos := p.pos
expr := p.preIncr()
if !IsLValue(expr) {
if !ast.IsLValue(expr) {
panic(p.posErrorf(exprPos, "expected lvalue after ++ or --"))
}
return &IncrExpr{expr, op, true}
return &ast.IncrExpr{expr, op, true}
}
return p.postIncr()
}
func (p *parser) postIncr() Expr {
func (p *parser) postIncr() ast.Expr {
expr := p.primary()
if (p.tok == INCR || p.tok == DECR) && IsLValue(expr) {
if (p.tok == INCR || p.tok == DECR) && ast.IsLValue(expr) {
op := p.tok
p.next()
return &IncrExpr{expr, op, false}
return &ast.IncrExpr{expr, op, false}
}
return expr
}
func (p *parser) primary() Expr {
func (p *parser) primary() ast.Expr {
switch p.tok {
case NUMBER:
// AWK allows forms like "1.5e", but ParseFloat doesn't
s := strings.TrimRight(p.val, "eE")
n, _ := strconv.ParseFloat(s, 64)
p.next()
return &NumExpr{n}
return &ast.NumExpr{n}
case STRING:
s := p.val
p.next()
return &StrExpr{s}
return &ast.StrExpr{s}
case DIV, DIV_ASSIGN:
// If we get to DIV or DIV_ASSIGN as a primary expression,
// it's actually a regex.
regex := p.nextRegex()
return &RegExpr{regex}
return &ast.RegExpr{regex}
case DOLLAR:
p.next()
return &FieldExpr{p.primary()}
return &ast.FieldExpr{p.primary()}
case AT:
p.next()
return &ast.NamedFieldExpr{p.primary()}
case NOT, ADD, SUB:
op := p.tok
p.next()
return &UnaryExpr{op, p.pow()}
return &ast.UnaryExpr{op, p.pow()}
case NAME:
name := p.val
namePos := p.pos
@ -715,7 +726,7 @@ func (p *parser) primary() Expr {
panic(p.errorf("expected expression instead of ]"))
}
p.expect(RBRACKET)
return &IndexExpr{p.arrayRef(name, namePos), index}
return &ast.IndexExpr{p.arrayRef(name, namePos), index}
} else if p.tok == LPAREN && !p.lexer.HadSpace() {
if p.locals[name] {
panic(p.errorf("can't call local variable %q as function", name))
@ -743,7 +754,7 @@ func (p *parser) primary() Expr {
p.next()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
return &InExpr{exprs, ref}
return &ast.InExpr{exprs, ref}
}
// MultiExpr is used as a pseudo-expression for print[f] parsing.
return p.multiExpr(exprs, parenPos)
@ -751,12 +762,12 @@ func (p *parser) primary() Expr {
case GETLINE:
p.next()
target := p.optionalLValue()
var file Expr
var file ast.Expr
if p.tok == LESS {
p.next()
file = p.primary()
}
return &GetlineExpr{nil, target, file}
return &ast.GetlineExpr{nil, target, file}
// Below is the parsing of all the builtin function calls. We
// could unify these but several of them have special handling
// (array/lvalue/regex params, optional arguments, and so on).
@ -768,18 +779,18 @@ func (p *parser) primary() Expr {
regex := p.regexStr(p.expr)
p.commaNewlines()
repl := p.expr()
args := []Expr{regex, repl}
args := []ast.Expr{regex, repl}
if p.tok == COMMA {
p.commaNewlines()
inPos := p.pos
in := p.expr()
if !IsLValue(in) {
if !ast.IsLValue(in) {
panic(p.posErrorf(inPos, "3rd arg to sub/gsub must be lvalue"))
}
args = append(args, in)
}
p.expect(RPAREN)
return &CallExpr{op, args}
return &ast.CallExpr{op, args}
case F_SPLIT:
p.next()
p.expect(LPAREN)
@ -787,13 +798,13 @@ func (p *parser) primary() Expr {
p.commaNewlines()
ref := p.arrayRef(p.val, p.pos)
p.expect(NAME)
args := []Expr{str, ref}
args := []ast.Expr{str, ref}
if p.tok == COMMA {
p.commaNewlines()
args = append(args, p.regexStr(p.expr))
}
p.expect(RPAREN)
return &CallExpr{F_SPLIT, args}
return &ast.CallExpr{F_SPLIT, args}
case F_MATCH:
p.next()
p.expect(LPAREN)
@ -801,24 +812,24 @@ func (p *parser) primary() Expr {
p.commaNewlines()
regex := p.regexStr(p.expr)
p.expect(RPAREN)
return &CallExpr{F_MATCH, []Expr{str, regex}}
return &ast.CallExpr{F_MATCH, []ast.Expr{str, regex}}
case F_RAND:
p.next()
p.expect(LPAREN)
p.expect(RPAREN)
return &CallExpr{F_RAND, nil}
return &ast.CallExpr{F_RAND, nil}
case F_SRAND:
p.next()
p.expect(LPAREN)
var args []Expr
var args []ast.Expr
if p.tok != RPAREN {
args = append(args, p.expr())
}
p.expect(RPAREN)
return &CallExpr{F_SRAND, args}
return &ast.CallExpr{F_SRAND, args}
case F_LENGTH:
p.next()
var args []Expr
var args []ast.Expr
// AWK quirk: "length" is allowed to be called without parens
if p.tok == LPAREN {
p.next()
@ -827,39 +838,39 @@ func (p *parser) primary() Expr {
}
p.expect(RPAREN)
}
return &CallExpr{F_LENGTH, args}
return &ast.CallExpr{F_LENGTH, args}
case F_SUBSTR:
p.next()
p.expect(LPAREN)
str := p.expr()
p.commaNewlines()
start := p.expr()
args := []Expr{str, start}
args := []ast.Expr{str, start}
if p.tok == COMMA {
p.commaNewlines()
args = append(args, p.expr())
}
p.expect(RPAREN)
return &CallExpr{F_SUBSTR, args}
return &ast.CallExpr{F_SUBSTR, args}
case F_SPRINTF:
p.next()
p.expect(LPAREN)
args := []Expr{p.expr()}
args := []ast.Expr{p.expr()}
for p.tok == COMMA {
p.commaNewlines()
args = append(args, p.expr())
}
p.expect(RPAREN)
return &CallExpr{F_SPRINTF, args}
return &ast.CallExpr{F_SPRINTF, args}
case F_FFLUSH:
p.next()
p.expect(LPAREN)
var args []Expr
var args []ast.Expr
if p.tok != RPAREN {
args = append(args, p.expr())
}
p.expect(RPAREN)
return &CallExpr{F_FFLUSH, args}
return &ast.CallExpr{F_FFLUSH, args}
case F_COS, F_SIN, F_EXP, F_LOG, F_SQRT, F_INT, F_TOLOWER, F_TOUPPER, F_SYSTEM, F_CLOSE:
// Simple 1-argument functions
op := p.tok
@ -867,7 +878,7 @@ func (p *parser) primary() Expr {
p.expect(LPAREN)
arg := p.expr()
p.expect(RPAREN)
return &CallExpr{op, []Expr{arg}}
return &ast.CallExpr{op, []ast.Expr{arg}}
case F_ATAN2, F_INDEX:
// Simple 2-argument functions
op := p.tok
@ -877,14 +888,14 @@ func (p *parser) primary() Expr {
p.commaNewlines()
arg2 := p.expr()
p.expect(RPAREN)
return &CallExpr{op, []Expr{arg1, arg2}}
return &ast.CallExpr{op, []ast.Expr{arg1, arg2}}
default:
panic(p.errorf("expected expression instead of %s", p.tok))
}
}
// Parse an optional lvalue
func (p *parser) optionalLValue() Expr {
func (p *parser) optionalLValue() ast.Expr {
switch p.tok {
case NAME:
if p.lexer.PeekByte() == '(' {
@ -902,12 +913,12 @@ func (p *parser) optionalLValue() Expr {
panic(p.errorf("expected expression instead of ]"))
}
p.expect(RBRACKET)
return &IndexExpr{p.arrayRef(name, namePos), index}
return &ast.IndexExpr{p.arrayRef(name, namePos), index}
}
return p.varRef(name, namePos)
case DOLLAR:
p.next()
return &FieldExpr{p.primary()}
return &ast.FieldExpr{p.primary()}
default:
return nil
}
@ -915,12 +926,11 @@ func (p *parser) optionalLValue() Expr {
// Parse /.../ regex or generic expression:
//
// REGEX | expr
//
func (p *parser) regexStr(parse func() Expr) Expr {
// REGEX | expr
func (p *parser) regexStr(parse func() ast.Expr) ast.Expr {
if p.matches(DIV, DIV_ASSIGN) {
regex := p.nextRegex()
return &StrExpr{regex}
return &ast.StrExpr{regex}
}
return parse()
}
@ -928,9 +938,8 @@ func (p *parser) regexStr(parse func() Expr) Expr {
// Parse left-associative binary operator. Allow newlines after
// operator if allowNewline is true.
//
// parse [op parse] [op parse] ...
//
func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token) Expr {
// parse [op parse] [op parse] ...
func (p *parser) binaryLeft(higher func() ast.Expr, allowNewline bool, ops ...Token) ast.Expr {
expr := higher()
for p.matches(ops...) {
op := p.tok
@ -939,15 +948,14 @@ func (p *parser) binaryLeft(higher func() Expr, allowNewline bool, ops ...Token)
p.optionalNewlines()
}
right := higher()
expr = &BinaryExpr{expr, op, right}
expr = &ast.BinaryExpr{expr, op, right}
}
return expr
}
// Parse comma followed by optional newlines:
//
// COMMA NEWLINE*
//
// COMMA NEWLINE*
func (p *parser) commaNewlines() {
p.expect(COMMA)
p.optionalNewlines()
@ -955,8 +963,7 @@ func (p *parser) commaNewlines() {
// Parse zero or more optional newlines:
//
// [NEWLINE] [NEWLINE] ...
//
// [NEWLINE] [NEWLINE] ...
func (p *parser) optionalNewlines() {
for p.tok == NEWLINE {
p.next()
@ -1021,9 +1028,9 @@ func (p *parser) posErrorf(pos Position, format string, args ...interface{}) err
// Parse call to a user-defined function (and record call site for
// resolving later).
func (p *parser) userCall(name string, pos Position) *UserCallExpr {
func (p *parser) userCall(name string, pos Position) *ast.UserCallExpr {
p.expect(LPAREN)
args := []Expr{}
args := []ast.Expr{}
i := 0
for !p.matches(NEWLINE, RPAREN) {
if i > 0 {
@ -1035,7 +1042,7 @@ func (p *parser) userCall(name string, pos Position) *UserCallExpr {
i++
}
p.expect(RPAREN)
call := &UserCallExpr{false, -1, name, args} // index is resolved later
call := &ast.UserCallExpr{false, -1, name, args} // index is resolved later
p.recordUserCall(call, pos)
return call
}

View File

@ -7,7 +7,7 @@ import (
"reflect"
"sort"
. "github.com/benhoyt/goawk/internal/ast"
"github.com/benhoyt/goawk/internal/ast"
. "github.com/benhoyt/goawk/lexer"
)
@ -33,8 +33,8 @@ func (t varType) String() string {
// typeInfo records type information for a single variable
type typeInfo struct {
typ varType
ref *VarExpr
scope VarScope
ref *ast.VarExpr
scope ast.VarScope
index int
callName string
argIndex int
@ -44,9 +44,9 @@ type typeInfo struct {
func (t typeInfo) String() string {
var scope string
switch t.scope {
case ScopeGlobal:
case ast.ScopeGlobal:
scope = "Global"
case ScopeLocal:
case ast.ScopeLocal:
scope = "Local"
default:
scope = "Special"
@ -58,7 +58,7 @@ func (t typeInfo) String() string {
// A single variable reference (normally scalar)
type varRef struct {
funcName string
ref *VarExpr
ref *ast.VarExpr
isArg bool
pos Position
}
@ -66,7 +66,7 @@ type varRef struct {
// A single array reference
type arrayRef struct {
funcName string
ref *ArrayExpr
ref *ast.ArrayExpr
pos Position
}
@ -76,8 +76,9 @@ func (p *parser) initResolve() {
p.varTypes[""] = make(map[string]typeInfo) // globals
p.functions = make(map[string]int)
p.arrayRef("ARGV", Position{1, 1}) // interpreter relies on ARGV being present
p.arrayRef("ENVIRON", Position{1, 1}) // and ENVIRON
p.multiExprs = make(map[*MultiExpr]Position, 3)
p.arrayRef("ENVIRON", Position{1, 1}) // and other built-in arrays
p.arrayRef("FIELDS", Position{1, 1})
p.multiExprs = make(map[*ast.MultiExpr]Position, 3)
}
// Signal the start of a function
@ -98,13 +99,13 @@ func (p *parser) addFunction(name string, index int) {
// Records a call to a user function (for resolving indexes later)
type userCall struct {
call *UserCallExpr
call *ast.UserCallExpr
pos Position
inFunc string
}
// Record a user call site
func (p *parser) recordUserCall(call *UserCallExpr, pos Position) {
func (p *parser) recordUserCall(call *ast.UserCallExpr, pos Position) {
p.userCalls = append(p.userCalls, userCall{call, pos, p.funcName})
}
@ -149,8 +150,8 @@ func (p *parser) resolveUserCalls(prog *Program) {
// For arguments that are variable references, we don't know the
// type based on context, so mark the types for these as unknown.
func (p *parser) processUserCallArg(funcName string, arg Expr, index int) {
if varExpr, ok := arg.(*VarExpr); ok {
func (p *parser) processUserCallArg(funcName string, arg ast.Expr, index int) {
if varExpr, ok := arg.(*ast.VarExpr); ok {
scope, varFuncName := p.getScope(varExpr.Name)
ref := p.varTypes[varFuncName][varExpr.Name].ref
if ref == varExpr {
@ -166,22 +167,22 @@ func (p *parser) processUserCallArg(funcName string, arg Expr, index int) {
// Determine scope of given variable reference (and funcName if it's
// a local, otherwise empty string)
func (p *parser) getScope(name string) (VarScope, string) {
func (p *parser) getScope(name string) (ast.VarScope, string) {
switch {
case p.locals[name]:
return ScopeLocal, p.funcName
case SpecialVarIndex(name) > 0:
return ScopeSpecial, ""
return ast.ScopeLocal, p.funcName
case ast.SpecialVarIndex(name) > 0:
return ast.ScopeSpecial, ""
default:
return ScopeGlobal, ""
return ast.ScopeGlobal, ""
}
}
// Record a variable (scalar) reference and return the *VarExpr (but
// VarExpr.Index won't be set till later)
func (p *parser) varRef(name string, pos Position) *VarExpr {
func (p *parser) varRef(name string, pos Position) *ast.VarExpr {
scope, funcName := p.getScope(name)
expr := &VarExpr{scope, 0, name}
expr := &ast.VarExpr{scope, 0, name}
p.varRefs = append(p.varRefs, varRef{funcName, expr, false, pos})
info := p.varTypes[funcName][name]
if info.typ == typeUnknown {
@ -192,12 +193,12 @@ func (p *parser) varRef(name string, pos Position) *VarExpr {
// Record an array reference and return the *ArrayExpr (but
// ArrayExpr.Index won't be set till later)
func (p *parser) arrayRef(name string, pos Position) *ArrayExpr {
func (p *parser) arrayRef(name string, pos Position) *ast.ArrayExpr {
scope, funcName := p.getScope(name)
if scope == ScopeSpecial {
if scope == ast.ScopeSpecial {
panic(p.errorf("can't use scalar %q as array", name))
}
expr := &ArrayExpr{scope, 0, name}
expr := &ast.ArrayExpr{scope, 0, name}
p.arrayRefs = append(p.arrayRefs, arrayRef{funcName, expr, pos})
info := p.varTypes[funcName][name]
if info.typ == typeUnknown {
@ -233,52 +234,45 @@ func (p *parser) printVarTypes(prog *Program) {
}
}
// If we can't finish resolving after this many iterations, give up
const maxResolveIterations = 10000
// Resolve unknown variables types and generate variable indexes and
// name-to-index mappings for interpreter
func (p *parser) resolveVars(prog *Program) {
// First go through all unknown types and try to determine the
// type from the parameter type in that function definition. May
// need multiple passes depending on the order of functions. This
// is not particularly efficient, but on realistic programs it's
// not an issue.
for i := 0; ; i++ {
progressed := false
for funcName, infos := range p.varTypes {
for name, info := range infos {
if info.scope == ScopeSpecial || info.typ != typeUnknown {
// It's a special var or type is already known
continue
}
funcIndex, ok := p.functions[info.callName]
if !ok {
// Function being called is a native function
continue
}
// Determine var type based on type of this parameter
// in the called function (if we know that)
paramName := prog.Functions[funcIndex].Params[info.argIndex]
typ := p.varTypes[info.callName][paramName].typ
if typ != typeUnknown {
if p.debugTypes {
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
funcName, name, typ)
}
info.typ = typ
p.varTypes[funcName][name] = info
progressed = true
}
// type from the parameter type in that function definition.
// Iterate through functions in topological order, for example
// if f() calls g(), process g first, then f.
callGraph := make(map[string]map[string]struct{})
for _, call := range p.userCalls {
if _, ok := callGraph[call.inFunc]; !ok {
callGraph[call.inFunc] = make(map[string]struct{})
}
callGraph[call.inFunc][call.call.Name] = struct{}{}
}
sortedFuncs := topoSort(callGraph)
for _, funcName := range sortedFuncs {
infos := p.varTypes[funcName]
for name, info := range infos {
if info.scope == ast.ScopeSpecial || info.typ != typeUnknown {
// It's a special var or type is already known
continue
}
funcIndex, ok := p.functions[info.callName]
if !ok {
// Function being called is a native function
continue
}
// Determine var type based on type of this parameter
// in the called function (if we know that)
paramName := prog.Functions[funcIndex].Params[info.argIndex]
typ := p.varTypes[info.callName][paramName].typ
if typ != typeUnknown {
if p.debugTypes {
fmt.Fprintf(p.debugWriter, "resolving %s:%s to %s\n",
funcName, name, typ)
}
info.typ = typ
p.varTypes[funcName][name] = info
}
}
if !progressed {
// If we didn't progress we're done (or trying again is
// not going to help)
break
}
if i >= maxResolveIterations {
panic(p.errorf("too many iterations trying to resolve variable types"))
}
}
@ -293,8 +287,8 @@ func (p *parser) resolveVars(prog *Program) {
panic(p.errorf("global var %q can't also be a function", name))
}
var index int
if info.scope == ScopeSpecial {
index = SpecialVarIndex(name)
if info.scope == ast.ScopeSpecial {
index = ast.SpecialVarIndex(name)
} else if info.typ == typeArray {
index = len(prog.Arrays)
prog.Arrays[name] = index
@ -317,7 +311,7 @@ func (p *parser) resolveVars(prog *Program) {
}
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
continue
}
@ -368,7 +362,7 @@ func (p *parser) resolveVars(prog *Program) {
// Check native function calls
if c.call.Native {
for _, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
// Non-variable expression, must be scalar
continue
@ -385,7 +379,7 @@ func (p *parser) resolveVars(prog *Program) {
// Check AWK function calls
function := prog.Functions[c.call.Index]
for i, arg := range c.call.Args {
varExpr, ok := arg.(*VarExpr)
varExpr, ok := arg.(*ast.VarExpr)
if !ok {
if function.Arrays[i] {
panic(p.posErrorf(c.pos, "can't pass scalar %s as array param", arg))
@ -441,14 +435,14 @@ func (p *parser) getVarFuncName(prog *Program, name, inFunc string) string {
// Record a "multi expression" (comma-separated pseudo-expression
// used to allow commas around print/printf arguments).
func (p *parser) multiExpr(exprs []Expr, pos Position) Expr {
expr := &MultiExpr{exprs}
func (p *parser) multiExpr(exprs []ast.Expr, pos Position) ast.Expr {
expr := &ast.MultiExpr{exprs}
p.multiExprs[expr] = pos
return expr
}
// Mark the multi expression as used (by a print/printf statement).
func (p *parser) useMultiExpr(expr *MultiExpr) {
func (p *parser) useMultiExpr(expr *ast.MultiExpr) {
delete(p.multiExprs, expr)
}
@ -460,7 +454,7 @@ func (p *parser) checkMultiExprs() {
// Show error on first comma-separated expression
min := Position{1000000000, 1000000000}
for _, pos := range p.multiExprs {
if pos.Line < min.Line || (pos.Line == min.Line && pos.Column < min.Column) {
if pos.Line < min.Line || pos.Line == min.Line && pos.Column < min.Column {
min = pos
}
}

View File

@ -0,0 +1,72 @@
// Topological sorting
package parser
/*
This algorithm is taken from:
https://en.wikipedia.org/wiki/Topological_sorting#Depth-first_search
L Empty list that will contain the sorted nodes
while exists nodes without a permanent mark do
select an unmarked node n
visit(n)
function visit(node n)
if n has a permanent mark then
return
if n has a temporary mark then
stop (not a DAG)
mark n with a temporary mark
for each node m with an edge from n to m do
visit(m)
remove temporary mark from n
mark n with a permanent mark
add n to head of L
*/
// topoSort returns the nodes of graph ordered so that every node appears
// after the nodes it has edges to. graph maps each node to the set of
// nodes it points at; nodes that occur only as edge targets are included
// in the result as well.
//
// This is the depth-first algorithm quoted in the comment above, with two
// deliberate differences. Finished nodes are appended to the tail of the
// result rather than pushed onto its head, so the order is the reverse of
// the textbook one. And reaching a node that is still being visited (that
// is, a cycle) abandons that edge instead of aborting, so a cyclic graph
// still produces an ordering that contains every node exactly once.
//
// Go does not specify map iteration order, so when more than one valid
// ordering exists the one returned may differ between calls. A nil or
// empty graph yields nil.
func topoSort(graph map[string]map[string]struct{}) []string {
	if len(graph) == 0 {
		return nil
	}

	// done holds the "permanent" marks, active the "temporary" ones.
	done := make(map[string]struct{}, len(graph))
	active := make(map[string]struct{})
	// len(graph) is only a lower bound: edge targets that are not keys of
	// graph are appended too.
	sorted := make([]string, 0, len(graph))

	var visit func(string)
	visit = func(n string) {
		if _, ok := done[n]; ok {
			return
		}
		if _, ok := active[n]; ok {
			// Back edge: n is already on the current DFS path. Ignore the
			// edge so that cycles don't recurse forever.
			return
		}
		active[n] = struct{}{}
		for m := range graph[n] {
			visit(m)
		}
		delete(active, n)
		done[n] = struct{}{}
		sorted = append(sorted, n)
	}

	// visit is a no-op for nodes that are already done, so every key can
	// simply be offered to it; no separate "unmarked" set is needed.
	for n := range graph {
		visit(n)
	}
	return sorted
}

View File

@ -1,6 +1,7 @@
# github.com/benhoyt/goawk v1.13.0
## explicit; go 1.13
# github.com/benhoyt/goawk v1.20.0
## explicit; go 1.14
github.com/benhoyt/goawk/internal/ast
github.com/benhoyt/goawk/internal/compiler
github.com/benhoyt/goawk/interp
github.com/benhoyt/goawk/lexer
github.com/benhoyt/goawk/parser