diff --git a/compiler/semantic/analyzer.go b/compiler/semantic/analyzer.go index 5a96e84dc3..f779d127e7 100644 --- a/compiler/semantic/analyzer.go +++ b/compiler/semantic/analyzer.go @@ -45,6 +45,10 @@ func Analyze(ctx context.Context, p *parser.AST, env *exec.Environment, extInput if err := t.Error(); err != nil { return nil, err } + newChecker(t, dagFuncs).check(t.reporter, semSeq) + if err := t.Error(); err != nil { + return nil, err + } main := newDagen(t.reporter).assemble(semSeq, dagFuncs) return main, t.Error() } diff --git a/compiler/semantic/checker.go b/compiler/semantic/checker.go new file mode 100644 index 0000000000..1d4a481ed9 --- /dev/null +++ b/compiler/semantic/checker.go @@ -0,0 +1,1077 @@ +package semantic + +import ( + "errors" + "fmt" + "slices" + + "github.com/brimdata/super" + "github.com/brimdata/super/compiler/ast" + "github.com/brimdata/super/compiler/semantic/sem" + "github.com/brimdata/super/runtime/sam/expr/agg" + "github.com/brimdata/super/sup" +) + +type checker struct { + t *translator + funcs map[string]*sem.FuncDef + checked map[super.Type]super.Type + unknown *super.TypeError + estack []errlist +} + +func newChecker(t *translator, funcs []*sem.FuncDef) *checker { + funcMap := make(map[string]*sem.FuncDef) + for _, f := range funcs { + funcMap[f.Tag] = f + } + return &checker{ + t: t, + funcs: funcMap, + unknown: t.sctx.LookupTypeError(t.sctx.MustLookupTypeRecord(nil)), + checked: make(map[super.Type]super.Type), + } +} + +func (c *checker) check(r reporter, seq sem.Seq) { + c.pushErrs() + c.seq(c.unknown, seq) + errs := c.popErrs() + errs.flushErrs(r) +} + +func (c *checker) seq(typ super.Type, seq sem.Seq) super.Type { + for len(seq) > 0 { + if fork, ok := seq[0].(*sem.ForkOp); ok && len(seq) >= 2 { + if join, ok := seq[1].(*sem.JoinOp); ok { + typ = c.join(c.fork(typ, fork), join) + seq = seq[2:] + continue + } + } + if swtch, ok := seq[0].(*sem.SwitchOp); ok && len(seq) >= 2 { + if join, ok := seq[1].(*sem.JoinOp); ok { + typ = 
c.join(c.swtch(typ, swtch), join) + seq = seq[2:] + continue + } + } + typ = c.op(typ, seq[0]) + seq = seq[1:] + } + return typ +} + +func (c *checker) op(typ super.Type, op sem.Op) super.Type { + switch op := op.(type) { + // + // Scanners first + // + case *sem.DefaultScan: + return c.unknown + case *sem.FileScan: + if op.Type == nil { + return c.unknown + } + return op.Type + case *sem.HTTPScan, + *sem.PoolScan, + *sem.RobotScan, + *sem.DBMetaScan, + *sem.PoolMetaScan, + *sem.CommitMetaScan, + *sem.DeleteScan: + return c.unknown + case *sem.NullScan: + return super.TypeNull + // + // Ops in alphabetical order + // + case *sem.AggregateOp: + aggPaths := c.assignments(typ, op.Aggs) + keyPaths := c.assignments(typ, op.Keys) + return c.pathsToType(append(keyPaths, aggPaths...)) + case *sem.BadOp: + return c.unknown + case *sem.CutOp: + return c.pathsToType(c.assignments(typ, op.Args)) + case *sem.DebugOp: + c.expr(typ, op.Expr) + return typ + case *sem.DistinctOp: + c.expr(typ, op.Expr) + return typ + case *sem.DropOp: + drops := c.lvalsToPaths(op.Args) + if drops == nil { + return c.unknown + } + return c.dropPaths(typ, drops) + case *sem.ExplodeOp: + // TBD + return c.unknown + case *sem.FilterOp: + c.boolean(op.Expr, c.expr(typ, op.Expr)) + return typ + case *sem.ForkOp: + return c.fuse(c.fork(typ, op)) + case *sem.FuseOp: + return typ + case *sem.HeadOp: + return typ + case *sem.LoadOp: + return c.unknown + case *sem.MergeOp: + c.sortExprs(typ, op.Exprs) + return typ + case *sem.JoinOp: + c.error(op, errors.New("join requires two inputs")) + return c.unknown + case *sem.OutputOp: + return typ + case *sem.PassOp: + return typ + case *sem.PutOp: + fields := c.assignments(typ, op.Args) + return c.putPaths(typ, fields) + case *sem.RenameOp: + // TBD + return c.unknown + case *sem.SkipOp: + return typ + case *sem.SortOp: + return typ + case *sem.SwitchOp: + var types []super.Type + exprType := c.expr(typ, op.Expr) + for _, cs := range op.Cases { + c.expr(exprType, 
cs.Expr) + types = append(types, c.seq(typ, cs.Path)) + } + return c.fuse(types) + case *sem.TailOp: + return typ + case *sem.TopOp: + c.sortExprs(typ, op.Exprs) + return typ + case *sem.UniqOp: + return typ + case *sem.UnnestOp: + return c.seq(c.unnest(op.Expr, c.expr(typ, op.Expr)), op.Body) + case *sem.ValuesOp: + return c.fuse(c.exprs(typ, op.Exprs)) + default: + panic(op) + } +} + +func (c *checker) fork(typ super.Type, fork *sem.ForkOp) []super.Type { + var types []super.Type + for _, seq := range fork.Paths { + types = append(types, c.seq(typ, seq)) + } + return types +} + +func (c *checker) swtch(typ super.Type, op *sem.SwitchOp) []super.Type { + var types []super.Type + exprType := c.expr(typ, op.Expr) + for _, cs := range op.Cases { + c.expr(exprType, cs.Expr) + types = append(types, c.seq(typ, cs.Path)) + } + return types +} + +// join checks the join condition against the record type +// {LeftAlias: types[0], RightAlias: types[1]} and returns that record type. +// A join needs exactly two inputs: with any other count an error is reported +// and unknown is returned, because indexing types[0] and types[1] would panic +// for fewer than two inputs and silently ignore the extras for more than two. +func (c *checker) join(types []super.Type, op *sem.JoinOp) super.Type { + if len(types) != 2 { + c.error(op, errors.New("join requires two query inputs")) + return c.unknown + } + typ := c.t.sctx.MustLookupTypeRecord([]super.Field{ + super.NewField(op.LeftAlias, types[0]), + super.NewField(op.RightAlias, types[1]), + }) + c.expr(typ, op.Cond) + return typ +} + +func (c *checker) unnest(loc ast.Node, typ super.Type) super.Type { + c.pushErrs() + typ, ok := c.unnestCheck(loc, typ) + errs := c.popErrs() + if !ok { + c.keepErrs(errs) + } + return typ +} + +func (c *checker) unnestCheck(loc ast.Node, typ super.Type) (super.Type, bool) { + switch typ := super.TypeUnder(typ).(type) { + case *super.TypeError: + if isUnknown(typ) { + return c.unknown, true + } + c.error(loc, errors.New("unnested record cannot be an error")) + return c.unknown, false + case *super.TypeUnion: + var types []super.Type + var ok bool + for _, t := range typ.Types { + typ, tok := c.unnestCheck(loc, t) + if tok { + types = append(types, typ) + ok = true + } + } + return c.fuse(types), ok + case *super.TypeArray: + return typ.Type, true + case *super.TypeRecord: + if len(typ.Fields) 
!= 2 { + c.error(loc, errors.New("unnested record must have two fields")) + return c.unknown, false + } + arrayField := typ.Fields[1] + if isUnknown(arrayField.Type) { + return typ, true + } + arrayType, ok := super.TypeUnder(arrayField.Type).(*super.TypeArray) + if !ok { + c.error(loc, errors.New("unnested record must have array for second field")) + return c.unknown, false + } + fields := []super.Field{typ.Fields[0], {Name: arrayField.Name, Type: arrayType.Type}} + return c.t.sctx.MustLookupTypeRecord(fields), true + default: + c.error(loc, errors.New("unnest value must be array or record")) + return c.unknown, false + } +} + +// assignments returns a set of paths where the LHS is a dotted path. If the LHS is more +// complex than a dotted path (e.g., it depends on the input data as in "put this[fld]:=10"), +// then the elems of that path slot are nil. +func (c *checker) assignments(in super.Type, assignments []sem.Assignment) []pathType { + var paths []pathType + for _, a := range assignments { + var path []string + if this, ok := a.LHS.(*sem.ThisExpr); ok { + path = this.Path + } + typ := c.expr(in, a.RHS) + paths = append(paths, pathType{path, typ}) + } + return paths +} + +func (c *checker) sortExprs(typ super.Type, exprs []sem.SortExpr) { + for _, se := range exprs { + c.expr(typ, se.Expr) + } +} + +func (c *checker) exprs(typ super.Type, exprs []sem.Expr) []super.Type { + var types []super.Type + for _, e := range exprs { + types = append(types, c.expr(typ, e)) + } + return types +} + +func (c *checker) expr(typ super.Type, e sem.Expr) super.Type { + switch e := e.(type) { + case nil: + return c.unknown + case *sem.AggFunc: + c.expr(typ, e.Expr) + c.expr(typ, e.Where) + // XXX This will be handled in a subsequent PR where we add type signatures + // to the package containing the agg func implementations. 
+ return c.unknown + case *sem.ArrayExpr: + return c.t.sctx.LookupTypeArray(c.arrayElems(typ, e.Elems)) + case *sem.BadExpr: + return c.unknown + case *sem.BinaryExpr: + lhs := c.expr(typ, e.LHS) + rhs := c.expr(typ, e.RHS) + switch e.Op { + case "and", "or": + c.logical(e.LHS, e.RHS, lhs, rhs) + return super.TypeBool + case "in": + c.in(e, e.LHS, e.RHS, lhs, rhs) + return super.TypeBool + case "==", "!=": + return c.equality(lhs, rhs) + case "<", "<=", ">", ">=": + return c.comparison(lhs, rhs) + case "+", "-", "*", "/", "%": + if e.Op == "+" { + return c.plus(e, lhs, rhs) + } + return c.arithmetic(e.LHS, e.RHS, lhs, rhs) + default: + panic(e.Op) + } + case *sem.CallExpr: + var types []super.Type + for _, e := range e.Args { + types = append(types, c.expr(typ, e)) + } + if isBuiltin(e.Tag) { + return c.callBuiltin(e, types) + } + return c.callFunc(e, types) + case *sem.CondExpr: + c.boolean(e.Cond, c.expr(typ, e.Cond)) + return c.fuse([]super.Type{c.expr(typ, e.Then), c.expr(typ, e.Else)}) + case *sem.DotExpr: + typ, _ := c.deref(e.Node, c.expr(typ, e.LHS), e.RHS) + return typ + case *sem.IndexExpr: + typ, _ := c.indexOf(e.Expr, e.Index, c.expr(typ, e.Expr), c.expr(typ, e.Index)) + return typ + case *sem.IsNullExpr: + c.expr(typ, e.Expr) + return super.TypeBool + case *sem.LiteralExpr: + if val, err := sup.ParseValue(c.t.sctx, e.Value); err == nil { + return val.Type() + } + return c.unknown + case *sem.MapCallExpr: + containerType := c.expr(typ, e.Expr) + elemType, ok := c.isContainer(containerType) + if !ok { + c.error(e.Expr, errors.New("map entity must be an array or set")) + return c.unknown + } + c.pushErrs() + lambdaType := c.expr(elemType, e.Lambda) + errs := c.popErrs() + if len(errs) != 0 { + c.error(errs[0].loc, fmt.Errorf("in functon called from map: %w", errs[0].err)) + } + return lambdaType + case *sem.MapExpr: + // fuser could take type at a time instead of array + var keyTypes []super.Type + var valTypes []super.Type + for _, entry := range 
e.Entries { + keyTypes = append(keyTypes, c.expr(typ, entry.Key)) + valTypes = append(valTypes, c.expr(typ, entry.Value)) + } + return c.t.sctx.LookupTypeMap(c.fuse(keyTypes), c.fuse(valTypes)) + case *sem.RecordExpr: + return c.recordElems(typ, e.Elems) + case *sem.RegexpMatchExpr: + if !hasString(c.expr(typ, e.Expr)) { + c.error(e.Expr, errors.New("string match must apply to type string")) + } + return super.TypeBool + case *sem.RegexpSearchExpr: + if !hasString(c.expr(typ, e.Expr)) { + c.error(e.Expr, errors.New("string match must apply to type string")) + } + return super.TypeBool + case *sem.SearchTermExpr: + c.expr(typ, e.Expr) + return super.TypeBool + case *sem.SetExpr: + // A set literal has a set type; building an array type here would make + // downstream checks (e.g. the in operator) treat sets as arrays. + return c.t.sctx.LookupTypeSet(c.arrayElems(typ, e.Elems)) + case *sem.SliceExpr: + c.integer(e.From, c.expr(typ, e.From)) + c.integer(e.To, c.expr(typ, e.To)) + container := c.expr(typ, e.Expr) + c.sliceable(e.Expr, container) + return container + case *sem.SubqueryExpr: + typ = c.seq(typ, e.Body) + if e.Array { + typ = c.t.sctx.LookupTypeArray(typ) + } + return typ + case *sem.ThisExpr: + for _, field := range e.Path { + typ, _ = c.deref(e.Node, typ, field) + } + return typ + case *sem.UnaryExpr: + typ = c.expr(typ, e.Operand) + switch e.Op { + case "-": + c.number(e.Operand, typ) + return typ + case "!": + c.boolean(e, typ) + return super.TypeBool + default: + panic(e.Op) + } + default: + panic(e) + } +} + +func (c *checker) isContainer(containerType super.Type) (super.Type, bool) { + switch typ := super.TypeUnder(containerType).(type) { + case *super.TypeArray: + return typ.Type, true + case *super.TypeSet: + return typ.Type, true + case *super.TypeError: + if isUnknown(typ) { + return c.unknown, true + } + } + return nil, false +} + +func (c *checker) arrayElems(typ super.Type, elems []sem.ArrayElem) super.Type { + fuser := c.newFuser() + for _, elem := range elems { + switch elem := elem.(type) { + case *sem.SpreadElem: + fuser.fuse(c.expr(typ, elem.Expr)) + case *sem.ExprElem: + 
fuser.fuse(c.expr(typ, elem.Expr)) + default: + panic(elem) + } + } + return fuser.Type(c) +} + +func (c *checker) recordElems(typ super.Type, elems []sem.RecordElem) super.Type { + fuser := c.newFuser() + for _, elem := range elems { + switch elem := elem.(type) { + case *sem.SpreadElem: + fuser.fuse(c.expr(typ, elem.Expr)) + case *sem.FieldElem: + column := super.Field{Name: elem.Name, Type: c.expr(typ, elem.Value)} + fuser.fuse(c.t.sctx.MustLookupTypeRecord([]super.Field{column})) + default: + panic(elem) + } + } + return fuser.Type(c) +} + +func (c *checker) callBuiltin(call *sem.CallExpr, args []super.Type) super.Type { + // XXX This will be handled in a subsequent PR where we add type signatures + // to the package containing the built-in function implementations. + return c.unknown +} + +// callFunc type-checks a call to a user-defined function by checking the +// function body against a record type whose fields are the parameter names +// bound to the argument types. Results are memoized in c.checked. +func (c *checker) callFunc(call *sem.CallExpr, args []super.Type) super.Type { + f := c.funcs[call.Tag] + if len(args) != len(f.Params) { + // The translator has already checked that len(args) is len(params) + // but when there's an error, mismatches can still show up here so + // we ignore these here. + return c.unknown + } + fields := make([]super.Field, 0, len(args)) + for k, param := range f.Params { + fields = append(fields, super.Field{Name: param, Type: args[k]}) + } + argsType := c.t.sctx.MustLookupTypeRecord(fields) + // The memo key must identify the function as well as its argument types; + // otherwise two functions with the same parameter names and argument types + // would share one entry and the second would never be checked. Wrapping the + // args record in a one-field record named by the function tag keeps the key + // a super.Type. + key := c.t.sctx.MustLookupTypeRecord([]super.Field{{Name: call.Tag, Type: argsType}}) + if typ, ok := c.checked[key]; ok { + return typ + } + // For recursive calls, we use unknown for the base type to halt the recursion + // then fill the actual type computed from the unknown base type. This has the + // downside that we now have an error unknown in the sum type of the return value + // of all recursive functions. When we add (optional) type signatures to functions, + // this problem will (partially) go away. 
+ c.checked[key] = c.unknown + typ := c.expr(argsType, f.Body) + c.checked[key] = typ + return typ +} + +type pathType struct { + elems []string + typ super.Type +} + +func (c *checker) pathsToType(paths []pathType) super.Type { + fuser := c.newFuser() + for _, path := range paths { + fuser.fuse(c.pathToRec(path.typ, path.elems)) + } + return fuser.Type(c) +} + +func (c *checker) pathToRec(typ super.Type, elems []string) super.Type { + for _, elem := range slices.Backward(elems) { + typ = c.t.sctx.MustLookupTypeRecord([]super.Field{{Name: elem, Type: typ}}) + } + return typ +} + +func (c *checker) dropPaths(typ super.Type, drops []path) super.Type { + for _, drop := range drops { + typ = c.dropPath(typ, drop) + } + return typ +} + +func (c *checker) dropPath(typ super.Type, drop path) super.Type { + if len(drop.elems) == 0 { + return nil + } + // Drop is a little tricky since it passes through non-record values so + // we need to preserve any union type presented to its input. pickRec returns + // a copy of the types slice so we can modify it. 
+ types, pick := pickRec(typ) + if types == nil { + // drop passes through non-records + return typ + } + rec := super.TypeUnder(types[pick]).(*super.TypeRecord) + off, ok := rec.IndexOfField(drop.elems[0]) + if !ok { + if !hasUnknown(typ) { + c.error(drop.loc, fmt.Errorf("no such field to drop: %q", drop.elems[0])) + } + return c.unknown + } + fields := slices.Clone(rec.Fields) + childType := c.dropPath(fields[off].Type, path{drop.loc, drop.elems[1:]}) + if childType == nil { + fields = slices.Delete(fields, off, off+1) + } else { + fields[off].Type = childType + } + types[pick] = c.t.sctx.MustLookupTypeRecord(fields) + if len(types) > 1 { + return c.t.sctx.LookupTypeUnion(types) + } + return types[0] +} + +func pickRec(typ super.Type) ([]super.Type, int) { + switch typ := super.TypeUnder(typ).(type) { + case *super.TypeRecord: + return []super.Type{typ}, 0 + case *super.TypeUnion: + types := slices.Clone(typ.Types) + for k := range types { + if _, ok := super.TypeUnder(types[k]).(*super.TypeRecord); ok { + return types, k + } + } + } + return nil, 0 +} + +func (c *checker) putPaths(typ super.Type, puts []pathType) super.Type { + // Fuse each path as a single-record path into the input type. 
+ fuser := c.newFuser() + fuser.fuse(typ) + for _, put := range puts { + fuser.fuse(c.pathToRec(put.typ, put.elems)) + } + return fuser.Type(c) +} + +type path struct { + loc ast.Node + elems []string +} + +func (c *checker) lvalsToPaths(exprs []sem.Expr) []path { + var paths []path + for _, e := range exprs { + this, ok := e.(*sem.ThisExpr) + if !ok { + return nil + } + paths = append(paths, path{loc: this.Node, elems: this.Path}) + } + return paths +} + +func (c *checker) fuse(types []super.Type) super.Type { + if len(types) == 0 { + return c.unknown + } + if len(types) == 1 { + return types[0] + } + fuser := c.newFuser() + for _, typ := range types { + fuser.fuse(typ) + } + return fuser.Type(c) +} + +func (c *checker) boolean(loc ast.Node, typ super.Type) bool { + ok := typeCheck(typ, func(typ super.Type) bool { + return typ == super.TypeBool || typ == super.TypeNull + }) + if !ok { + c.error(loc, fmt.Errorf("boolean type required, encountered type %q", sup.FormatType(typ))) + } + return ok +} + +func typeCheck(typ super.Type, check func(super.Type) bool) bool { + if isUnknown(typ) { + return true + } + if u, ok := super.TypeUnder(typ).(*super.TypeUnion); ok { + for _, t := range u.Types { + if typeCheck(t, check) { + return true + } + } + return false + } + return check(typ) +} + +func (c *checker) integer(loc ast.Node, typ super.Type) bool { + ok := typeCheck(typ, func(typ super.Type) bool { + return super.IsInteger(typ.ID()) + }) + if !ok { + c.error(loc, fmt.Errorf("integer type required, encountered %s", sup.FormatType(typ))) + } + return ok +} + +func (c *checker) number(loc ast.Node, typ super.Type) bool { + ok := typeCheck(typ, func(typ super.Type) bool { + id := typ.ID() + return super.IsNumber(id) || id == super.IDNull + }) + if !ok { + c.error(loc, fmt.Errorf("numeric type required, encountered %s", sup.FormatType(typ))) + } + return ok +} + +func (c *checker) deref(loc ast.Node, typ super.Type, field string) (super.Type, bool) { + switch typ := 
super.TypeUnder(typ).(type) { + case *super.TypeOfNull: + return super.TypeNull, true + case *super.TypeError: + if isUnknown(typ) { + return typ, true + } + case *super.TypeMap: + return c.indexMap(loc, typ, super.TypeString) + case *super.TypeRecord: + which, ok := typ.IndexOfField(field) + if !ok { + if !hasUnknown(typ) { + c.error(loc, fmt.Errorf("no such field %q", field)) + } + return c.unknown, false + } + return typ.Fields[which].Type, true + case *super.TypeUnion: + // Push the error stack and if we find some valid deref, + // we'll discard the errors. Otherwise, we'll keep them. + c.pushErrs() + var types []super.Type + var valid bool + for _, t := range typ.Types { + typ, ok := c.deref(loc, t, field) + if ok { + types = append(types, typ) + valid = true + } + } + errs := c.popErrs() + if !valid { + c.keepErrs(errs) + } + return c.fuse(types), valid + } + c.error(loc, fmt.Errorf("%q no such field", field)) + return c.unknown, false +} + +func (c *checker) logical(lloc, rloc ast.Node, lhs, rhs super.Type) { + c.boolean(lloc, lhs) + c.boolean(rloc, rhs) +} + +func (c *checker) in(loc, lloc, rloc ast.Node, lhs, rhs super.Type) bool { + switch typ := super.TypeUnder(rhs).(type) { + case *super.TypeOfNull: + case *super.TypeError: + return isUnknown(typ) + case *super.TypeArray: + if !comparable(lhs, typ.Type) { + c.error(lloc, errors.New("left-hand side of in operator be compatible with array type")) + return false + } + case *super.TypeSet: + if !comparable(lhs, typ.Type) { + c.error(lloc, errors.New("left-hand side of in operator be compatible with set type")) + return false + } + case *super.TypeRecord: + var types []super.Type + for _, field := range typ.Fields { + types = append(types, field.Type) + } + if !comparable(lhs, c.fuse(types)) { + c.error(lloc, errors.New("left-hand side of in operator not compatible with any fields of record on right-hand side")) + return false + } + case *super.TypeMap: + if !comparable(lhs, typ.ValType) && !comparable(lhs, 
typ.KeyType) { + c.error(lloc, errors.New("left-hand side of in operator be compatible with map value or key type")) + return false + } + case *super.TypeOfNet: + c.error(rloc, errors.New("right-hand side of in operator cannot be type net; consider cidr_match()")) + return false + case *super.TypeUnion: + // Push the error stack and if some member of the union is a valid + // right-hand side, we'll discard the errors. Otherwise, we'll keep them. + c.pushErrs() + var valid bool + for _, t := range typ.Types { + if c.in(loc, lloc, rloc, lhs, t) { + valid = true + } + } + errs := c.popErrs() + if !valid { + c.keepErrs(errs) + } + return valid + default: + // If the RHS is not a container, see if they are compatible in terms of + // equality comparison. The in operator for SuperSQL is broader than SQL + // and is true for equality of any value as well as equality containment + // of the LHS in the RHS. Only a mismatch is invalid; a comparable scalar + // must report true so that a union containing it is accepted. + if !comparable(lhs, rhs) { + c.error(loc, fmt.Errorf("scalar type mismatch for 'in' operator where right-hand side is not container type: %s", sup.FormatType(typ))) + return false + } + } + return true +} + +// NOTE(review): equality and comparison discard the result of comparable, so a +// mismatch between lhs and rhs is not reported here. +func (c *checker) equality(lhs, rhs super.Type) super.Type { + comparable(lhs, rhs) + return super.TypeBool +} + +func (c *checker) comparison(lhs, rhs super.Type) super.Type { + comparable(lhs, rhs) + return super.TypeBool +} + +func comparable(a, b super.Type) bool { + if isUnknown(a) || isUnknown(b) { + return true + } + if u, ok := super.TypeUnder(a).(*super.TypeUnion); ok { + for _, t := range u.Types { + if comparable(t, b) { + return true + } + } + return false + } + if u, ok := super.TypeUnder(b).(*super.TypeUnion); ok { + for _, t := range u.Types { + if comparable(a, t) { + return true + } + } + return false + } + aid := super.TypeUnder(a).ID() + bid := super.TypeUnder(b).ID() + if aid == bid || aid == super.IDNull || bid == super.IDNull { + return true + } + if super.IsNumber(aid) { + return super.IsNumber(bid) + } + switch super.TypeUnder(a).(type) { + case *super.TypeRecord: + 
_, ok := super.TypeUnder(b).(*super.TypeRecord) + return ok + case *super.TypeArray: + if _, ok := super.TypeUnder(b).(*super.TypeArray); ok { + return ok + } + _, ok := super.TypeUnder(b).(*super.TypeSet) + return ok + case *super.TypeSet: + if _, ok := super.TypeUnder(b).(*super.TypeArray); ok { + return ok + } + _, ok := super.TypeUnder(b).(*super.TypeSet) + return ok + case *super.TypeMap: + _, ok := super.TypeUnder(b).(*super.TypeMap) + return ok + } + return false +} + +func (c *checker) arithmetic(lloc, rloc ast.Node, lhs, rhs super.Type) super.Type { + if isUnknown(lhs) || isUnknown(rhs) { + return c.unknown + } + c.number(lloc, lhs) + c.number(rloc, rhs) + return c.fuse([]super.Type{lhs, rhs}) +} + +func (c *checker) plus(loc ast.Node, lhs, rhs super.Type) super.Type { + if isUnknown(lhs) || isUnknown(rhs) { + return c.unknown + } + if hasString(lhs) && hasString(rhs) { + return c.fuse([]super.Type{lhs, rhs}) + } + if hasNumber(lhs) && hasNumber(rhs) { + return c.fuse([]super.Type{lhs, rhs}) + } + c.error(loc, errors.New("type mismatch")) + return c.unknown +} + +func hasNumber(typ super.Type) bool { + id := super.TypeUnder(typ).ID() + if super.IsNumber(id) || id == super.IDNull { + return true + } + if u, ok := super.TypeUnder(typ).(*super.TypeUnion); ok { + if slices.ContainsFunc(u.Types, hasNumber) { + return true + } + } + return false +} + +func hasString(typ super.Type) bool { + switch typ := super.TypeUnder(typ).(type) { + case *super.TypeError: + return isUnknown(typ) + case *super.TypeOfString, *super.TypeOfNull: + return true + case *super.TypeUnion: + return slices.ContainsFunc(typ.Types, hasString) + } + return false +} + +func isUnknown(typ super.Type) bool { + if err, ok := super.TypeUnder(typ).(*super.TypeError); ok { + if rec, ok := err.Type.(*super.TypeRecord); ok { + return len(rec.Fields) == 0 + } + } + return false +} + +func hasUnknown(typ super.Type) bool { + if u, ok := super.TypeUnder(typ).(*super.TypeUnion); ok { + if 
slices.ContainsFunc(u.Types, hasUnknown) { + return true + } + } + return isUnknown(typ) +} + +func (c *checker) indexOf(cloc, iloc ast.Node, container, index super.Type) (super.Type, bool) { + if hasUnknown(container) { + return c.unknown, true + } + switch typ := super.TypeUnder(container).(type) { + case *super.TypeArray: + c.pushErrs() + c.integer(iloc, index) + if errs := c.popErrs(); len(errs) > 0 { + c.keepErrs(errs) + return typ.Type, false + } + return typ.Type, true + case *super.TypeSet: + c.pushErrs() + c.integer(iloc, index) + if errs := c.popErrs(); len(errs) > 0 { + c.keepErrs(errs) + return typ.Type, false + } + return typ.Type, true + case *super.TypeRecord: + ok := typeCheck(index, func(typ super.Type) bool { + id := super.TypeUnder(typ).ID() + return id == super.IDString || super.IsInteger(id) || id == super.IDNull + }) + if !ok { + c.error(iloc, errors.New("string or integer type required to index record")) + } + var types []super.Type + for _, field := range typ.Fields { + types = append(types, field.Type) + } + return c.fuse(types), true + case *super.TypeMap: + if !comparable(typ.KeyType, index) { + c.error(iloc, errors.New("type mismatch indexing map")) + return typ.ValType, false + } + return typ.ValType, true + case *super.TypeUnion: + c.pushErrs() + var types []super.Type + var valid bool + for _, t := range typ.Types { + typ, ok := c.indexOf(cloc, iloc, t, index) + if ok { + types = append(types, typ) + valid = true + } + } + errs := c.popErrs() + if !valid { + c.keepErrs(errs) + } + return c.fuse(types), valid + default: + c.error(cloc, fmt.Errorf("indexed entity is not indexable")) + return c.unknown, false + } +} + +func (c *checker) indexMap(loc ast.Node, m *super.TypeMap, index super.Type) (super.Type, bool) { + if isUnknown(index) { + return c.unknown, true + } + if !c.coerceable(index, m.KeyType) { + c.error(loc, errors.New("type mismatch between map key and index")) + return c.unknown, false + } + return m.ValType, true +} + 
+func (c *checker) sliceable(loc ast.Node, typ super.Type) { + if hasUnknown(typ) { + return + } + switch super.TypeUnder(typ).(type) { + case *super.TypeArray, *super.TypeSet, *super.TypeRecord: + default: + c.error(loc, fmt.Errorf("sliced entity is not sliceable")) + } +} + +func (c *checker) coerceable(from, to super.Type) bool { + if isUnknown(from) || isUnknown(to) { + return true + } + if u, ok := super.TypeUnder(from).(*super.TypeUnion); ok { + for _, t := range u.Types { + if c.coerceable(t, to) { + return true + } + } + return false + } + if u, ok := super.TypeUnder(to).(*super.TypeUnion); ok { + for _, t := range u.Types { + if c.coerceable(from, t) { + return true + } + } + return false + } + fromID := super.TypeUnder(from).ID() + toID := super.TypeUnder(to).ID() + if fromID == toID || fromID == super.IDNull || toID == super.IDNull { + return true + } + if super.IsNumber(toID) { + return super.IsNumber(fromID) + } + switch super.TypeUnder(to).(type) { + case *super.TypeRecord: + _, ok := super.TypeUnder(from).(*super.TypeRecord) + return ok + case *super.TypeArray: + if _, ok := super.TypeUnder(from).(*super.TypeArray); ok { + return ok + } + _, ok := super.TypeUnder(from).(*super.TypeSet) + return ok + case *super.TypeSet: + if _, ok := super.TypeUnder(from).(*super.TypeArray); ok { + return ok + } + _, ok := super.TypeUnder(from).(*super.TypeSet) + return ok + case *super.TypeMap: + _, ok := super.TypeUnder(from).(*super.TypeMap) + return ok + } + return false +} + +func (c *checker) pushErrs() { + c.estack = append(c.estack, nil) +} + +func (c *checker) popErrs() errlist { + n := len(c.estack) - 1 + errs := c.estack[n] + c.estack = c.estack[:n] + return errs +} + +func (c *checker) keepErrs(errs errlist) { + n := len(c.estack) - 1 + c.estack[n] = append(c.estack[n], errs...) 
+} + +func (c *checker) error(loc ast.Node, err error) { + c.estack[len(c.estack)-1].error(loc, err) +} + +func (c *checker) newFuser() *fuser { + return &fuser{sctx: c.t.sctx} +} + +type fuser struct { + sctx *super.Context + typ super.Type + sch *agg.Schema +} + +func (f *fuser) fuse(typ super.Type) { + if f.sch != nil { + f.sch.Mixin(typ) + } else if f.typ == nil { + f.typ = typ + } else if f.typ != typ { + f.sch = agg.NewSchema(f.sctx) + f.sch.Mixin(f.typ) + f.sch.Mixin(typ) + } +} + +func (f *fuser) Type(c *checker) super.Type { + if f.sch != nil { + return f.sch.Type() + } + if f.typ != nil { + return f.typ + } + return c.unknown +} diff --git a/compiler/semantic/evaluator.go b/compiler/semantic/evaluator.go index ca55ad91d6..03b233a67b 100644 --- a/compiler/semantic/evaluator.go +++ b/compiler/semantic/evaluator.go @@ -15,7 +15,7 @@ import ( type evaluator struct { translator *translator in map[string]*sem.FuncDef - errs []errloc + errs errlist constThis bool bad bool } @@ -34,7 +34,7 @@ func newEvaluator(t *translator, funcs map[string]*sem.FuncDef) *evaluator { func (e *evaluator) mustEval(sctx *super.Context, expr sem.Expr) (super.Value, bool) { val, ok := e.maybeEval(sctx, expr) - e.flushErrs() + e.errs.flushErrs(e.translator.reporter) return val, ok } @@ -42,7 +42,7 @@ func (e *evaluator) maybeEval(sctx *super.Context, expr sem.Expr) (super.Value, if literal, ok := expr.(*sem.LiteralExpr); ok { val, err := sup.ParseValue(sctx, literal.Value) if err != nil { - e.error(literal.Node, err) + e.errs.error(literal.Node, err) return val, false } return val, true @@ -72,7 +72,7 @@ func (e *evaluator) maybeEval(sctx *super.Context, expr sem.Expr) (super.Value, main := newDagen(e.translator.reporter).assembleExpr(resolvedExpr, funcs) val, err := rungen.EvalAtCompileTime(sctx, main) if err != nil { - e.error(expr, err) + e.errs.error(expr, err) return val, false } return val, true @@ -103,7 +103,7 @@ func (e *evaluator) op(op sem.Op) bool { *sem.PoolMetaScan, 
*sem.CommitMetaScan, *sem.DeleteScan: - e.error(op, errors.New("cannot read data in constant expression")) + e.errs.error(op, errors.New("cannot read data in constant expression")) return false case *sem.NullScan: return true @@ -276,7 +276,7 @@ func (e *evaluator) expr(expr sem.Expr) bool { return e.seq(expr.Body) case *sem.ThisExpr: if !e.constThis { - e.error(expr, fmt.Errorf("cannot reference '%s' in constant expression", quotedPath(expr.Path))) + e.errs.error(expr, fmt.Errorf("cannot reference '%s' in constant expression", quotedPath(expr.Path))) } return e.constThis case *sem.UnaryExpr: @@ -335,12 +335,14 @@ func (e *evaluator) recordElems(elems []sem.RecordElem) bool { return isConst } -func (e *evaluator) error(loc ast.Node, err error) { - e.errs = append(e.errs, errloc{loc, err}) +type errlist []errloc + +func (e *errlist) error(loc ast.Node, err error) { + *e = append(*e, errloc{loc, err}) } -func (e *evaluator) flushErrs() { - for _, info := range e.errs { - e.translator.error(info.loc, info.err) +func (e errlist) flushErrs(r reporter) { + for _, info := range e { + r.error(info.loc, info.err) } } diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index 354f4cadf8..8b67ffb18f 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -162,6 +162,7 @@ func (t *translator) fileScanColumns(op *sem.FileScan) ([]string, bool) { } defer sr.Close() cols, err := parquetio.TopLevelFieldNames(sr) + op.Type = parquetio.Type(t.sctx, sr) return cols, err == nil } @@ -919,7 +920,7 @@ func (t *translator) semOp(o ast.Op, seq sem.Seq) sem.Seq { &sem.FieldElem{ Node: o.Expr, Name: "on", - Value: sem.NewThis(nil /*XXX*/, nil), + Value: sem.NewThis(o.Expr, nil), }, }, }}, diff --git a/compiler/semantic/projection.go b/compiler/semantic/projection.go index 28bef5f9e6..5328a1ddcc 100644 --- a/compiler/semantic/projection.go +++ b/compiler/semantic/projection.go @@ -22,7 +22,7 @@ import ( // the agg functions in the expr field. 
type column struct { name string - loc ast.Expr + loc ast.Node expr sem.Expr isAgg bool } diff --git a/compiler/semantic/sem/op.go b/compiler/semantic/sem/op.go index e13a9069f7..6895839d7c 100644 --- a/compiler/semantic/sem/op.go +++ b/compiler/semantic/sem/op.go @@ -8,6 +8,7 @@ package sem import ( + "github.com/brimdata/super" "github.com/brimdata/super/compiler/ast" "github.com/brimdata/super/order" "github.com/segmentio/ksuid" @@ -47,6 +48,7 @@ type ( } FileScan struct { ast.Node + Type super.Type Path string Format string } diff --git a/compiler/semantic/sql.go b/compiler/semantic/sql.go index 2343eca607..d11ebca8c6 100644 --- a/compiler/semantic/sql.go +++ b/compiler/semantic/sql.go @@ -102,7 +102,7 @@ func (t *translator) genColumns(proj projection, sch *selectSchema, seq sem.Seq) if notFirst { elems = append(elems, &sem.SpreadElem{ Node: col.loc, - Expr: sem.NewThis(col.expr, []string{"out"}), + Expr: sem.NewThis(col.loc, []string{"out"}), }) } else { notFirst = true @@ -232,7 +232,7 @@ func (t *translator) genAggregateOutput(loc ast.Node, proj projection, keyExprs // Look for an exact-match of a column alias which would // convert to path out. in the name resolution of the // grouping expression. 
- alias := sem.NewThis(col.expr, []string{"out", col.name}) + alias := sem.NewThis(col.loc, []string{"out", col.name}) which = exprMatch(alias, keyExprs) } if col.isAgg { @@ -669,7 +669,7 @@ func (t *translator) semProjection(sch *selectSchema, args []ast.SQLAsExpr, func var proj projection for _, as := range args { if isStar(as) { - proj = append(proj, column{}) + proj = append(proj, column{loc: as}) continue } col := t.semAs(sch, as, funcs) diff --git a/compiler/semantic/ztests/checker-drop-union.yaml b/compiler/semantic/ztests/checker-drop-union.yaml new file mode 100644 index 0000000000..f0af44e8d7 --- /dev/null +++ b/compiler/semantic/ztests/checker-drop-union.yaml @@ -0,0 +1,5 @@ +spq: values {x:{y:1,w:1},z:1},1 | drop x.y | values this+1 + +output: | + error("incompatible types") + 2 diff --git a/compiler/semantic/ztests/checker-drop.yaml b/compiler/semantic/ztests/checker-drop.yaml new file mode 100644 index 0000000000..174060e413 --- /dev/null +++ b/compiler/semantic/ztests/checker-drop.yaml @@ -0,0 +1,6 @@ +spq: values {x:{y:1,w:1},z:1} | drop x.y | drop x.y + +error: | + no such field to drop: "y" at line 1, column 44: + values {x:{y:1,w:1},z:1} | drop x.y | drop x.y + ~~~ diff --git a/compiler/semantic/ztests/checker-func.yaml b/compiler/semantic/ztests/checker-func.yaml new file mode 100644 index 0000000000..aad02b7e7f --- /dev/null +++ b/compiler/semantic/ztests/checker-func.yaml @@ -0,0 +1,8 @@ +spq: | + fn z(a):a[0] + values z(1) + +error: | + indexed entity is not indexable at line 1, column 9: + fn z(a):a[0] + ~ diff --git a/compiler/semantic/ztests/checker-in-map.yaml b/compiler/semantic/ztests/checker-in-map.yaml new file mode 100644 index 0000000000..23ae0e6077 --- /dev/null +++ b/compiler/semantic/ztests/checker-in-map.yaml @@ -0,0 +1,7 @@ +spq: | + values 'foo' in |{1:2}| + +error: | + left-hand side of in operator be compatible with map value or key type at line 1, column 8: + values 'foo' in |{1:2}| + ~~~~~ diff --git 
a/compiler/semantic/ztests/checker-in-net.yaml b/compiler/semantic/ztests/checker-in-net.yaml new file mode 100644 index 0000000000..c2e35df953 --- /dev/null +++ b/compiler/semantic/ztests/checker-in-net.yaml @@ -0,0 +1,6 @@ +spq: values 10.1.1.1 in 192.168.0.0/16 + +error: | + right-hand side of in operator cannot be type net; consider cidr_match() at line 1, column 20: + values 10.1.1.1 in 192.168.0.0/16 + ~~~~~~~~~~~~~~ diff --git a/compiler/semantic/ztests/checker-in-rec.yaml b/compiler/semantic/ztests/checker-in-rec.yaml new file mode 100644 index 0000000000..ea022327e6 --- /dev/null +++ b/compiler/semantic/ztests/checker-in-rec.yaml @@ -0,0 +1,7 @@ +spq: | + values 'foo' in {x:1,y:bool} + +error: | + left-hand side of in operator not compatible with any fields of record on right-hand side at line 1, column 8: + values 'foo' in {x:1,y:bool} + ~~~~~ diff --git a/compiler/semantic/ztests/checker-in-scalar-err.yaml b/compiler/semantic/ztests/checker-in-scalar-err.yaml new file mode 100644 index 0000000000..5e4a489254 --- /dev/null +++ b/compiler/semantic/ztests/checker-in-scalar-err.yaml @@ -0,0 +1,6 @@ +spq: values 1 in 'foo' + +error: | + scalar type mismatch for 'in' operator where right-hand side is not container type: string at line 1, column 8: + values 1 in 'foo' + ~~~~~~~~~~ diff --git a/compiler/semantic/ztests/checker-in-union.yaml b/compiler/semantic/ztests/checker-in-union.yaml new file mode 100644 index 0000000000..20e3dae3cb --- /dev/null +++ b/compiler/semantic/ztests/checker-in-union.yaml @@ -0,0 +1,10 @@ +spq: | + fork + (values 'foo') + (values [1,2]) + | sort this + | values 1 in this + +output: | + false + true diff --git a/compiler/semantic/ztests/checker-map-func-type.yaml b/compiler/semantic/ztests/checker-map-func-type.yaml new file mode 100644 index 0000000000..3e79305818 --- /dev/null +++ b/compiler/semantic/ztests/checker-map-func-type.yaml @@ -0,0 +1,8 @@ +spq: | + fn f(a):a[1] + values map([1,2], &f) + +error: | + in functon called 
from map: indexed entity is not indexable at line 1, column 9: + fn f(a):a[1] + ~ diff --git a/compiler/semantic/ztests/checker-minus-strings.yaml b/compiler/semantic/ztests/checker-minus-strings.yaml new file mode 100644 index 0000000000..edb21a2ee9 --- /dev/null +++ b/compiler/semantic/ztests/checker-minus-strings.yaml @@ -0,0 +1,9 @@ +spq: values "hello" - "world" + +error: | + numeric type required, encountered string at line 1, column 8: + values "hello" - "world" + ~~~~~~~ + numeric type required, encountered string at line 1, column 18: + values "hello" - "world" + ~~~~~~~ diff --git a/compiler/semantic/ztests/checker-parquet.yaml b/compiler/semantic/ztests/checker-parquet.yaml new file mode 100644 index 0000000000..0ce6c0f8dd --- /dev/null +++ b/compiler/semantic/ztests/checker-parquet.yaml @@ -0,0 +1,10 @@ +script: | + super -f parquet -o t.parquet -c "values {x:1,y:2}" + ! super -c "from t.parquet | values z" + +outputs: + - name: stderr + data: | + no such field "z" at line 1, column 25: + from t.parquet | values z + ~ diff --git a/compiler/semantic/ztests/checker-plus-ip.yaml b/compiler/semantic/ztests/checker-plus-ip.yaml new file mode 100644 index 0000000000..53a88ebf34 --- /dev/null +++ b/compiler/semantic/ztests/checker-plus-ip.yaml @@ -0,0 +1,6 @@ +spq: values 10.1.1.1 + 1 + +error: | + type mismatch at line 1, column 8: + values 10.1.1.1 + 1 + ~~~~~~~~~~~~ diff --git a/compiler/semantic/ztests/checker-plus-mismatch.yaml b/compiler/semantic/ztests/checker-plus-mismatch.yaml new file mode 100644 index 0000000000..2857a5d331 --- /dev/null +++ b/compiler/semantic/ztests/checker-plus-mismatch.yaml @@ -0,0 +1,8 @@ +spq: | + values {x:true,y:1}, {x:"foo",y:1} + | values x+y + +error: | + type mismatch at line 2, column 10: + | values x+y + ~~~ diff --git a/compiler/semantic/ztests/checker-plus-string-int.yaml b/compiler/semantic/ztests/checker-plus-string-int.yaml new file mode 100644 index 0000000000..7af1989470 --- /dev/null +++ 
b/compiler/semantic/ztests/checker-plus-string-int.yaml @@ -0,0 +1,8 @@ +spq: | + values {x:"foo",y:1}, {x:1::int8,y:1} + | values x+y + +error: | + type mismatch at line 2, column 10: + | values x+y + ~~~ diff --git a/compiler/semantic/ztests/no-such-builtin.yaml b/compiler/semantic/ztests/no-such-builtin.yaml index c752443af2..89abb90a6f 100644 --- a/compiler/semantic/ztests/no-such-builtin.yaml +++ b/compiler/semantic/ztests/no-such-builtin.yaml @@ -1,6 +1,7 @@ spq: | fn foo(f,x):f(x) - values foo(nosuchfunction, 1) + values {nosuchfunction:0} + | values foo(nosuchfunction, 1) | values foo(&nosuchfunction, 2) error: | diff --git a/compiler/sfmt/ztests/dot.yaml b/compiler/sfmt/ztests/dot.yaml index e1453fb63b..9f6123ca8a 100644 --- a/compiler/sfmt/ztests/dot.yaml +++ b/compiler/sfmt/ztests/dot.yaml @@ -1,9 +1,10 @@ script: | - super compile -C -dag 'nested[1].field == 1' + super compile -C -dag 'values {nested:[{field:1}]} | nested[1].field == 1' outputs: - name: stdout data: | null + | values {nested:[{field:1}]} | where nested[1]["field"]==1 | output main diff --git a/compiler/sfmt/ztests/unnest.yaml b/compiler/sfmt/ztests/unnest.yaml index dc8495450d..924daabc49 100644 --- a/compiler/sfmt/ztests/unnest.yaml +++ b/compiler/sfmt/ztests/unnest.yaml @@ -7,29 +7,29 @@ inputs: - name: test.spq data: | -- Expressions - values (unnest a | ?b) + values (unnest [a] | ?b) -- Operators - | unnest a - | unnest a into ( ?b ) + | unnest [a] + | unnest [a] into ( ?b ) outputs: - name: stdout data: | values ( - unnest a + unnest [a] | search b ) - | unnest a - | unnest a into ( + | unnest [a] + | unnest [a] into ( search b ) === null | values ( - unnest a + unnest [a] | where search("b")) - | unnest a - | unnest a into ( + | unnest [a] + | unnest [a] into ( where search("b") ) | output main diff --git a/compiler/ztests/join.yaml b/compiler/ztests/join.yaml index 1d1c2cce36..6bbaeec311 100644 --- a/compiler/ztests/join.yaml +++ b/compiler/ztests/join.yaml @@ -1,5 +1,5 @@ 
script: | - super compile -dag -C 'join as {j,k} on k.kkey=j.jkey' + super compile -dag -C 'fork (values {jkey:1}) (values {kkey:1}) | join as {j,k} on k.kkey=j.jkey' ! super compile -dag -C 'join as {j,j} on key' echo // === >&2 ! super compile -dag -C 'join on foo.key=left.key' @@ -15,6 +15,13 @@ outputs: - name: stdout data: | null + | fork + ( + values {jkey:1} + ) + ( + values {kkey:1} + ) | inner join as {j,k} on k.kkey==j.jkey | output main - name: stderr diff --git a/compiler/ztests/lift-filters.yaml b/compiler/ztests/lift-filters.yaml index c368c1a68a..6c0c9481e5 100644 --- a/compiler/ztests/lift-filters.yaml +++ b/compiler/ztests/lift-filters.yaml @@ -1,5 +1,5 @@ script: | - super compile -C -O 'values {a:b} | where a==1 and c==2' + super compile -C -O 'values {a:b,c:d} | where a==1 and c==2' echo === super compile -C -O 'values {...a} | where b==1' echo === @@ -14,8 +14,8 @@ outputs: - name: stdout data: | null - | where b==1 and error("missing")==2 - | values {a:b} + | where b==1 and d==2 + | values {a:b,c:d} | output main === null diff --git a/compiler/ztests/merge-filters.yaml b/compiler/ztests/merge-filters.yaml index 90c85c3142..f2353e66a3 100644 --- a/compiler/ztests/merge-filters.yaml +++ b/compiler/ztests/merge-filters.yaml @@ -3,7 +3,7 @@ script: | echo === super compile -C -O 'fork ( from a | where b | where c ) ( from d | where e | where f ) | where g' echo === - super compile -C -O 'unnest a into ( where b | where c )' + super compile -C -O 'unnest [a] into ( where b | where c )' echo === super compile -C -O 'fork ( where a | where b ) ( where c | where d )' @@ -23,7 +23,7 @@ outputs: | output main === null - | unnest a into ( + | unnest [a] into ( where b and c ) | output main diff --git a/compiler/ztests/merge-values.yaml b/compiler/ztests/merge-values.yaml index dbc3da4d50..90f2becfa0 100644 --- a/compiler/ztests/merge-values.yaml +++ b/compiler/ztests/merge-values.yaml @@ -1,7 +1,7 @@ script: | - super compile -C -O 'values {a:1} | values 
a, {b:a}, c' + super compile -C -O 'values {a:1,c:1} | values a, {b:a}, c' echo === - super compile -C -O 'values {a,b} | aggregate count(a) by b, c' + super compile -C -O 'values {a,b,c:1} | aggregate count(a) by b, c' echo === super compile -C -O 'values {...a} | values {...b.c} | values d, {e}' echo === @@ -21,12 +21,12 @@ outputs: - name: stdout data: | null - | values 1, {b:1}, error("missing") + | values 1, {b:1}, 1 | output main === null | aggregate - count:=count(a) by b:=b,c:=error("missing") + count:=count(a) by b:=b,c:=1 | output main === null diff --git a/db/ztests/meta.yaml b/db/ztests/meta.yaml index b3fa962841..6aaa404bfb 100644 --- a/db/ztests/meta.yaml +++ b/db/ztests/meta.yaml @@ -8,7 +8,7 @@ script: | super db -S -c 'from :pools | drop id | sort name | drop ts' echo === super db -S -c 'from poolA@main:objects | {nameof:nameof(this),...this} | drop id' - super db -S -c 'from poolA:log | cut nameof(this) | drop ts' + super db -S -c 'from poolA:log | cut nameof(this)' inputs: - name: a.sup diff --git a/runtime/sam/expr/expr_test.go b/runtime/sam/expr/expr_test.go index 03bed7b9a8..8f65639e1e 100644 --- a/runtime/sam/expr/expr_test.go +++ b/runtime/sam/expr/expr_test.go @@ -326,9 +326,7 @@ func TestPattern(t *testing.T) { testSuccessful(t, `"abc" == "abc"`, "", "true") testSuccessful(t, `"abc" != "abc"`, "", "false") testSuccessful(t, "cidr_match(10.0.0.0/8, 10.1.1.1)", "", "true") - testSuccessful(t, "10.1.1.1 in 192.168.0.0/16", "", "false") testSuccessful(t, "!cidr_match(10.0.0.0/8, 10.1.1.1)", "", "false") - testSuccessful(t, "!(10.1.1.1 in 192.168.0.0/16)", "", "true") } func TestIn(t *testing.T) { @@ -460,16 +458,6 @@ func TestArithmetic(t *testing.T) { } // Test string concatenation testSuccessful(t, `"hello" + " world"`, record, `"hello world"`) - - // Test string arithmetic other than + fails - testSuccessful(t, `"hello" - " world"`, record, `error("type string incompatible with '-' operator")`) - testSuccessful(t, `"hello" * " world"`, 
record, `error("type string incompatible with '*' operator")`) - testSuccessful(t, `"hello" / " world"`, record, `error("type string incompatible with '/' operator")`) - - // Test that addition fails on an unsupported type - testSuccessful(t, "10.1.1.1 + 1", record, `error("incompatible types")`) - testSuccessful(t, "10.1.1.1 + 3.14159", record, `error("incompatible types")`) - testSuccessful(t, `10.1.1.1 + "foo"`, record, `error("incompatible types")`) } func TestArrayIndex(t *testing.T) { diff --git a/runtime/sam/expr/filter_test.go b/runtime/sam/expr/filter_test.go index bbd21cb684..782b80df1b 100644 --- a/runtime/sam/expr/filter_test.go +++ b/runtime/sam/expr/filter_test.go @@ -365,7 +365,7 @@ func TestFilters(t *testing.T) { {"a == 192.168.1.50", true}, {"a == 50.1.168.192", false}, {"a != 50.1.168.192", true}, - {"a in 192.168.0.0/16", false}, + {"cidr_match(192.168.0.0/16, a)", true}, {"a == 10.0.0.0/16", false}, {"a != 192.168.0.0/16", true}, }) diff --git a/runtime/sam/op/ztests/stateful-expr-reset.yaml b/runtime/sam/op/ztests/stateful-expr-reset.yaml index cc333e6c12..a7418c4dd0 100644 --- a/runtime/sam/op/ztests/stateful-expr-reset.yaml +++ b/runtime/sam/op/ztests/stateful-expr-reset.yaml @@ -1,6 +1,6 @@ script: | echo // values - super -s -c 'values null, null | unnest this into ( values count() )' + super -s -c 'values null, null | unnest [this] into ( values count() )' echo // filter super -s -c 'values [1,2,3,4], [5,6,7] | unnest this into ( where count() % 3 == 0 )' echo // switch @@ -14,7 +14,7 @@ script: | case sum(this) == 1 ( values "sum is 1" ) )' echo // unnest - super -s -c 'values null, null | unnest this into ( count() )' + super -s -c 'values null, null | unnest [this] into ( count() )' echo // aggregate super -s -c 'values [1], [1] | unnest this into ( sum(this) by c := count() )' diff --git a/runtime/ztests/expr/nested-error.yaml b/runtime/ztests/expr/nested-error.yaml index 2288976a5f..65363ade78 100644 --- 
a/runtime/ztests/expr/nested-error.yaml +++ b/runtime/ztests/expr/nested-error.yaml @@ -5,5 +5,7 @@ vector: true input: | {} -output: | - {x:error("incompatible types")} +error: | + type mismatch at line 1, column 16: + values {x:'a'+(1+'b')} + ~~~~~ diff --git a/runtime/ztests/op/join-error.yaml b/runtime/ztests/op/join-error.yaml index a13515e3da..133af81aa6 100644 --- a/runtime/ztests/op/join-error.yaml +++ b/runtime/ztests/op/join-error.yaml @@ -11,4 +11,6 @@ inputs: outputs: - name: stderr data: | - join requires two upstream parallel query paths + join requires two inputs at line 1, column 1: + join on left.a=right.b + ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/sio/arrowio/reader.go b/sio/arrowio/reader.go index 3e517e965b..f59a48d220 100644 --- a/sio/arrowio/reader.go +++ b/sio/arrowio/reader.go @@ -58,6 +58,10 @@ func NewReaderFromRecordReader(sctx *super.Context, rr pqarrow.RecordReader) (*R return r, nil } +func (r *Reader) Type() super.Type { + return r.typ +} + func UniquifyFieldNames(fields []super.Field) { names := map[string]int{} for i, f := range fields { diff --git a/sio/parquetio/reader.go b/sio/parquetio/reader.go index 7e2d24ef83..2777a8097c 100644 --- a/sio/parquetio/reader.go +++ b/sio/parquetio/reader.go @@ -57,6 +57,15 @@ func columnIndexes(schema *schema.Schema, fields []field.Path) []int { return indexes } +func Type(sctx *super.Context, r io.Reader) super.Type { + if ar, err := NewReader(sctx, r, nil); err == nil { + typ := ar.Type() + ar.Close() + return typ + } + return nil +} + func TopLevelFieldNames(r io.Reader) ([]string, error) { ras, ok := r.(parquet.ReaderAtSeeker) if !ok {