Skip to content

Commit 97f5f6c

Browse files
authored
compiler/semantic: enable type checking for all input files (#6434)
For file types whose sio.Reader has a Type method, continue to use that method to obtain a super.Type. For other file types, if the input is seekable, read all values from the file and fuse their types to obtain a single type. (FIFOs and pipes are excluded for now because they cannot be re-read: the runtime must also be able to read the file after type checking has read it.) To disable type checking for all input files, specify the -dynamic flag to the super and super compile commands.
1 parent 1699162 commit 97f5f6c

28 files changed

+77
-55
lines changed

cli/inputflags/flags.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
type Flags struct {
1919
anyio.ReaderOpts
20+
Dynamic bool
2021
ReadMax auto.Bytes
2122
ReadSize auto.Bytes
2223
Threads int
@@ -43,6 +44,7 @@ func (f *Flags) SetFlags(fs *flag.FlagSet, validate bool) {
4344
fs.Var(&f.ReadMax, "bsup.readmax", "maximum Super Binary read buffer size in MiB, MB, etc.")
4445
f.ReadSize = auto.NewBytes(bsupio.ReadSize)
4546
fs.Var(&f.ReadSize, "bsup.readsize", "target Super Binary read buffer size in MiB, MB, etc.")
47+
fs.BoolVar(&f.Dynamic, "dynamic", false, "disable static type checking of inputs")
4648
}
4749

4850
// Init is called after flags have been parsed.

cmd/super/compile/shared.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
type Shared struct {
2626
dag bool
27+
dynamic bool
2728
includes queryflags.Includes
2829
optimize bool
2930
parallel int
@@ -33,6 +34,7 @@ type Shared struct {
3334

3435
func (s *Shared) SetFlags(fs *flag.FlagSet) {
3536
fs.BoolVar(&s.dag, "dag", false, "display output as DAG (implied by -O or -P)")
37+
fs.BoolVar(&s.dynamic, "dynamic", false, "disable static type checking of inputs on DAG")
3638
fs.Var(&s.includes, "I", "source file containing query text (may be repeated)")
3739
fs.BoolVar(&s.optimize, "O", false, "display optimized DAG")
3840
fs.IntVar(&s.parallel, "P", 0, "display parallelized DAG")
@@ -80,6 +82,7 @@ func (s *Shared) Run(ctx context.Context, args []string, dbFlags *dbflags.Flags,
8082
}
8183
rctx := runtime.DefaultContext()
8284
env := exec.NewEnvironment(storage.NewLocalEngine(), root)
85+
env.Dynamic = s.dynamic
8386
dag, err := compiler.Analyze(rctx, ast, env, false)
8487
if err != nil {
8588
return err

cmd/super/root/command.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ func (c *Command) Run(args []string) error {
147147
ast.PrependFileScan(args)
148148
}
149149
env := exec.NewEnvironment(storage.NewLocalEngine(), nil)
150+
env.Dynamic = c.inputFlags.Dynamic
150151
env.IgnoreOpenErrors = !c.stopErr
151152
env.ReaderOpts = c.inputFlags.Options()
152153
comp := compiler.NewCompilerWithEnv(env)

cmd/super/ztests/from-file-error.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
script: |
2-
! super -I query.spq
2+
! super -dynamic -I query.spq
33
44
inputs:
55
- name: query.spq

cmd/super/ztests/stop-on-error-2.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
script: |
2-
super -s -e=false good.sup bad.sup
2+
super -dynamic -e=false -s good.sup bad.sup
33
44
inputs:
55
- name: good.sup

cmd/super/ztests/stop-on-error-3.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Second input has bad middle line (detection succeeds).
22
script: |
3-
! super -s -e=false good.sup bad.sup
3+
! super -dynamic -e=false -s good.sup bad.sup
44
55
inputs:
66
- name: good.sup

compiler/parser/ztests/glob-mul.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
script: |
2-
super -s -c "? a*b" in.sup
2+
super -dynamic -s -c "? a*b" in.sup
33
echo ===
4-
super -s -c "? a*b=s" in.sup
4+
super -dynamic -s -c "? a*b=s" in.sup
55
66
inputs:
77
- name: in.sup

compiler/semantic/op.go

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
"github.com/brimdata/super/runtime/sam/expr"
2323
"github.com/brimdata/super/runtime/sam/expr/function"
2424
"github.com/brimdata/super/sio"
25-
"github.com/brimdata/super/sio/parquetio"
25+
"github.com/brimdata/super/sio/anyio"
2626
"github.com/brimdata/super/sup"
2727
"github.com/segmentio/ksuid"
2828
)
@@ -269,8 +269,11 @@ func (t *translator) file(n ast.Node, name string, args []ast.OpArg) sem.Op {
269269
}
270270

271271
func (t *translator) fileType(path, format string) (super.Type, error) {
272+
if t.env.Dynamic {
273+
return nil, nil
274+
}
272275
engine := t.env.Engine()
273-
if engine == nil || format != "" && format != "auto" && format != "parquet" {
276+
if engine == nil {
274277
return nil, nil
275278
}
276279
uri, err := storage.ParseURI(path)
@@ -282,7 +285,27 @@ func (t *translator) fileType(path, format string) (super.Type, error) {
282285
return nil, err
283286
}
284287
defer r.Close()
285-
return parquetio.Type(t.sctx, r), nil
288+
var b [1]byte
289+
if _, err := r.ReadAt(b[:], 0); err != nil {
290+
// r can't seek so it's a fifo or pipe.
291+
return nil, nil
292+
}
293+
f, err := anyio.NewFile(t.sctx, r, path, anyio.ReaderOpts{Format: format})
294+
if err != nil {
295+
return nil, err
296+
}
297+
defer f.Close()
298+
if typer, ok := f.Reader.(interface{ Type() super.Type }); ok {
299+
return typer.Type(), nil
300+
}
301+
fuser := t.checker.newFuser()
302+
for {
303+
val, err := f.Read()
304+
if val == nil || err != nil {
305+
return fuser.Type(), err
306+
}
307+
fuser.fuse(val.Type())
308+
}
286309
}
287310

288311
func (t *translator) fromFileGlob(globLoc ast.Node, pattern string, args []ast.OpArg) sem.Op {

compiler/semantic/ztests/pipe-schema-parquet.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ script: |
22
super -o x.parquet -f parquet x.sup
33
super -o y.parquet -f parquet y.sup
44
super -s -c "select x from x.parquet join y.parquet on x=y"
5-
! super -s -c "select x from x.parquet join y.sup on x=y"
5+
touch z.sup
6+
! super -s -c "select x from x.parquet join z.sup on x=y"
67
78
inputs:
89
- name: x.sup
@@ -20,8 +21,8 @@ outputs:
2021
- name: stderr
2122
data: |
2223
"x": ambiguous column reference at line 1, column 39:
23-
select x from x.parquet join y.sup on x=y
24+
select x from x.parquet join z.sup on x=y
2425
~
2526
"x": ambiguous column reference at line 1, column 8:
26-
select x from x.parquet join y.sup on x=y
27+
select x from x.parquet join z.sup on x=y
2728
~

compiler/sfmt/ztests/decls.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
script: |
22
super compile -C -I test.spq
33
echo "==="
4-
super compile -dag -C -I test.spq
4+
super compile -C -dag -dynamic -I test.spq
55
66
inputs:
77
- name: test.spq

0 commit comments

Comments
 (0)