diff --git a/cli/inputflags/flags.go b/cli/inputflags/flags.go index 11aca9bd9d..bb6be53460 100644 --- a/cli/inputflags/flags.go +++ b/cli/inputflags/flags.go @@ -17,6 +17,7 @@ import ( type Flags struct { anyio.ReaderOpts + Dynamic bool ReadMax auto.Bytes ReadSize auto.Bytes Threads int @@ -43,6 +44,7 @@ func (f *Flags) SetFlags(fs *flag.FlagSet, validate bool) { fs.Var(&f.ReadMax, "bsup.readmax", "maximum Super Binary read buffer size in MiB, MB, etc.") f.ReadSize = auto.NewBytes(bsupio.ReadSize) fs.Var(&f.ReadSize, "bsup.readsize", "target Super Binary read buffer size in MiB, MB, etc.") + fs.BoolVar(&f.Dynamic, "dynamic", false, "disable static type checking of inputs") } // Init is called after flags have been parsed. diff --git a/cmd/super/compile/shared.go b/cmd/super/compile/shared.go index 7d08377871..ff1f4e9866 100644 --- a/cmd/super/compile/shared.go +++ b/cmd/super/compile/shared.go @@ -24,6 +24,7 @@ import ( type Shared struct { dag bool + dynamic bool includes queryflags.Includes optimize bool parallel int @@ -33,6 +34,7 @@ type Shared struct { func (s *Shared) SetFlags(fs *flag.FlagSet) { fs.BoolVar(&s.dag, "dag", false, "display output as DAG (implied by -O or -P)") + fs.BoolVar(&s.dynamic, "dynamic", false, "disable static type checking of inputs on DAG") fs.Var(&s.includes, "I", "source file containing query text (may be repeated)") fs.BoolVar(&s.optimize, "O", false, "display optimized DAG") fs.IntVar(&s.parallel, "P", 0, "display parallelized DAG") @@ -80,6 +82,7 @@ func (s *Shared) Run(ctx context.Context, args []string, dbFlags *dbflags.Flags, } rctx := runtime.DefaultContext() env := exec.NewEnvironment(storage.NewLocalEngine(), root) + env.Dynamic = s.dynamic dag, err := compiler.Analyze(rctx, ast, env, false) if err != nil { return err diff --git a/cmd/super/root/command.go b/cmd/super/root/command.go index c31fbc569b..87d55e7d95 100644 --- a/cmd/super/root/command.go +++ b/cmd/super/root/command.go @@ -147,6 +147,7 @@ func (c *Command) Run(args []string) error { ast.PrependFileScan(args) } env := exec.NewEnvironment(storage.NewLocalEngine(), nil) + env.Dynamic = c.inputFlags.Dynamic env.IgnoreOpenErrors = !c.stopErr env.ReaderOpts = c.inputFlags.Options() comp := compiler.NewCompilerWithEnv(env) diff --git a/cmd/super/ztests/from-file-error.yaml b/cmd/super/ztests/from-file-error.yaml index f4c5da2ef4..415cdc97c6 100644 --- a/cmd/super/ztests/from-file-error.yaml +++ b/cmd/super/ztests/from-file-error.yaml @@ -1,5 +1,5 @@ script: | - ! super -I query.spq + ! super -dynamic -I query.spq inputs: - name: query.spq diff --git a/cmd/super/ztests/stop-on-error-2.yaml b/cmd/super/ztests/stop-on-error-2.yaml index b6fd979aaa..f60604add0 100644 --- a/cmd/super/ztests/stop-on-error-2.yaml +++ b/cmd/super/ztests/stop-on-error-2.yaml @@ -1,5 +1,5 @@ script: | - super -s -e=false good.sup bad.sup + super -dynamic -e=false -s good.sup bad.sup inputs: - name: good.sup diff --git a/cmd/super/ztests/stop-on-error-3.yaml b/cmd/super/ztests/stop-on-error-3.yaml index 6d6be6e988..1d7bb153c7 100644 --- a/cmd/super/ztests/stop-on-error-3.yaml +++ b/cmd/super/ztests/stop-on-error-3.yaml @@ -1,6 +1,6 @@ # Second input has bad middle line (detection succeeds). script: | - ! super -s -e=false good.sup bad.sup + ! super -dynamic -e=false -s good.sup bad.sup inputs: - name: good.sup diff --git a/compiler/parser/ztests/glob-mul.yaml b/compiler/parser/ztests/glob-mul.yaml index c665cf1607..3d9b3544f1 100644 --- a/compiler/parser/ztests/glob-mul.yaml +++ b/compiler/parser/ztests/glob-mul.yaml @@ -1,7 +1,7 @@ script: | - super -s -c "? a*b" in.sup + super -dynamic -s -c "? a*b" in.sup echo === - super -s -c "? a*b=s" in.sup + super -dynamic -s -c "? a*b=s" in.sup inputs: - name: in.sup diff --git a/compiler/semantic/op.go b/compiler/semantic/op.go index d3ea8adeb2..3cd0e90c4c 100644 --- a/compiler/semantic/op.go +++ b/compiler/semantic/op.go @@ -22,7 +22,7 @@ import ( "github.com/brimdata/super/runtime/sam/expr" "github.com/brimdata/super/runtime/sam/expr/function" "github.com/brimdata/super/sio" - "github.com/brimdata/super/sio/parquetio" + "github.com/brimdata/super/sio/anyio" "github.com/brimdata/super/sup" "github.com/segmentio/ksuid" ) @@ -269,8 +269,11 @@ func (t *translator) file(n ast.Node, name string, args []ast.OpArg) sem.Op { } func (t *translator) fileType(path, format string) (super.Type, error) { + if t.env.Dynamic { + return nil, nil + } engine := t.env.Engine() - if engine == nil || format != "" && format != "auto" && format != "parquet" { + if engine == nil { return nil, nil } uri, err := storage.ParseURI(path) @@ -282,7 +285,27 @@ func (t *translator) fileType(path, format string) (super.Type, error) { return nil, err } defer r.Close() - return parquetio.Type(t.sctx, r), nil + var b [1]byte + if _, err := r.ReadAt(b[:], 0); err != nil { + // r can't seek so it's a fifo or pipe. + return nil, nil + } + f, err := anyio.NewFile(t.sctx, r, path, anyio.ReaderOpts{Format: format}) + if err != nil { + return nil, err + } + defer f.Close() + if typer, ok := f.Reader.(interface{ Type() super.Type }); ok { + return typer.Type(), nil + } + fuser := t.checker.newFuser() + for { + val, err := f.Read() + if val == nil || err != nil { + return fuser.Type(), err + } + fuser.fuse(val.Type()) + } } func (t *translator) fromFileGlob(globLoc ast.Node, pattern string, args []ast.OpArg) sem.Op { diff --git a/compiler/semantic/ztests/pipe-schema-parquet.yaml b/compiler/semantic/ztests/pipe-schema-parquet.yaml index 3290e5b222..39500b84d9 100644 --- a/compiler/semantic/ztests/pipe-schema-parquet.yaml +++ b/compiler/semantic/ztests/pipe-schema-parquet.yaml @@ -2,7 +2,8 @@ script: | super -o x.parquet -f parquet x.sup super -o y.parquet -f parquet y.sup super -s -c "select x from x.parquet join y.parquet on x=y" - ! super -s -c "select x from x.parquet join y.sup on x=y" + touch z.sup + ! super -s -c "select x from x.parquet join z.sup on x=y" inputs: - name: x.sup @@ -20,8 +21,8 @@ outputs: - name: stderr data: | "x": ambiguous column reference at line 1, column 39: - select x from x.parquet join y.sup on x=y + select x from x.parquet join z.sup on x=y ~ "x": ambiguous column reference at line 1, column 8: - select x from x.parquet join y.sup on x=y + select x from x.parquet join z.sup on x=y ~ diff --git a/compiler/sfmt/ztests/decls.yaml b/compiler/sfmt/ztests/decls.yaml index 39c520f2c7..1fa1226b8f 100644 --- a/compiler/sfmt/ztests/decls.yaml +++ b/compiler/sfmt/ztests/decls.yaml @@ -1,7 +1,7 @@ script: | super compile -C -I test.spq echo "===" - super compile -dag -C -I test.spq + super compile -C -dag -dynamic -I test.spq inputs: - name: test.spq diff --git a/compiler/sfmt/ztests/join.yaml b/compiler/sfmt/ztests/join.yaml index 5dffd7d65c..d8d944fd55 100644 --- a/compiler/sfmt/ztests/join.yaml +++ b/compiler/sfmt/ztests/join.yaml @@ -1,11 +1,11 @@ script: | super compile -C "join (from test.sup) on left.x=right.x" echo === - super compile -C -dag "join (from test.sup) on right.x=left.x" + super compile -C -dag -dynamic "join (from test.sup) on right.x=left.x" echo === super compile -C "right join (from test.sup) as {l,r} on r.x=l.x" echo === - super compile -C -dag "right join (from test.sup) as {l,r} on r.x=l.x" + super compile -C -dag -dynamic "right join (from test.sup) as {l,r} on r.x=l.x" outputs: - name: stdout diff --git a/compiler/ztests/anycase-funcs.yaml b/compiler/ztests/anycase-funcs.yaml index 3ed8b15076..f2aaab2a03 100644 --- a/compiler/ztests/anycase-funcs.yaml +++ b/compiler/ztests/anycase-funcs.yaml @@ -1,7 +1,7 @@ script: | super -s -c 'c:=COUNT(),d:=Count(),Collect(LoweR(s)) by key | sort key' in.sup echo === - super -s -c 'Grep("G.*", this)' in.sup + super -dynamic -s -c 'Grep("G.*", this)' in.sup echo === super -s -c 'values {s,match:RegEXP("(f|B).*", s)}' in.sup echo === diff --git a/compiler/ztests/const-source.yaml b/compiler/ztests/const-source.yaml index 0c7370a168..f85347aa29 100644 --- a/compiler/ztests/const-source.yaml +++ b/compiler/ztests/const-source.yaml @@ -2,14 +2,14 @@ script: | export SUPER_DB=test super db init -q super db create -q test - super db compile -dag -C 'const POOL = "test" from eval(POOL)' | sed -e "s/[a-zA-Z0-9]\{27\}/XXX/" + super db compile -C -dag 'const POOL = "test" from eval(POOL)' | sed -e "s/[a-zA-Z0-9]\{27\}/XXX/" echo "===" - super compile -dag -C 'const FILE = "A.sup" from eval(FILE)' + super compile -C -dag -dynamic 'const FILE = "A.sup" from eval(FILE)' echo "===" - super db compile -dag -C 'const URL = "http://brimdata.io" from eval(URL)' - ! super db compile -dag -C 'const POOL = 3.14 from eval(POOL)' - ! super db compile -dag -C 'const FILE = 127.0.0.1 from eval(FILE)' - ! super db compile -dag -C 'const URL = true from eval(URL)' + super db compile -C -dag 'const URL = "http://brimdata.io" from eval(URL)' + ! super db compile -C -dag 'const POOL = 3.14 from eval(POOL)' + ! super db compile -C -dag 'const FILE = 127.0.0.1 from eval(FILE)' + ! super db compile -C -dag 'const URL = true from eval(URL)' outputs: - name: stdout diff --git a/compiler/ztests/merge-filters.yaml b/compiler/ztests/merge-filters.yaml index 09a2f1118c..e8c730c4ac 100644 --- a/compiler/ztests/merge-filters.yaml +++ b/compiler/ztests/merge-filters.yaml @@ -1,7 +1,7 @@ script: | super compile -C -O 'from /dev/null | where a | where b' echo === - super compile -C -O 'fork ( from /dev/null | where b | where c ) ( from /dev/zero | where e | where f ) | where g' + super compile -C -O -dynamic 'fork ( from /dev/null | where b | where c ) ( from /dev/zero | where e | where f ) | where g' echo === super compile -C -O 'unnest [a] into ( where b | where c )' echo === diff --git a/compiler/ztests/par-aggregate-func.yaml b/compiler/ztests/par-aggregate-func.yaml index 29dfe8b024..87b8ad31af 100644 --- a/compiler/ztests/par-aggregate-func.yaml +++ b/compiler/ztests/par-aggregate-func.yaml @@ -4,7 +4,7 @@ script: | super db create -q -orderby s:asc test super db compile -P 2 -C "from test | union(s) by n:=len(s)" | sed -e 's/pool .*/.../' echo === - SUPER_VAM=1 super compile -C -P 2 'from test.csup | summarize count(a) by b' + SUPER_VAM=1 super compile -C -P 2 -dynamic 'from test.csup | summarize count(a) by b' outputs: - name: stdout diff --git a/compiler/ztests/par-concurrency.yaml b/compiler/ztests/par-concurrency.yaml index bafebe02ba..8b79e0c8f6 100644 --- a/compiler/ztests/par-concurrency.yaml +++ b/compiler/ztests/par-concurrency.yaml @@ -1,11 +1,11 @@ script: | export SUPER_VAM=1 echo === -P 2 - super compile -C -P 2 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' + super compile -C -P 2 -dynamic 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' echo === -P 5 - super compile -C -P 5 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' + super compile -C -P 5 -dynamic 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' echo === -P 6 - super compile -C -P 6 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' + super compile -C -P 6 -dynamic 'fork ( from a.csup | sort b ) ( from c.csup | sort d )' outputs: - name: stdout diff --git a/compiler/ztests/par-sort.yaml b/compiler/ztests/par-sort.yaml index 5b228f5271..d3301c37bd 100644 --- a/compiler/ztests/par-sort.yaml +++ b/compiler/ztests/par-sort.yaml @@ -1,5 +1,5 @@ script: | - SUPER_VAM=1 super compile -C -P 2 'from test.csup | sort a, b desc nulls last | values c' + SUPER_VAM=1 super compile -C -P 2 -dynamic 'from test.csup | sort a, b desc nulls last | values c' outputs: - name: stdout diff --git a/compiler/ztests/par-top.yaml b/compiler/ztests/par-top.yaml index a43f7f2afe..448e63a4fd 100644 --- a/compiler/ztests/par-top.yaml +++ b/compiler/ztests/par-top.yaml @@ -8,7 +8,7 @@ script: | echo === super db compile -C -P 2 'from test | top 3 b desc' | sed -e 's/pool .*/.../' echo === - SUPER_VAM=1 super compile -C -P 2 'from test.csup | top 3 a, b desc nulls last | values c' + SUPER_VAM=1 super compile -C -P 2 -dynamic 'from test.csup | top 3 a, b desc nulls last | values c' outputs: - name: stdout diff --git a/compiler/ztests/pruner-in.yaml b/compiler/ztests/pruner-in.yaml index 7451ac1e74..0279bfcc39 100644 --- a/compiler/ztests/pruner-in.yaml +++ b/compiler/ztests/pruner-in.yaml @@ -1,9 +1,9 @@ script: | export SUPER_VAM=1 - super compile -C -O 'from test.csup | x in ["foo","bar"]' + super compile -C -O -dynamic 'from test.csup | x in ["foo","bar"]' echo // === # Test that we still optimize for a tuple which gets translated into a record. - super compile -C -O 'from test.csup | x in ("foo","bar")' + super compile -C -O -dynamic 'from test.csup | x in ("foo","bar")' outputs: - name: stdout diff --git a/compiler/ztests/pruner-regexp.yaml b/compiler/ztests/pruner-regexp.yaml index 6c460ccb2d..ff9c947ae0 100644 --- a/compiler/ztests/pruner-regexp.yaml +++ b/compiler/ztests/pruner-regexp.yaml @@ -1,10 +1,10 @@ script: | export SUPER_VAM=1 - super compile -C -O 'from test.csup | where x LIKE "cslab%"' + super compile -C -O -dynamic 'from test.csup | where x LIKE "cslab%"' echo // === - super compile -C -O 'from test.csup | where grep("^csla[bB].*", x)' + super compile -C -O -dynamic 'from test.csup | where grep("^csla[bB].*", x)' echo // === - super compile -C -O 'from test.csup | where grep("csla[bB].*", x)' + super compile -C -O -dynamic 'from test.csup | where grep("csla[bB].*", x)' echo // === echo '{x:"a"}' | super -f csup -o x.csup - super -s -c "SELECT x FROM x.csup WHERE x LIKE 'a%'" diff --git a/compiler/ztests/pushdown.yaml b/compiler/ztests/pushdown.yaml index 0a264be0b1..119216319d 100644 --- a/compiler/ztests/pushdown.yaml +++ b/compiler/ztests/pushdown.yaml @@ -4,13 +4,13 @@ script: | echo === distinct super compile -C -O 'from /dev/null | distinct a | values b' echo === fork into hash join - super compile -C -O 'fork ( from /dev/null ) ( from /dev/zero ) | join on right.a=left.b | values left.c,right.d' + super compile -C -O -dynamic 'fork ( from /dev/null ) ( from /dev/zero ) | join on right.a=left.b | values left.c,right.d' echo === fork into nested loop join - super compile -C -O 'fork ( from /dev/null ) ( from /dev/zero ) | join on right.a>left.b | values left.c,right.d' + super compile -C -O -dynamic 'fork ( from /dev/null ) ( from /dev/zero ) | join on right.a>left.b | values left.c,right.d' echo === hash join - super compile -C -O "from /dev/null | join (from /dev/zero) on left.a=right.b | values left.c,right.d" + super compile -C -O -dynamic "from /dev/null | join (from /dev/zero) on left.a=right.b | values left.c,right.d" echo === nested loop join - super compile -C -O "from /dev/null | join (from /dev/zero) on left.a>right.b | values left.c,right.d" + super compile -C -O -dynamic "from /dev/null | join (from /dev/zero) on left.a>right.b | values left.c,right.d" echo === switch into hash join super compile -C -O 'from /dev/null | switch a case b ( put x:=c ) case d ( put x:=e ) | join on left.f=right.g | values left.h,right.i' echo === switch into nested loop join diff --git a/compiler/ztests/sql/drop.yaml b/compiler/ztests/sql/drop.yaml index 471d53361d..c2d6e2ccc0 100644 --- a/compiler/ztests/sql/drop.yaml +++ b/compiler/ztests/sql/drop.yaml @@ -1,7 +1,7 @@ script: | super -s -c 'select * from "a.sup" | drop c' echo === - super -s -c 'select * from "messy.sup" | drop s,t' + super -dynamic -s -c 'select * from "messy.sup" | drop s,t' echo === super -s -c 'select * from "b.sup" | drop b' diff --git a/compiler/ztests/sql/scope-errors.yaml b/compiler/ztests/sql/scope-errors.yaml index 12e980723b..b94738de18 100644 --- a/compiler/ztests/sql/scope-errors.yaml +++ b/compiler/ztests/sql/scope-errors.yaml @@ -1,11 +1,11 @@ script: | - ! super -s -c "select * from (select 1 as a, 2 as b) t1 join (select 2 as b) on a=b" + ! super -dynamic -s -c "select * from (select 1 as a, 2 as b) t1 join (select 2 as b) on a=b" echo === 1>&2 - ! super -s -c "select * from a.json join b.json on a=b" + ! super -dynamic -s -c "select * from a.json join b.json on a=b" echo === 1>&2 - ! super -s -c "select * from (select 2 as x) as a join b.json on x=b.c" + ! super -dynamic -s -c "select * from (select 2 as x) as a join b.json on x=b.c" echo === 1>&2 - ! super -s -c "select * from (select 2 as x) a join b.json a on a.x=a.y" + ! super -dynamic -s -c "select * from (select 2 as x) a join b.json a on a.x=a.y" outputs: - name: stderr diff --git a/compiler/ztests/sql/union-error.yaml b/compiler/ztests/sql/union-error.yaml index bd4ed78c1e..c2ca6f586d 100644 --- a/compiler/ztests/sql/union-error.yaml +++ b/compiler/ztests/sql/union-error.yaml @@ -1,7 +1,7 @@ script: | ! super -c 'select 1 x union all select 1 x, 1 y' echo // === >&2 - ! super -c 'select * from a.sup union all select * from a.sup' + ! super -dynamic -c 'select * from a.sup union all select * from a.sup' inputs: - name: a.sup diff --git a/runtime/exec/environment.go b/runtime/exec/environment.go index 490c2d4ec1..157d03ea8a 100644 --- a/runtime/exec/environment.go +++ b/runtime/exec/environment.go @@ -32,6 +32,7 @@ type Environment struct { db *db.Root useVAM bool + Dynamic bool IgnoreOpenErrors bool ReaderOpts anyio.ReaderOpts } diff --git a/runtime/ztests/expr/queryexpr-unnest.yaml b/runtime/ztests/expr/queryexpr-unnest.yaml index b8bbf86fdb..9847805c23 100644 --- a/runtime/ztests/expr/queryexpr-unnest.yaml +++ b/runtime/ztests/expr/queryexpr-unnest.yaml @@ -1,5 +1,5 @@ script: | - super -s -c 'from input.sup + super -dynamic -s -c 'from input.sup | values (unnest {outer: this, inner: (from foo.sup)} | where inner.id=outer.id | values inner.name) + "_" + (unnest {outer: this, inner: (from bar.sup)} | where inner.id=outer.id | values inner.name)' diff --git a/runtime/ztests/op/join-empty-inner.yaml b/runtime/ztests/op/join-empty-inner.yaml index 13dbadb86f..40ca09f5c3 100644 --- a/runtime/ztests/op/join-empty-inner.yaml +++ b/runtime/ztests/op/join-empty-inner.yaml @@ -1,8 +1,8 @@ script: | echo === hash join - super -s -c 'left join (from C.sup) on left.a=right.a | values {...left,hit:right.sc} | sort' A.sup + super -dynamic -s -c 'left join (from C.sup) on left.a=right.a | values {...left,hit:right.sc} | sort' A.sup echo === nested loop join - super -s -c 'left join (from C.sup) on left.a