diff --git a/runtime/sam/expr/ztests/cast-bytes-string-err.yaml b/runtime/sam/expr/ztests/cast-bytes-string-err.yaml deleted file mode 100644 index c3e5eb69b8..0000000000 --- a/runtime/sam/expr/ztests/cast-bytes-string-err.yaml +++ /dev/null @@ -1,7 +0,0 @@ -spq: 'put s:=b::string' - -input: | - {b:0xc328} - -output: | - {b:0xc328,s:error({message:"cannot cast to string: invalid UTF-8",on:0xc328})} diff --git a/runtime/vam/expr/cast/bool.go b/runtime/vam/expr/cast/bool.go index 5c553b7599..e2404d1259 100644 --- a/runtime/vam/expr/cast/bool.go +++ b/runtime/vam/expr/cast/bool.go @@ -6,7 +6,7 @@ import ( "github.com/brimdata/super/vector/bitvec" ) -func castToBool(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToBool(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { var out *vector.Bool switch vec := vec.(type) { case *vector.Int: @@ -17,9 +17,9 @@ func castToBool(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { out = numberToBool(vec.Values, index) case *vector.String: vvec, errs := stringToBool(vec, index) - return vvec, errs, true + return vvec, errs, "", true default: - return nil, nil, false + return nil, nil, "", false } nulls := vector.NullsOf(vec) if index == nil { @@ -27,7 +27,7 @@ func castToBool(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { } else { out.Nulls = nulls.Pick(index) } - return out, nil, true + return out, nil, "", true } func numberToBool[E numeric](s []E, index []uint32) *vector.Bool { diff --git a/runtime/vam/expr/cast/bytes.go b/runtime/vam/expr/cast/bytes.go index d1fa2dc2af..3cea7f3510 100644 --- a/runtime/vam/expr/cast/bytes.go +++ b/runtime/vam/expr/cast/bytes.go @@ -4,14 +4,14 @@ import ( "github.com/brimdata/super/vector" ) -func castToBytes(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToBytes(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { strVec, ok := vec.(*vector.String) if !ok { - return nil, nil, false + return nil, nil, "", false } out := vector.Any(vector.NewBytes(strVec.Table(), vector.NullsOf(strVec))) if index != nil { out = vector.Pick(out, index) } - return out, nil, true + return out, nil, "", true } diff --git a/runtime/vam/expr/cast/cast.go b/runtime/vam/expr/cast/cast.go index 265525613a..109999ed45 100644 --- a/runtime/vam/expr/cast/cast.go +++ b/runtime/vam/expr/cast/cast.go @@ -13,7 +13,7 @@ func To(sctx *super.Context, vec vector.Any, typ super.Type) vector.Any { var c caster id := typ.ID() if super.IsNumber(id) { - c = func(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { + c = func(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { return castToNumber(vec, typ, index) } } else { @@ -29,29 +29,30 @@ func To(sctx *super.Context, vec vector.Any, typ super.Type) vector.Any { case super.IDNet: c = castToNet case super.IDType: - c = func(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { + c = func(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { return castToType(sctx, vec, index) } default: - return errCastFailed(sctx, vec, typ) + return errCastFailed(sctx, vec, typ, "") } } return assemble(sctx, vec, typ, c) } -type caster func(vector.Any, []uint32) (vector.Any, []uint32, bool) +type caster func(vector.Any, []uint32) (vector.Any, []uint32, string, bool) func assemble(sctx *super.Context, vec vector.Any, typ super.Type, fn caster) vector.Any { var out vector.Any var errs []uint32 + var errMsg string var ok bool switch vec := vec.(type) { case *vector.Const: return castConst(sctx, vec, typ) case *vector.View: - out, errs, ok = fn(vec.Any, vec.Index) + out, errs, errMsg, ok = fn(vec.Any, vec.Index) case *vector.Dict: - out, errs, ok = fn(vec.Any, nil) + out, errs, errMsg, ok = fn(vec.Any, nil) if ok { if len(errs) > 0 { index, counts, nulls, nerrs := vec.RebuildDropTags(errs...) @@ -62,13 +63,13 @@ func assemble(sctx *super.Context, vec vector.Any, typ super.Type, fn caster) ve } } default: - out, errs, ok = fn(vec, nil) + out, errs, errMsg, ok = fn(vec, nil) } if !ok { - return errCastFailed(sctx, vec, typ) + return errCastFailed(sctx, vec, typ, errMsg) } if len(errs) > 0 { - return vector.Combine(out, errs, errCastFailed(sctx, vector.Pick(vec, errs), typ)) + return vector.Combine(out, errs, errCastFailed(sctx, vector.Pick(vec, errs), typ, errMsg)) } return out } @@ -88,17 +89,21 @@ func castConst(sctx *super.Context, vec *vector.Const, typ super.Type) vector.An trueCount++ } } - err := errCastFailed(sctx, vector.NewConst(vec.Value(), vec.Len()-trueCount, bitvec.Zero), typ) + err := errCastFailed(sctx, vector.NewConst(vec.Value(), vec.Len()-trueCount, bitvec.Zero), typ, "") nulls := vector.NewConst(super.NewValue(typ, nil), trueCount, bitvec.Zero) return vector.NewDynamic(index, []vector.Any{err, nulls}) } - return errCastFailed(sctx, vec, typ) + return errCastFailed(sctx, vec, typ, "") } return vector.NewConst(val, vec.Len(), vec.Nulls) } -func errCastFailed(sctx *super.Context, vec vector.Any, typ super.Type) vector.Any { - return vector.NewWrappedError(sctx, "cannot cast to "+sup.FormatType(typ), vec) +func errCastFailed(sctx *super.Context, vec vector.Any, typ super.Type, msgSuffix string) vector.Any { + msg := "cannot cast to " + sup.FormatType(typ) + if msgSuffix != "" { + msg = msg + ": " + msgSuffix + } + return vector.NewWrappedError(sctx, msg, vec) } func lengthOf(vec vector.Any, index []uint32) uint32 { diff --git a/runtime/vam/expr/cast/ip.go b/runtime/vam/expr/cast/ip.go index cb7ebbacee..af722e208e 100644 --- a/runtime/vam/expr/cast/ip.go +++ b/runtime/vam/expr/cast/ip.go @@ -8,10 +8,10 @@ import ( "github.com/brimdata/super/vector/bitvec" ) -func castToIP(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToIP(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { switch vec := vec.(type) { case *vector.IP: - return vec, nil, true + return vec, nil, "", true case *vector.String: n := lengthOf(vec, index) var nulls bitvec.Bits @@ -37,16 +37,16 @@ func castToIP(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { } ips = append(ips, ip) } - return vector.NewIP(ips, nulls), errs, true + return vector.NewIP(ips, nulls), errs, "", true default: - return nil, nil, false + return nil, nil, "", false } } -func castToNet(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToNet(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { switch vec := vec.(type) { case *vector.Net: - return vec, nil, true + return vec, nil, "", true case *vector.String: n := lengthOf(vec, index) var nulls bitvec.Bits @@ -72,8 +72,8 @@ func castToNet(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { } nets = append(nets, net) } - return vector.NewNet(nets, nulls), errs, true + return vector.NewNet(nets, nulls), errs, "", true default: - return nil, nil, false + return nil, nil, "", false } } diff --git a/runtime/vam/expr/cast/number.go b/runtime/vam/expr/cast/number.go index f269668d69..3858242649 100644 --- a/runtime/vam/expr/cast/number.go +++ b/runtime/vam/expr/cast/number.go @@ -17,10 +17,10 @@ type numeric interface { constraints.Float | constraints.Integer } -func castToNumber(vec vector.Any, typ super.Type, index []uint32) (vector.Any, []uint32, bool) { +func castToNumber(vec vector.Any, typ super.Type, index []uint32) (vector.Any, []uint32, string, bool) { if vec.Type().ID() == super.IDString { out, errs := castStringToNumber(vec, typ, index) - return out, errs, true + return out, errs, "", true } nulls := vector.NullsOf(vec) if index != nil { @@ -32,21 +32,21 @@ func castToNumber(vec vector.Any, typ super.Type, index []uint32) (vector.Any, [ if len(errs) > 0 { nulls = nulls.Pick(inverseIndex(errs, nulls.Len())) } - return vector.NewInt(typ, vals, nulls), errs, true + return vector.NewInt(typ, vals, nulls), errs, "", true case super.IsUnsigned(id): vals, errs := toNumeric[uint64](vec, typ, index) if len(errs) > 0 { nulls = nulls.Pick(inverseIndex(errs, nulls.Len())) } - return vector.NewUint(typ, vals, nulls), errs, true + return vector.NewUint(typ, vals, nulls), errs, "", true case super.IsFloat(id): vals, errs := toNumeric[float64](vec, typ, index) if errs != nil { nulls = nulls.Pick(inverseIndex(errs, nulls.Len())) } - return vector.NewFloat(typ, vals, nulls), errs, true + return vector.NewFloat(typ, vals, nulls), errs, "", true default: - return nil, nil, false + return nil, nil, "", false } } diff --git a/runtime/vam/expr/cast/string.go b/runtime/vam/expr/cast/string.go index 4c27b85a9c..6b24a41539 100644 --- a/runtime/vam/expr/cast/string.go +++ b/runtime/vam/expr/cast/string.go @@ -3,6 +3,7 @@ package cast import ( "strconv" "time" + "unicode/utf8" "github.com/brimdata/super" "github.com/brimdata/super/pkg/nano" @@ -11,7 +12,7 @@ import ( "github.com/brimdata/super/vector" ) -func castToString(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToString(vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { nulls := vector.NullsOf(vec) if index != nil { nulls = nulls.Pick(index) @@ -56,21 +57,35 @@ func castToString(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { } case *vector.String: if index == nil { - return vec, nil, true + return vec, nil, "", true } for _, idx := range index { bytes = append(bytes, vec.Value(idx)...) offs = append(offs, uint32(len(bytes))) } case *vector.Bytes: + var errs []uint32 for i := range n { idx := i if index != nil { idx = index[i] } - bytes = append(bytes, vec.Value(idx)...) - offs = append(offs, uint32(len(bytes))) + if !utf8.Valid(vec.Value(idx)) { + errs = append(errs, i) + } + } + const errMsg = "invalid UTF-8" + if len(errs) == int(n) { + return nil, nil, errMsg, false + } + out := vector.Any(vector.NewString(vec.Table(), vec.Nulls)) + if index != nil { + out = vector.Pick(out, index) + } + if len(errs) > 0 { + out = vector.ReversePick(out, errs) } + return out, errs, errMsg, true case *vector.IP: for i := range n { idx := i @@ -115,7 +130,7 @@ func castToString(vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { offs = append(offs, uint32(len(bytes))) } } - return vector.NewString(vector.NewBytesTable(offs, bytes), nulls), nil, true + return vector.NewString(vector.NewBytesTable(offs, bytes), nulls), nil, "", true } func timeToString(vec *vector.Int, index []uint32, n uint32) ([]uint32, []byte) { diff --git a/runtime/vam/expr/cast/type.go b/runtime/vam/expr/cast/type.go index 0070eb18d8..5d8f5fa2b7 100644 --- a/runtime/vam/expr/cast/type.go +++ b/runtime/vam/expr/cast/type.go @@ -7,10 +7,10 @@ import ( "github.com/brimdata/super/vector/bitvec" ) -func castToType(sctx *super.Context, vec vector.Any, index []uint32) (vector.Any, []uint32, bool) { +func castToType(sctx *super.Context, vec vector.Any, index []uint32) (vector.Any, []uint32, string, bool) { switch vec := vec.(type) { case *vector.TypeValue: - return vec, nil, true + return vec, nil, "", true case *vector.String: n := lengthOf(vec, index) out := vector.NewTypeValueEmpty(0, bitvec.Zero) @@ -36,8 +36,8 @@ func castToType(sctx *super.Context, vec vector.Any, index []uint32) (vector.Any } out.Append(val.Bytes()) } - return out, errs, true + return out, errs, "", true default: - return nil, nil, false + return nil, nil, "", false } } diff --git a/runtime/ztests/expr/cast/string.yaml b/runtime/ztests/expr/cast/string.yaml index 6877c71ed3..5d9bb90cb1 100644 --- a/runtime/ztests/expr/cast/string.yaml +++ b/runtime/ztests/expr/cast/string.yaml @@ -21,6 +21,8 @@ input: | 1.2.3.4/32 0x68692c20776f726c64 0x666f6f20626172 + 0x00 + 0xc328 "hi, world" "foo bar" {foo:"bar"} @@ -46,6 +48,8 @@ output: | "1.2.3.4/32" "hi, world" "foo bar" + "\u0000" + error({message:"cannot cast to string: invalid UTF-8",on:0xc328}) "hi, world" "foo bar" "{foo:\"bar\"}"