@@ -1102,7 +1102,8 @@ TYPED_TEST(TestStringKernels, Utf8Reverse) {
11021102
11031103 // inputs with malformed utf8 chars would produce garbage output, but the end result
11041104 // would produce arrays with same lengths. Hence checking offset buffer equality
1105- auto malformed_input = ArrayFromJSON (this ->type (), " [\" ɑ\xFF ɑa\" , \" ɽ\xe1\xbd ɽa\" ]" );
1105+ // Use MakeArray for invalid UTF-8 since simdjson validates UTF-8 in JSON
1106+ auto malformed_input = this ->MakeArray ({" ɑ\xFF ɑa" , " ɽ\xe1\xbd ɽa" });
11061107 const Result<Datum>& res = CallFunction (" utf8_reverse" , {malformed_input});
11071108 ASSERT_TRUE (res->array ()->buffers [1 ]->Equals (*malformed_input->data ()->buffers [1 ]));
11081109}
@@ -2513,104 +2514,99 @@ TYPED_TEST(TestBinaryKernels, SliceBytesBasic) {
25132514}
25142515
25152516TYPED_TEST (TestBinaryKernels, SliceBytesPosPos) {
2517+ // Use MakeArray for invalid UTF-8 since simdjson validates UTF-8 in JSON
25162518 SliceOptions options{2 , 4 };
25172519 this ->CheckUnary (
25182520 " binary_slice" ,
2519- " [ \"\" , \" a \ " , \ " ab\ " , \ " a\xc2\xa2\ " , \ " ab\xc2\xa2\ " , \ " ab\xc2\xff Z\" ] " ,
2520- this ->type (), " [ \"\" , \"\ " , \"\ " , \ "\xa2\ " , \ "\xc2\xa2\ " , \ "\xc2\xff\" ] " , &options);
2521+ this -> MakeArray ({ " " , " a " , " ab" , " a\xc2\xa2 " , " ab\xc2\xa2 " , " ab\xc2\xff Z" }) ,
2522+ this ->MakeArray ({ " " , " " , " " , " \xa2 " , " \xc2\xa2 " , " \xc2\xff " }) , &options);
25212523 SliceOptions options_step{1 , 5 , 2 };
25222524 this ->CheckUnary (
25232525 " binary_slice" ,
2524- " [ \"\" , \" a \ " , \ " ab\ " , \ " a\xc2\xa2\ " , \ " ab\xc2\xa2\ " , \ " ab\xc2\xff Z\" ] " ,
2525- this ->type (), " [ \"\" , \"\ " , \" b \ " , \ "\xc2\ " , \ " b\xa2\ " , \ " b\xff\" ] " , &options_step);
2526+ this -> MakeArray ({ " " , " a " , " ab" , " a\xc2\xa2 " , " ab\xc2\xa2 " , " ab\xc2\xff Z" }) ,
2527+ this ->MakeArray ({ " " , " " , " b " , " \xc2 " , " b\xa2 " , " b\xff " }) , &options_step);
25262528 SliceOptions options_step_neg{5 , 1 , -2 };
25272529 this ->CheckUnary (
25282530 " binary_slice" ,
2529- " [\"\" , \" a\" , \" ab\" , \" a\xc2\xa2\" , \" ab\xc2\xa2\" , \" ab\xc2\xff Z\" ]" ,
2530- this ->type (), " [\"\" , \"\" , \"\" , \"\xa2\" , \"\xa2\" , \" Z\xc2\" ]" ,
2531- &options_step_neg);
2531+ this ->MakeArray ({" " , " a" , " ab" , " a\xc2\xa2 " , " ab\xc2\xa2 " , " ab\xc2\xff Z" }),
2532+ this ->MakeArray ({" " , " " , " " , " \xa2 " , " \xa2 " , " Z\xc2 " }), &options_step_neg);
25322533 options_step_neg.stop = 0 ;
25332534 this ->CheckUnary (
25342535 " binary_slice" ,
2535- " [\"\" , \" a\" , \" ab\" , \" a\xc2\xa2\" , \" aZ\xc2\xa2\" , \" ab\xc2\xff Z\" ]" ,
2536- this ->type (), " [\"\" , \"\" , \" b\" , \"\xa2\" , \"\xa2 Z\" , \" Z\xc2\" ]" ,
2537- &options_step_neg);
2536+ this ->MakeArray ({" " , " a" , " ab" , " a\xc2\xa2 " , " aZ\xc2\xa2 " , " ab\xc2\xff Z" }),
2537+ this ->MakeArray ({" " , " " , " b" , " \xa2 " , " \xa2 Z" , " Z\xc2 " }), &options_step_neg);
25382538}
25392539
25402540TYPED_TEST (TestBinaryKernels, SliceBytesPosNeg) {
2541+ // Use MakeArray for invalid UTF-8 since simdjson validates UTF-8 in JSON
25412542 SliceOptions options{2 , -1 };
25422543 this ->CheckUnary (
25432544 " binary_slice" ,
2544- " [ \"\" , \" a \ " , \ " ab\ " , \ " a\xc2\xa2\ " , \ " aZ\xc2\xa2\ " , \ " ab\xc2\xff Z\" ] " ,
2545- this ->type (), " [ \"\" , \"\ " , \"\ " , \"\ " , \ "\xc2\ " , \ "\xc2\xff\" ] " , &options);
2545+ this -> MakeArray ({ " " , " a " , " ab" , " a\xc2\xa2 " , " aZ\xc2\xa2 " , " ab\xc2\xff Z" }) ,
2546+ this ->MakeArray ({ " " , " " , " " , " " , " \xc2 " , " \xc2\xff " }) , &options);
25462547 SliceOptions options_step{1 , -1 , 2 };
25472548 this ->CheckUnary (
25482549 " binary_slice" ,
2549- " [ \"\" , \" a \ " , \ " ab\ " , \ " a\xc2\xa2\ " , \ " aZ\xc2\xa2\ " , \ " ab\xc2\xff Z\" ] " ,
2550- this ->type (), " [ \"\" , \"\ " , \"\ " , \ "\xc2\ " , \" Z \ " , \ " b\xff\" ] " , &options_step);
2550+ this -> MakeArray ({ " " , " a " , " ab" , " a\xc2\xa2 " , " aZ\xc2\xa2 " , " ab\xc2\xff Z" }) ,
2551+ this ->MakeArray ({ " " , " " , " " , " \xc2 " , " Z " , " b\xff " }) , &options_step);
25512552 SliceOptions options_step_neg{3 , -4 , -2 };
25522553 this ->CheckUnary (
25532554 " binary_slice" ,
2554- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" ab\xc2\xff Z\" ]" ,
2555- this ->type (), " [\"\" , \" a\" , \" b\" , \"\xa2 Z\" , \"\xa2 Z\" , \"\xff\" ]" ,
2556- &options_step_neg);
2555+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " ab\xc2\xff Z" }),
2556+ this ->MakeArray ({" " , " a" , " b" , " \xa2 Z" , " \xa2 Z" , " \xff " }), &options_step_neg);
25572557 options_step_neg.stop = -5 ;
25582558 this ->CheckUnary (
25592559 " binary_slice" ,
2560- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2561- this ->type (), " [\"\" , \" a\" , \" b\" , \"\xa2 Z\" , \"\xa2 Z\" , \"\xff P\" ]" ,
2562- &options_step_neg);
2560+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2561+ this ->MakeArray ({" " , " a" , " b" , " \xa2 Z" , " \xa2 Z" , " \xff P" }), &options_step_neg);
25632562}
25642563
25652564TYPED_TEST (TestBinaryKernels, SliceBytesNegNeg) {
2565+ // Use MakeArray for invalid UTF-8 since simdjson validates UTF-8 in JSON
25662566 SliceOptions options{-2 , -1 };
25672567 this ->CheckUnary (
25682568 " binary_slice" ,
2569- " [ \"\" , \" a \ " , \ " ab\ " , \ " Z\xc2\xa2\ " , \ " aZ\xc2\xa2\ " , \ " ab\xc2\xff Z\" ] " ,
2570- this ->type (), " [ \"\" , \"\ " , \" a \ " , \ "\xc2\ " , \ "\xc2\ " , \ "\xff\" ] " , &options);
2569+ this -> MakeArray ({ " " , " a " , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " ab\xc2\xff Z" }) ,
2570+ this ->MakeArray ({ " " , " " , " a " , " \xc2 " , " \xc2 " , " \xff " }) , &options);
25712571 SliceOptions options_step{-4 , -1 , 2 };
25722572 this ->CheckUnary (
25732573 " binary_slice" ,
2574- " [ \"\" , \" a \ " , \ " ab\ " , \ " Z\xc2\xa2\ " , \ " aZ\xc2\xa2\ " , \ " aP\xc2\xff Z\" ] " ,
2575- this ->type (), " [ \"\" , \"\ " , \" a \ " , \" Z \ " , \ " a\xc2\ " , \ " P\xff\" ] " , &options_step);
2574+ this -> MakeArray ({ " " , " a " , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }) ,
2575+ this ->MakeArray ({ " " , " " , " a " , " Z " , " a\xc2 " , " P\xff " }) , &options_step);
25762576 SliceOptions options_step_neg{-1 , -3 , -2 };
25772577 this ->CheckUnary (
25782578 " binary_slice" ,
2579- " [ \"\" , \" a \ " , \ " ab\ " , \ " Z\xc2\xa2\ " , \ " aZ\xc2\xa2\ " , \ " aP\xc2\xff Z\" ] " ,
2580- this ->type (), " [ \"\" , \" a \ " , \" b \ " , \ "\xa2\ " , \ "\xa2\ " , \" Z \" ] " , &options_step_neg);
2579+ this -> MakeArray ({ " " , " a " , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }) ,
2580+ this ->MakeArray ({ " " , " a " , " b " , " \xa2 " , " \xa2 " , " Z " }) , &options_step_neg);
25812581 options_step_neg.stop = -4 ;
25822582 this ->CheckUnary (
25832583 " binary_slice" ,
2584- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2585- this ->type (), " [\"\" , \" a\" , \" b\" , \"\xa2 Z\" , \"\xa2 Z\" , \" Z\xc2\" ]" ,
2586- &options_step_neg);
2584+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2585+ this ->MakeArray ({" " , " a" , " b" , " \xa2 Z" , " \xa2 Z" , " Z\xc2 " }), &options_step_neg);
25872586}
25882587
25892588TYPED_TEST (TestBinaryKernels, SliceBytesNegPos) {
2589+ // Use MakeArray for invalid UTF-8 since simdjson validates UTF-8 in JSON
25902590 SliceOptions options{-2 , 4 };
25912591 this ->CheckUnary (
25922592 " binary_slice" ,
2593- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2594- this ->type (), " [\"\" , \" a\" , \" ab\" , \"\xc2\xa2\" , \"\xc2\xa2\" , \"\xff\" ]" ,
2595- &options);
2593+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2594+ this ->MakeArray ({" " , " a" , " ab" , " \xc2\xa2 " , " \xc2\xa2 " , " \xff " }), &options);
25962595 SliceOptions options_step{-4 , 4 , 2 };
25972596 this ->CheckUnary (
25982597 " binary_slice" ,
2599- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2600- this ->type (), " [\"\" , \" a\" , \" a\" , \" Z\xa2\" , \" a\xc2\" , \" P\xff\" ]" ,
2601- &options_step);
2598+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2599+ this ->MakeArray ({" " , " a" , " a" , " Z\xa2 " , " a\xc2 " , " P\xff " }), &options_step);
26022600 SliceOptions options_step_neg{-1 , 1 , -2 };
26032601 this ->CheckUnary (
26042602 " binary_slice" ,
2605- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2606- this ->type (), " [\"\" , \"\" , \"\" , \"\xa2\" , \"\xa2\" , \" Z\xc2\" ]" ,
2607- &options_step_neg);
2603+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2604+ this ->MakeArray ({" " , " " , " " , " \xa2 " , " \xa2 " , " Z\xc2 " }), &options_step_neg);
26082605 options_step_neg.stop = 0 ;
26092606 this ->CheckUnary (
26102607 " binary_slice" ,
2611- " [\"\" , \" a\" , \" ab\" , \" Z\xc2\xa2\" , \" aZ\xc2\xa2\" , \" aP\xc2\xff Z\" ]" ,
2612- this ->type (), " [\"\" , \"\" , \" b\" , \"\xa2\" , \"\xa2 Z\" , \" Z\xc2\" ]" ,
2613- &options_step_neg);
2608+ this ->MakeArray ({" " , " a" , " ab" , " Z\xc2\xa2 " , " aZ\xc2\xa2 " , " aP\xc2\xff Z" }),
2609+ this ->MakeArray ({" " , " " , " b" , " \xa2 " , " \xa2 Z" , " Z\xc2 " }), &options_step_neg);
26142610}
26152611
26162612TYPED_TEST (TestStringKernels, PadAscii) {
0 commit comments