@@ -9,6 +9,15 @@ import (
99 "unsafe"
1010)
1111
// bytesToInt8Slice reinterprets the contents of b as int8 values without
// copying: the returned slice aliases b's backing array and has the same
// length. A nil or empty b yields nil.
//
// It exists so byte data can be handed to the slice-based int8 SIMD load
// helpers instead of casting to a fixed-size array pointer.
func bytesToInt8Slice(b []byte) []int8 {
	n := len(b)
	if n == 0 {
		return nil
	}
	// byte and int8 have the same size, so the element pointer can be
	// retyped directly and re-sliced to the original length.
	first := (*int8)(unsafe.Pointer(unsafe.SliceData(b)))
	return unsafe.Slice(first, n)
}
20+
1221// useAVX512 indicates whether AVX-512 instructions are available at runtime.
1322var useAVX512 bool
1423
@@ -177,7 +186,7 @@ func generateMasksAVX512(data []byte, separator byte) (quote, sep, cr, nl uint64
177186
178187// generateMasksAVX512WithCmp generates masks reusing pre-broadcasted comparators.
179188func generateMasksAVX512WithCmp (data []byte , quoteCmp , sepCmp , crCmp , nlCmp archsimd.Int8x64 ) (quote , sep , cr , nl uint64 ) {
180- chunk := archsimd .LoadInt8x64 (( * [ simdChunkSize ] int8 )( unsafe . Pointer ( & data [ 0 ]) ))
189+ chunk := archsimd .LoadInt8x64Slice ( bytesToInt8Slice ( data ))
181190 return chunk .Equal (quoteCmp ).ToBits (),
182191 chunk .Equal (sepCmp ).ToBits (),
183192 chunk .Equal (crCmp ).ToBits (),
@@ -222,17 +231,21 @@ func generateMasksPadded(data []byte, separator byte) (quote, sep, cr, nl uint64
222231}
223232
224233// generateMasksPaddedWithCmp is the AVX-512 version of generateMasksPadded.
234+ // Uses LoadInt8x64SlicePart to safely load partial chunks without manual padding.
225235func generateMasksPaddedWithCmp (data []byte , quoteCmp , sepCmp , crCmp , nlCmp archsimd.Int8x64 ) (quote , sep , cr , nl uint64 , validBits int ) {
226236 validBits = len (data )
227237 if validBits == 0 {
228238 return 0 , 0 , 0 , 0 , 0
229239 }
230240
231- var padded [simdChunkSize ]byte
232- copy (padded [:], data )
233-
234- quote , sep , cr , nl = generateMasksAVX512WithCmp (padded [:], quoteCmp , sepCmp , crCmp , nlCmp )
241+ // SlicePart safely loads partial data, zero-filling unused lanes
242+ chunk := archsimd .LoadInt8x64SlicePart (bytesToInt8Slice (data ))
243+ quote = chunk .Equal (quoteCmp ).ToBits ()
244+ sep = chunk .Equal (sepCmp ).ToBits ()
245+ cr = chunk .Equal (crCmp ).ToBits ()
246+ nl = chunk .Equal (nlCmp ).ToBits ()
235247
248+ // Mask out bits beyond valid data
236249 if validBits < simdChunkSize {
237250 mask := (uint64 (1 ) << validBits ) - 1
238251 quote &= mask
0 commit comments