@@ -194,17 +194,23 @@ public static long ipByteBin128(byte[] q, byte[] d) {
194194        VectorShape .forBitSize (INT_SPECIES_256 .vectorBitSize () / Integer .BYTES )
195195    );
196196
197+     private  static  int  limit (int  length , int  sectionSize ) { // Returns length with its remainder modulo sectionSize removed; for a non-negative length this is the largest multiple of sectionSize that does not exceed it.
198+         return  length  - (length  % sectionSize ); // Drop the trailing partial section so a loop stepping by sectionSize up to this bound only covers whole sections.
199+     }
200+ 
197201    static  int  ipByteBit512 (byte [] q , byte [] d ) {
198202        assert  q .length  == d .length  * Byte .SIZE ;
199203        int  i  = 0 ;
200204        int  sum  = 0 ;
201205
202-         if  (q .length  >= INT_SPECIES_512 .length () * 4 ) {
206+         int  sectionLength  = INT_SPECIES_512 .length () * 4 ;
207+         if  (q .length  >= sectionLength ) {
203208            IntVector  acc0  = IntVector .zero (INT_SPECIES_512 );
204209            IntVector  acc1  = IntVector .zero (INT_SPECIES_512 );
205210            IntVector  acc2  = IntVector .zero (INT_SPECIES_512 );
206211            IntVector  acc3  = IntVector .zero (INT_SPECIES_512 );
207-             for  (; i  < INT_SPECIES_512 .loopBound (q .length ); i  += INT_SPECIES_512 .length () * 4 ) {
212+             int  limit  = limit (q .length , sectionLength );
213+             for  (; i  < limit ; i  += sectionLength ) {
208214                var  vals0  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_512 , q , i ).castShape (INT_SPECIES_512 , 0 );
209215                var  vals1  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_512 , q , i  + INT_SPECIES_512 .length ()).castShape (INT_SPECIES_512 , 0 );
210216                var  vals2  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_512 , q , i  + INT_SPECIES_512 .length () * 2 )
@@ -227,12 +233,14 @@ static int ipByteBit512(byte[] q, byte[] d) {
227233                + acc3 .reduceLanes (VectorOperators .ADD );
228234        }
229235
230-         if  (q .length  - i  >= INT_SPECIES_256 .length () * 4 ) {
236+         sectionLength  = INT_SPECIES_256 .length () * 4 ;
237+         if  (q .length  - i  >= sectionLength ) {
231238            IntVector  acc0  = IntVector .zero (INT_SPECIES_256 );
232239            IntVector  acc1  = IntVector .zero (INT_SPECIES_256 );
233240            IntVector  acc2  = IntVector .zero (INT_SPECIES_256 );
234241            IntVector  acc3  = IntVector .zero (INT_SPECIES_256 );
235-             for  (; i  < INT_SPECIES_256 .loopBound (q .length ); i  += INT_SPECIES_256 .length () * 4 ) {
242+             int  limit  = limit (q .length , sectionLength );
243+             for  (; i  < limit ; i  += sectionLength ) {
236244                var  vals0  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i ).castShape (INT_SPECIES_256 , 0 );
237245                var  vals1  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i  + INT_SPECIES_256 .length ()).castShape (INT_SPECIES_256 , 0 );
238246                var  vals2  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i  + INT_SPECIES_256 .length () * 2 )
@@ -257,7 +265,8 @@ static int ipByteBit512(byte[] q, byte[] d) {
257265
258266        if  (i  < q .length ) {
259267            // do the tail 
260-             sum  += DefaultESVectorUtilSupport .ipByteBitImpl (q , d , i );
268+             // default implementation uses length of data vector, not query vector 
269+             sum  += DefaultESVectorUtilSupport .ipByteBitImpl (q , d , i  / 8 );
261270        }
262271        return  sum ;
263272    }
@@ -267,12 +276,14 @@ static int ipByteBit256(byte[] q, byte[] d) {
267276        int  i  = 0 ;
268277        int  sum  = 0 ;
269278
270-         if  (q .length  >= INT_SPECIES_256 .length () * 4 ) {
279+         int  sectionLength  = INT_SPECIES_256 .length () * 4 ;
280+         if  (q .length  >= sectionLength ) {
271281            IntVector  acc0  = IntVector .zero (INT_SPECIES_256 );
272282            IntVector  acc1  = IntVector .zero (INT_SPECIES_256 );
273283            IntVector  acc2  = IntVector .zero (INT_SPECIES_256 );
274284            IntVector  acc3  = IntVector .zero (INT_SPECIES_256 );
275-             for  (; i  < INT_SPECIES_256 .loopBound (q .length ); i  += INT_SPECIES_256 .length () * 4 ) {
285+             int  limit  = limit (q .length , sectionLength );
286+             for  (; i  < limit ; i  += sectionLength ) {
276287                var  vals0  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i ).castShape (INT_SPECIES_256 , 0 );
277288                var  vals1  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i  + INT_SPECIES_256 .length ()).castShape (INT_SPECIES_256 , 0 );
278289                var  vals2  = ByteVector .fromArray (BYTE_SPECIES_FOR_INT_256 , q , i  + INT_SPECIES_256 .length () * 2 )
@@ -297,7 +308,8 @@ static int ipByteBit256(byte[] q, byte[] d) {
297308
298309        if  (i  < q .length ) {
299310            // do the tail 
300-             sum  += DefaultESVectorUtilSupport .ipByteBitImpl (q , d , i );
311+             // default implementation uses length of data vector, not query vector 
312+             sum  += DefaultESVectorUtilSupport .ipByteBitImpl (q , d , i  / 8 );
301313        }
302314        return  sum ;
303315    }
@@ -310,12 +322,14 @@ static float ipFloatBit512(float[] q, byte[] d) {
310322        int  i  = 0 ;
311323        float  sum  = 0 ;
312324
313-         if  (q .length  >= FLOAT_SPECIES_512 .length () * 4 ) {
325+         int  sectionLength  = FLOAT_SPECIES_512 .length () * 4 ;
326+         if  (q .length  >= sectionLength ) {
314327            FloatVector  acc0  = FloatVector .zero (FLOAT_SPECIES_512 );
315328            FloatVector  acc1  = FloatVector .zero (FLOAT_SPECIES_512 );
316329            FloatVector  acc2  = FloatVector .zero (FLOAT_SPECIES_512 );
317330            FloatVector  acc3  = FloatVector .zero (FLOAT_SPECIES_512 );
318-             for  (; i  < FLOAT_SPECIES_512 .loopBound (q .length ); i  += FLOAT_SPECIES_512 .length () * 4 ) {
331+             int  limit  = limit (q .length , sectionLength );
332+             for  (; i  < limit ; i  += sectionLength ) {
319333                var  floats0  = FloatVector .fromArray (FLOAT_SPECIES_512 , q , i );
320334                var  floats1  = FloatVector .fromArray (FLOAT_SPECIES_512 , q , i  + FLOAT_SPECIES_512 .length ());
321335                var  floats2  = FloatVector .fromArray (FLOAT_SPECIES_512 , q , i  + FLOAT_SPECIES_512 .length () * 2 );
@@ -336,12 +350,14 @@ static float ipFloatBit512(float[] q, byte[] d) {
336350                + acc3 .reduceLanes (VectorOperators .ADD );
337351        }
338352
339-         if  (q .length  - i  >= FLOAT_SPECIES_256 .length () * 4 ) {
353+         sectionLength  = FLOAT_SPECIES_256 .length () * 4 ;
354+         if  (q .length  - i  >= sectionLength ) {
340355            FloatVector  acc0  = FloatVector .zero (FLOAT_SPECIES_256 );
341356            FloatVector  acc1  = FloatVector .zero (FLOAT_SPECIES_256 );
342357            FloatVector  acc2  = FloatVector .zero (FLOAT_SPECIES_256 );
343358            FloatVector  acc3  = FloatVector .zero (FLOAT_SPECIES_256 );
344-             for  (; i  < FLOAT_SPECIES_256 .loopBound (q .length ); i  += FLOAT_SPECIES_256 .length () * 4 ) {
359+             int  limit  = limit (q .length , sectionLength );
360+             for  (; i  < limit ; i  += sectionLength ) {
345361                var  floats0  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i );
346362                var  floats1  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i  + FLOAT_SPECIES_256 .length ());
347363                var  floats2  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i  + FLOAT_SPECIES_256 .length () * 2 );
@@ -364,7 +380,8 @@ static float ipFloatBit512(float[] q, byte[] d) {
364380
365381        if  (i  < q .length ) {
366382            // do the tail 
367-             sum  += DefaultESVectorUtilSupport .ipFloatBitImpl (q , d , i );
383+             // default implementation uses length of data vector, not query vector 
384+             sum  += DefaultESVectorUtilSupport .ipFloatBitImpl (q , d , i  / 8 );
368385        }
369386
370387        return  sum ;
@@ -375,12 +392,14 @@ static float ipFloatBit256(float[] q, byte[] d) {
375392        int  i  = 0 ;
376393        float  sum  = 0 ;
377394
378-         if  (q .length  >= FLOAT_SPECIES_256 .length () * 4 ) {
395+         int  sectionLength  = FLOAT_SPECIES_256 .length () * 4 ;
396+         if  (q .length  >= sectionLength ) {
379397            FloatVector  acc0  = FloatVector .zero (FLOAT_SPECIES_256 );
380398            FloatVector  acc1  = FloatVector .zero (FLOAT_SPECIES_256 );
381399            FloatVector  acc2  = FloatVector .zero (FLOAT_SPECIES_256 );
382400            FloatVector  acc3  = FloatVector .zero (FLOAT_SPECIES_256 );
383-             for  (; i  < FLOAT_SPECIES_256 .loopBound (q .length ); i  += FLOAT_SPECIES_256 .length () * 4 ) {
401+             int  limit  = limit (q .length , sectionLength );
402+             for  (; i  < limit ; i  += sectionLength ) {
384403                var  floats0  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i );
385404                var  floats1  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i  + FLOAT_SPECIES_256 .length ());
386405                var  floats2  = FloatVector .fromArray (FLOAT_SPECIES_256 , q , i  + FLOAT_SPECIES_256 .length () * 2 );
@@ -403,7 +422,8 @@ static float ipFloatBit256(float[] q, byte[] d) {
403422
404423        if  (i  < q .length ) {
405424            // do the tail 
406-             sum  += DefaultESVectorUtilSupport .ipFloatBitImpl (q , d , i );
425+             // default implementation uses length of data vector, not query vector 
426+             sum  += DefaultESVectorUtilSupport .ipFloatBitImpl (q , d , i  / 8 );
407427        }
408428
409429        return  sum ;
0 commit comments