@@ -214,7 +214,8 @@ size_t match_scalar(const uint32_t *A, const size_t lenA, const uint32_t *B,
214214 */
215215size_t v1 (const uint32_t *rare, size_t lenRare, const uint32_t *freq,
216216 size_t lenFreq, uint32_t *matchOut) {
217- assert (lenRare <= lenFreq);
217+ if (matchOut == freq) { throw invalid_argument (" matchOut should not be freq, when in doubt, use a distinct output buffer." ); }
218+ if (lenRare > lenFreq) { throw invalid_argument (" mismatch freq/rare (programming error?)." ); }
218219 const uint32_t *matchOrig = matchOut;
219220 if (lenFreq == 0 || lenRare == 0 )
220221 return 0 ;
@@ -313,15 +314,16 @@ size_t v1(const uint32_t *rare, size_t lenRare, const uint32_t *freq,
313314 * as well as lenRare and lenFreq could lead to significant performance
314315 * differences.
315316 *
316- * The matchOut pointer can safely be equal to the rare pointer.
317+ * The out pointer can safely be equal to the rare pointer.
317318 *
318319 * This function DOES NOT use inline assembly instructions. Just intrinsics.
319320 */
320321size_t v3 (const uint32_t *rare, const size_t lenRare, const uint32_t *freq,
321322 const size_t lenFreq, uint32_t *out) {
322323 if (lenFreq == 0 || lenRare == 0 )
323324 return 0 ;
324- assert (lenRare <= lenFreq);
325+ if (out == freq) { throw invalid_argument (" matchOut should not be freq, when in doubt, use a distinct output buffer." ); }
326+ if (lenRare > lenFreq) { throw invalid_argument (" mismatch freq/rare (programming error?)." ); }
325327 const uint32_t *const initout (out);
326328 typedef __m128i vec;
327329 const uint32_t veclen = sizeof (vec) / sizeof (uint32_t );
@@ -497,7 +499,7 @@ size_t v3(const uint32_t *rare, const size_t lenRare, const uint32_t *freq,
497499 * as well as lenRare and lenFreq could lead to significant performance
498500 * differences.
499501 *
500- * The matchOut pointer can safely be equal to the rare pointer.
502+ * The out pointer can safely be equal to the rare pointer.
501503 *
502504 * This function DOES NOT use assembly. It only relies on intrinsics.
503505 */
@@ -506,7 +508,8 @@ size_t SIMDgalloping(const uint32_t *rare, const size_t lenRare,
506508 uint32_t *out) {
507509 if (lenFreq == 0 || lenRare == 0 )
508510 return 0 ;
509- assert (lenRare <= lenFreq);
511+ if (out == freq) { throw invalid_argument (" matchOut should not be freq, when in doubt, use a distinct output buffer." ); }
512+ if (lenRare > lenFreq) { throw invalid_argument (" mismatch freq/rare (programming error?)." ); }
510513 const uint32_t *const initout (out);
511514 typedef __m128i vec;
512515 const uint32_t veclen = sizeof (vec) / sizeof (uint32_t );
@@ -706,6 +709,9 @@ size_t SIMDgalloping(const uint32_t *rare, const size_t lenRare,
706709size_t SIMDintersection (const uint32_t *set1, const size_t length1,
707710 const uint32_t *set2, const size_t length2,
708711 uint32_t *out) {
712+ if (((length1 > length2) && (out == set1)) || ((length2 > length1) && (out == set2))) {
713+ throw invalid_argument (" out should not be equal to the largest array." );
714+ }
709715 if ((length1 == 0 ) or (length2 == 0 ))
710716 return 0 ;
711717
@@ -722,7 +728,12 @@ size_t SIMDintersection(const uint32_t *set1, const size_t length1,
722728 else
723729 return v3 (set2, length2, set1, length1, out);
724730 }
725-
731+ if (length1 == length2) {
732+ if (out == set1)
733+ return v1 (set1, length1, set2, length2, out);
734+ else
735+ return v1 (set2, length2, set1, length1, out);
736+ }
726737 if (length1 <= length2)
727738 return v1 (set1, length1, set2, length2, out);
728739 else
0 commit comments