From 0e88d4c7d3ff017e7f1eda6bc39918348c703395 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 24 Mar 2025 19:13:59 +0000 Subject: [PATCH 1/7] [msan] Handle SSE2 cvtps2dq/cvtpd2dq using handleSSEVectorConvertIntrinsicByProp This generalizes handleSSEVectorConvertIntrinsicByProp from https://github.com/llvm/llvm-project/pull/130705 to handle SSE intrinsics that do not have a rounding mode parameter. cvtps2dq/cvtpd2dq were previously handled strictly. --- .../Instrumentation/MemorySanitizer.cpp | 48 ++++++++++---- .../X86/sse2-intrinsics-x86.ll | 63 ++++++++----------- .../i386/sse2-intrinsics-i386.ll | 63 ++++++++----------- 3 files changed, 85 insertions(+), 89 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index e330c7c89b0c5..f17c5e254f85f 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3274,22 +3274,32 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - /// Handle x86 SSE single-precision to half-precision conversion. + /// Handle x86 SSE vector conversion. /// - /// e.g., + /// e.g., single-precision to half-precision conversion: /// <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) /// <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) + /// + /// floating-point to integer: + /// <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) + /// <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) + /// /// Note: if the output has more elements, they are zero-initialized (and /// therefore the shadow will also be initialized). /// /// This differs from handleSSEVectorConvertIntrinsic() because it /// propagates uninitialized shadow (instead of checking the shadow). - void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I) { - assert(I.arg_size() == 2); + void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, bool HasRoundingMode = false) { + if (HasRoundingMode) { + assert(I.arg_size() == 2); + [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1); + assert(RoundingMode->getType()->isIntegerTy()); + } else { + assert(I.arg_size() == 1); + } + Value *Src = I.getArgOperand(0); assert(Src->getType()->isVectorTy()); - [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1); - assert(RoundingMode->getType()->isIntegerTy()); // The return type might have more elements than the input. // Temporarily shrink the return type's number of elements. @@ -3305,7 +3315,8 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *S0 = getShadow(&I, 0); /// For scalars: - /// Since they are converting from floating-point to integer, the output is + /// Since they are converting from floating-point to integer (or + /// vice-versa), the output is /// - fully uninitialized if *any* bit of the input is uninitialized /// - fully ininitialized if all bits of the input are ininitialized /// We apply the same principle on a per-field basis for vectors. @@ -4653,6 +4664,23 @@ struct MemorySanitizerVisitor : public InstVisitor { handleSSEVectorConvertIntrinsic(I, 2); break; + // TODO: + // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) + // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) + // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) + + case Intrinsic::x86_vcvtps2ph_128: + case Intrinsic::x86_vcvtps2ph_256: { + handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ true); + break; + } + + case Intrinsic::x86_sse2_cvtps2dq: + case Intrinsic::x86_sse2_cvtpd2dq: { + handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ false); + break; + } + case Intrinsic::x86_avx512_psll_w_512: case Intrinsic::x86_avx512_psll_d_512: case Intrinsic::x86_avx512_psll_q_512: @@ -4998,12 +5026,6 @@ struct MemorySanitizerVisitor : public InstVisitor { break; } - case Intrinsic::x86_vcvtps2ph_128: - case Intrinsic::x86_vcvtps2ph_256: { - handleSSEVectorConvertIntrinsicByProp(I); - break; - } - case Intrinsic::fshl: case Intrinsic::fshr: handleFunnelShift(I); diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index 9d075f7974cd9..7398b9df2a99c 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -160,15 +160,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_sse2_cvtpd2dq( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -181,18 +177,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-LABEL: @test_mm_cvtpd_epi32_zext( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -206,9 +200,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 2: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: 3: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -216,18 +210,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i1> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %a0 = load <2 x double>, ptr %p0 %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) @@ -315,15 +307,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { ; CHECK-LABEL: @test_x86_sse2_cvtps2dq( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index 3e5db7822b0ef..55a52003db2e7 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -169,15 +169,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -191,18 +187,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -217,9 +211,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: 4: ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 @@ -227,18 +221,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -2147483649 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i1> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %a0 = load <2 x double>, ptr %p0 %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) @@ -330,15 +322,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] From b696fdb7ba0de95d04e75d97ac1a1679bfbb6f1e Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 24 Mar 2025 19:27:22 +0000 Subject: [PATCH 2/7] clang-format --- .../Instrumentation/MemorySanitizer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index f17c5e254f85f..2c94f5c9eba38 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3289,7 +3289,8 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// This differs from handleSSEVectorConvertIntrinsic() because it /// propagates uninitialized shadow (instead of checking the shadow). - void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, bool HasRoundingMode = false) { + void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, + bool HasRoundingMode = false) { if (HasRoundingMode) { assert(I.arg_size() == 2); [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1); @@ -4664,20 +4665,20 @@ struct MemorySanitizerVisitor : public InstVisitor { handleSSEVectorConvertIntrinsic(I, 2); break; - // TODO: - // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) - // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) - // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) + // TODO: + // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) + // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) + // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) case Intrinsic::x86_vcvtps2ph_128: case Intrinsic::x86_vcvtps2ph_256: { - handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ true); + handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/true); break; } case Intrinsic::x86_sse2_cvtps2dq: case Intrinsic::x86_sse2_cvtpd2dq: { - handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ false); + handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false); break; } From 07e09ba1ae6f1d71591bca57274ff8b2d132cafb Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 24 Mar 2025 23:36:05 +0000 Subject: [PATCH 3/7] Handled truncated forms cvttps2dq/cvttpd2dq --- .../Instrumentation/MemorySanitizer.cpp | 4 +- .../X86/sse2-intrinsics-x86.ll | 59 +++++++----------- .../i386/sse2-intrinsics-i386.ll | 61 ++++++++----------- 3 files changed, 50 insertions(+), 74 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 2c94f5c9eba38..1777a374e25cd 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4677,7 +4677,9 @@ struct MemorySanitizerVisitor : public InstVisitor { } case Intrinsic::x86_sse2_cvtps2dq: - case Intrinsic::x86_sse2_cvtpd2dq: { + case Intrinsic::x86_sse2_cvtpd2dq: + case Intrinsic::x86_sse2_cvttps2dq: + case Intrinsic::x86_sse2_cvttpd2dq: { handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false); break; } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index 7398b9df2a99c..da1ba77306020 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -435,15 +435,11 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_sse2_cvttpd2dq( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -456,18 +452,16 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-LABEL: @test_mm_cvttpd_epi32_zext( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -491,18 +485,16 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i1> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %a0 = load <2 x double>, ptr %p0 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) @@ -516,15 +508,10 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { ; CHECK-LABEL: @test_x86_sse2_cvttps2dq( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index 55a52003db2e7..8dbaaf7231207 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -455,15 +455,11 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -477,18 +473,16 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> @@ -503,7 +497,7 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -513,18 +507,16 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -2147483649 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i1> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) -; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> -; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <2 x i64> [[BC]] +; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC1]] ; %a0 = load <2 x double>, ptr %p0 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) @@ -539,15 +531,10 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] From cd43d6877c31dc934ff5f8b20743dc38cef1c38a Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 24 Mar 2025 23:59:11 +0000 Subject: [PATCH 4/7] AVX variants --- .../Instrumentation/MemorySanitizer.cpp | 6 ++- .../MemorySanitizer/X86/avx-intrinsics-x86.ll | 44 +++++-------------- .../i386/avx-intrinsics-i386.ll | 44 +++++-------------- 3 files changed, 29 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 1777a374e25cd..1c4c2163fc5a0 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4679,7 +4679,11 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_sse2_cvtps2dq: case Intrinsic::x86_sse2_cvtpd2dq: case Intrinsic::x86_sse2_cvttps2dq: - case Intrinsic::x86_sse2_cvttpd2dq: { + case Intrinsic::x86_sse2_cvttpd2dq: + case Intrinsic::x86_avx_cvt_ps2dq_256: + case Intrinsic::x86_avx_cvt_pd2dq_256: + case Intrinsic::x86_avx_cvtt_ps2dq_256: + case Intrinsic::x86_avx_cvtt_pd2dq_256: { handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/false); break; } diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index d85ab2c2c4bad..483afbdea1f62 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -324,15 +324,10 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_avx_cvt_pd2dq_256( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -345,15 +340,10 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-LABEL: @test_x86_avx_cvt_ps2dq_256( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] @@ -366,15 +356,10 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_avx_cvtt_pd2dq_256( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -387,15 +372,10 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-LABEL: @test_x86_avx_cvtt_ps2dq_256( ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll index c059a2f49617d..97beaf915e37d 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll @@ -333,15 +333,10 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -355,15 +350,10 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] @@ -377,15 +367,10 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] @@ -399,15 +384,10 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] From 50f982709cdd79d5b0a2a208c19e632d77ebd939 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 25 Mar 2025 01:39:03 +0000 Subject: [PATCH 5/7] Handle cvtpd2ps Add comments on strictly handled instructions Fix comment --- .../Instrumentation/MemorySanitizer.cpp | 4 +- .../MemorySanitizer/X86/avx-intrinsics-x86.ll | 13 +++++ .../MemorySanitizer/X86/mmx-intrinsics.ll | 14 ++++++ .../X86/sse2-intrinsics-x86.ll | 46 +++++++++--------- .../i386/avx-intrinsics-i386.ll | 13 +++++ .../MemorySanitizer/i386/mmx-intrinsics.ll | 14 ++++++ .../i386/sse2-intrinsics-i386.ll | 48 +++++++++---------- 7 files changed, 101 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 1c4c2163fc5a0..ac071190c7c14 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3316,8 +3316,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *S0 = getShadow(&I, 0); /// For scalars: - /// Since they are converting from floating-point to integer (or - /// vice-versa), the output is + /// Since they are converting to and/or from floating-point, the output is: /// - fully uninitialized if *any* bit of the input is uninitialized /// - fully ininitialized if all bits of the input are ininitialized /// We apply the same principle on a per-field basis for vectors. @@ -4676,6 +4675,7 @@ struct MemorySanitizerVisitor : public InstVisitor { break; } + case Intrinsic::x86_sse2_cvtpd2ps: case Intrinsic::x86_sse2_cvtps2dq: case Intrinsic::x86_sse2_cvtpd2dq: case Intrinsic::x86_sse2_cvttps2dq: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index 483afbdea1f62..4883fd1160e18 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -1,5 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) +; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) +; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) +; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) +; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) +; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) +; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) +; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) +; - <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) +; - void @llvm.x86.avx.vzeroall() +; - void @llvm.x86.avx.vzeroupper() target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index 2ce4c49250d3f..ac3bb56719038 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1,5 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2 +; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2 +; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 +; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 +; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5 +; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5 +; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5 +; - <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16) +; - <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b) +; - void @llvm.x86.mmx.emms() +; - <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2) +; - i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2) target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index da1ba77306020..8f915a59db8e5 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -1,5 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) +; - i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) +; - void @llvm.x86.sse2.pause() +; - void @llvm.x86.sse2.lfence() +; - void @llvm.x86.sse2.mfence() +; - void @llvm.x86.sse2.clflush(ptr %p) target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -233,15 +241,11 @@ define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_sse2_cvtpd2ps( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] @@ -253,16 +257,13 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 { ; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext( ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) @@ -285,16 +286,13 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i1> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %a0 = load <2 x double>, ptr %p0 diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll index 97beaf915e37d..be6201a5f3b3a 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll @@ -1,5 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) +; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) +; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) +; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) +; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) +; - <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> ) +; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) +; - <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) +; - <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) +; - void @llvm.x86.avx.vzeroall() +; - void @llvm.x86.avx.vzeroupper() target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "i386-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 5aafe10fd575a..0a3efaaea149f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -1,5 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) #2 +; - void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) #2 +; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 +; - <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %4, i8 3) #5 +; - <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %4) #5 +; - <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) #5 +; - <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) #5 +; - <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16) +; - <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b) +; - void @llvm.x86.mmx.emms() +; - <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2) +; - i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2) target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "i386-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index 8dbaaf7231207..e771e60e2f294 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -1,5 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s +; +; Handled strictly: +; - i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) +; - i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) +; - void @llvm.x86.sse2.pause() +; - void @llvm.x86.sse2.lfence() +; - void @llvm.x86.sse2.mfence() +; - void @llvm.x86.sse2.clflush(ptr %p) target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "i386-unknown-linux-gnu" @@ -245,15 +253,11 @@ define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] @@ -266,16 +270,13 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) @@ -289,7 +290,7 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 3: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] ; CHECK-NEXT: unreachable @@ -299,16 +300,13 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { ; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -2147483649 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] -; CHECK: 9: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] -; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i1> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %a0 = load <2 x double>, ptr %p0 From 43b676c67455adfd9b2abfd515cebff0f9d0302d Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 25 Mar 2025 01:46:12 +0000 Subject: [PATCH 6/7] Handle llvm.x86.avx.cvt.pd2.ps.256 --- .../Transforms/Instrumentation/MemorySanitizer.cpp | 1 + .../MemorySanitizer/X86/avx-intrinsics-x86.ll | 14 ++++---------- .../MemorySanitizer/i386/avx-intrinsics-i386.ll | 14 ++++---------- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ac071190c7c14..c4fcf55898832 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4680,6 +4680,7 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_sse2_cvtpd2dq: case Intrinsic::x86_sse2_cvttps2dq: case Intrinsic::x86_sse2_cvttpd2dq: + case Intrinsic::x86_avx_cvt_pd2_ps_256: case Intrinsic::x86_avx_cvt_ps2dq_256: case Intrinsic::x86_avx_cvt_pd2dq_256: case Intrinsic::x86_avx_cvtt_ps2dq_256: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index 4883fd1160e18..d1655bfbb4d23 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -2,7 +2,6 @@ ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s ; ; Handled strictly: -; - <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) @@ -316,15 +315,10 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { ; CHECK-LABEL: @test_x86_avx_cvt_pd2_ps_256( ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: 3: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] @@ -504,7 +498,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll index be6201a5f3b3a..06c62dd44c99f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll @@ -2,7 +2,6 @@ ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s ; ; Handled strictly: -; - <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; - i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; - i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; - <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) @@ -324,15 +323,10 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK: 4: -; CHECK-NEXT: call void @__msan_warning_noreturn() -; CHECK-NEXT: unreachable -; CHECK: 5: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]]) -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES]] ; %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] @@ -522,7 +516,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { ; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable From 12b51911dca8b7a0658e42f9e21b1460da3c8b81 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 25 Mar 2025 01:46:35 +0000 Subject: [PATCH 7/7] Remove default --- llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index c4fcf55898832..612aaa5dbd43c 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3290,7 +3290,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// This differs from handleSSEVectorConvertIntrinsic() because it /// propagates uninitialized shadow (instead of checking the shadow). void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, - bool HasRoundingMode = false) { + bool HasRoundingMode) { if (HasRoundingMode) { assert(I.arg_size() == 2); [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);