Skip to content

Conversation

@fmayer
Copy link
Contributor

@fmayer fmayer commented Jul 7, 2025

No description provided.

Created using spr 1.3.4
@llvmbot
Copy link
Member

llvmbot commented Jul 7, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Florian Mayer (fmayer)

Changes

Patch is 68.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147407.diff

5 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+1-1)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll (+86-25)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll (+240-30)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll (+240-30)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll (+86-25)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ec94dcaa2c051..5a6520e964c25 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4297,7 +4297,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   void handleAVXVpermilvar(IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
     Value *Shadow = getShadow(&I, 0);
-    insertShadowCheck(I.getArgOperand(1), &I);
+    maskedCheckAVXIndexShadow(IRB, I.getArgOperand(1), &I);
 
     // Shadows are integer-ish types but some intrinsics require a
     // different (e.g., floating-point) type.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index c4016e749e70c..6542e56b3fefe 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -950,16 +950,21 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = and i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP9]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
 ; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK:       7:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK:       12:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
-; CHECK:       8:
+; CHECK:       13:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x double> [[RES1]]
@@ -975,16 +980,27 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP15:%.*]] = and i64 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP15]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = and i64 [[TMP12]], 3
+; CHECK-NEXT:    [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK:       7:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
+; CHECK:       18:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
-; CHECK:       8:
+; CHECK:       19:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x double> [[RES1]]
@@ -1014,16 +1030,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP15:%.*]] = and i32 [[TMP3]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP15]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP6]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP9]], 3
+; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[TMP12]], 3
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK:       7:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP14]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
+; CHECK:       18:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
-; CHECK:       8:
+; CHECK:       19:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[RES1]]
@@ -1047,16 +1074,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = and i32 [[TMP8]], 3
+; CHECK-NEXT:    [[TMP20:%.*]] = or i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1
+; CHECK-NEXT:    [[TMP12:%.*]] = and i32 [[TMP11]], 3
+; CHECK-NEXT:    [[TMP13:%.*]] = or i32 [[TMP11]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2
+; CHECK-NEXT:    [[TMP15:%.*]] = and i32 [[TMP14]], 3
+; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3
+; CHECK-NEXT:    [[TMP18:%.*]] = and i32 [[TMP17]], 3
+; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]]
 ; CHECK-NEXT:    [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK:       12:
+; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP19]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK:       23:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
-; CHECK:       13:
+; CHECK:       24:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
 ; CHECK-NEXT:    store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x float> [[RES1]]
@@ -1073,16 +1111,39 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP27:%.*]] = and i32 [[TMP3]], 7
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP27]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[TMP6]], 7
+; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = and i32 [[TMP9]], 7
+; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[TMP12]], 7
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
+; CHECK-NEXT:    [[TMP16:%.*]] = and i32 [[TMP15]], 7
+; CHECK-NEXT:    [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
+; CHECK-NEXT:    [[TMP19:%.*]] = and i32 [[TMP18]], 7
+; CHECK-NEXT:    [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
+; CHECK-NEXT:    [[TMP22:%.*]] = and i32 [[TMP21]], 7
+; CHECK-NEXT:    [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
+; CHECK-NEXT:    [[TMP25:%.*]] = and i32 [[TMP24]], 7
+; CHECK-NEXT:    [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
 ; CHECK-NEXT:    [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
 ; CHECK-NEXT:    [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK:       7:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP26]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK:       30:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
-; CHECK:       8:
+; CHECK:       31:
 ; CHECK-NEXT:    [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
 ; CHECK-NEXT:    store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x float> [[RES1]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
index 2c9c6c7309a09..e7854538aa3e8 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
@@ -8143,16 +8143,39 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP27:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT:    [[TMP28:%.*]] = or i64 [[TMP3]], [[TMP27]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP29:%.*]] = and i64 [[TMP6]], 7
+; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP29]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP10:%.*]] = and i64 [[TMP9]], 7
+; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP13:%.*]] = and i64 [[TMP12]], 7
+; CHECK-NEXT:    [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
+; CHECK-NEXT:    [[TMP16:%.*]] = and i64 [[TMP15]], 7
+; CHECK-NEXT:    [[TMP17:%.*]] = or i64 [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
+; CHECK-NEXT:    [[TMP19:%.*]] = and i64 [[TMP18]], 7
+; CHECK-NEXT:    [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
+; CHECK-NEXT:    [[TMP22:%.*]] = and i64 [[TMP21]], 7
+; CHECK-NEXT:    [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
+; CHECK-NEXT:    [[TMP25:%.*]] = and i64 [[TMP24]], 7
+; CHECK-NEXT:    [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]]
 ; CHECK-NEXT:    [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK:       7:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP26]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK:       30:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       8:
+; CHECK:       31:
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x double> [[TMP5]]
@@ -8169,16 +8192,39 @@ define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0,
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP29:%.*]] = and i64 [[TMP5]], 7
+; CHECK-NEXT:    [[TMP30:%.*]] = or i64 [[TMP5]], [[TMP29]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP31:%.*]] = and i64 [[TMP8]], 7
+; CHECK-NEXT:    [[TMP34:%.*]] = or i64 [[TMP8]], [[TMP31]]
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP36:%.*]] = and i64 [[TMP35]], 7
+; CHECK-NEXT:    [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP39:%.*]] = and i64 [[TMP38]], 7
+; CHECK-NEXT:    [[TMP40:%.*]] = or i64 [[TMP38]], [[TMP39]]
+; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
+; CHECK-NEXT:    [[TMP42:%.*]] = and i64 [[TMP41]], 7
+; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[TMP41]], [[TMP42]]
+; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
+; CHECK-NEXT:    [[TMP21:%.*]] = and i64 [[TMP20]], 7
+; CHECK-NEXT:    [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
+; CHECK-NEXT:    [[TMP24:%.*]] = and i64 [[TMP23]], 7
+; CHECK-NEXT:    [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
+; CHECK-NEXT:    [[TMP27:%.*]] = and i64 [[TMP26]], 7
+; CHECK-NEXT:    [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
 ; CHECK-NEXT:    [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
 ; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
-; CHECK:       9:
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP28]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]]
+; CHECK:       32:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR8]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       10:
+; CHECK:       33:
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X4:%.*]], <8 x i64> [[X1]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -8204,16 +8250,39 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
+; CHECK-NEXT:    [[TMP28:%.*]] = and i64 [[TMP4]], 7
+; CHECK-NEXT:    [[TMP29:%.*]] = or i64 [[TMP4]], [[TMP28]]
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP30:%.*]] = and i64 [[TMP7]], 7
+; CHECK-NEXT:    [[TMP33:%.*]] = or i64 [[TMP7]], [[TMP30]]
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
+; CHECK-NEXT:    [[TMP35:%.*]] = and i64 [[TMP34]], 7
+; CHECK-NEXT:    [[TMP36:%.*]] = or i64 [[TMP34]], [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
+; CHECK-NEXT:    [[TMP38:%.*]] = and i64 [[TMP37]], 7
+; CHECK-NEXT:    [[TMP39:%.*]] = or i64 [[TMP37]], [[TMP38]]
+; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
+; CHECK-NEXT:    [[TMP17:%.*]] = and i64 [[TMP40]], 7
+; CHECK-NEXT:    [[TMP18:%.*]] = or i64 [[TMP40]], [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
+; CHECK-NEXT:    [[TMP20:%.*]] = and i64 [[TMP19]], 7
+; CHECK-NEXT:    [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
+; CHECK-NEXT:    [[TMP23:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT:    [[TMP24:%.*]] = or i64 [[TMP22]], [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]]...
[truncated]

@fmayer fmayer requested a review from thurstond July 7, 2025 21:47
@fmayer fmayer merged commit a3afbd3 into main Jul 7, 2025
12 checks passed
@fmayer fmayer deleted the users/fmayer/spr/msan-only-require-needed-bits-to-be-initialized-for-permilvar branch July 7, 2025 23:21
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants