diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index ec94dcaa2c051..5a6520e964c25 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4297,7 +4297,7 @@ struct MemorySanitizerVisitor : public InstVisitor { void handleAVXVpermilvar(IntrinsicInst &I) { IRBuilder<> IRB(&I); Value *Shadow = getShadow(&I, 0); - insertShadowCheck(I.getArgOperand(1), &I); + maskedCheckAVXIndexShadow(IRB, I.getArgOperand(1), &I); // Shadows are integer-ish types but some intrinsics require a // different (e.g., floating-point) type. diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll index c4016e749e70c..6542e56b3fefe 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll @@ -950,16 +950,21 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 13: ; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]]) ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[RES1]] @@ -975,16 +980,27 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP15]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 19: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]]) ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -1014,16 +1030,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) # ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP15]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 19: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES1]] @@ -1047,16 +1074,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = or i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP11]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK: 24: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]]) ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES1]] @@ -1073,16 +1111,39 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP3]], 7 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP27]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 7 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 7 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7 +; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP26]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 31: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]] diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll index 2c9c6c7309a09..e7854538aa3e8 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll @@ -8143,16 +8143,39 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP3]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP3]], [[TMP27]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP29]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 7 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP26]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 31: ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x double> [[TMP5]] @@ -8169,16 +8192,39 @@ define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP5]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP5]], [[TMP29]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP8]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP8]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP36:%.*]] = and i64 [[TMP35]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP38]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP42:%.*]] = and i64 [[TMP41]], 7 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] -; CHECK: 9: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP28]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]] +; CHECK: 32: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK: 33: ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X4:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> @@ -8204,16 +8250,39 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP4]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP4]], [[TMP28]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP7]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP7]], [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP40]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP40]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]] +; CHECK: 31: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 32: ; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> @@ -8238,16 +8307,63 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP3]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP3]], [[TMP51]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP53]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 15 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP45]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP48]], [[TMP49]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP50]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]] +; CHECK: 54: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 55: ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: store <16 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[TMP5]] @@ -8264,16 +8380,63 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP5]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP5]], [[TMP53]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP55:%.*]] = and i32 [[TMP8]], 15 +; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP8]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP60:%.*]] = and i32 [[TMP59]], 15 +; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP62]], 15 +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP62]], [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP66:%.*]] = and i32 [[TMP65]], 15 +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP65]], [[TMP66]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP47]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP50]], [[TMP51]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] -; CHECK: 9: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP52]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]] +; CHECK: 56: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK: 57: ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X4:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> @@ -8300,16 +8463,63 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_512(<16 x float> %x0 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP4]], 15 +; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP4]], [[TMP52]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP54:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP7]], [[TMP54]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP59:%.*]] = and i32 [[TMP58]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP62:%.*]] = and i32 [[TMP61]], 15 +; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP64]], 15 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP64]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP50]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP51]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP55:%.*]], label [[TMP56:%.*]], !prof [[PROF1]] +; CHECK: 55: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 56: ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll index 43595dccc35bc..3f5d3ab88bbea 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll @@ -9207,16 +9207,39 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP3]], 7 +; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP27]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7 +; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 7 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7 +; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7 +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7 +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7 +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP26]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 31: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x double> [[RES1]] @@ -9232,16 +9255,39 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP5]], 7 +; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP29]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP8]], 7 +; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP8]], [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7 +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7 +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP23]], 7 +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7 +; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] -; CHECK: 9: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP28]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]] +; CHECK: 32: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK: 33: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X3:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> @@ -9268,16 +9314,39 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP4]], 7 +; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP4]], [[TMP28]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP7]], 7 +; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP7]], [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP33]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP36]], 7 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP36]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double> ; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]] +; CHECK: 31: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 32: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]]) ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> @@ -9304,16 +9373,63 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP3]], 15 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP51]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 15 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15 +; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15 +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15 +; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15 +; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15 +; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP39]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP42]], 15 +; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP42]], [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP45]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP48]], [[TMP49]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP50]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]] +; CHECK: 54: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK: 55: ; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: store <16 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x float> [[RES1]] @@ -9329,16 +9445,63 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP5]], 15 +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP53]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP54:%.*]] = and i32 [[TMP8]], 15 +; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP8]], [[TMP54]] +; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP59:%.*]] = and i32 [[TMP58]], 15 +; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP58]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP62:%.*]] = and i32 [[TMP61]], 15 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP61]], [[TMP62]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15 +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15 +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15 +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15 +; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15 +; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15 +; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15 +; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15 +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP38]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15 +; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP41]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15 +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP44]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP47]], 15 +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15 +; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP50]], [[TMP51]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] -; CHECK: 9: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP52]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]] +; CHECK: 56: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 10: +; CHECK: 57: ; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X3:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> @@ -9365,16 +9528,63 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP4]], 15 +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP52]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP7]], 15 +; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP7]], [[TMP53]] +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 15 +; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP60]], 15 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP60]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8 +; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15 +; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9 +; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15 +; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10 +; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15 +; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11 +; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15 +; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12 +; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15 +; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP40]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13 +; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15 +; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14 +; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15 +; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15 +; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], 15 +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP50]] ; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float> ; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP51]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP55:%.*]], label [[TMP56:%.*]], !prof [[PROF1]] +; CHECK: 55: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 56: ; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]]) ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> ; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll index 613146ff8227e..294f2cae7f7ab 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll @@ -990,16 +990,21 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP4]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP4]], [[TMP10]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP7]], [[TMP8]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double> ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 14: ; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]]) ; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <2 x double> [[RES1]] @@ -1016,16 +1021,27 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP4]], [[TMP16]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double> ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 20: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]]) ; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x double> [[RES1]] @@ -1057,16 +1073,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) # ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP4]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP4]], [[TMP16]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP13]], [[TMP14]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]] +; CHECK: 19: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 20: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES1]] @@ -1091,16 +1118,27 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 ; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1 +; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3 +; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3 +; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 3 +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]]) ; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP12]], 0 -; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 14: +; CHECK: 25: ; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]]) ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x float> [[RES1]] @@ -1118,16 +1156,39 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP4]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP4]], [[TMP28]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 7 +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7 +; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3 +; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 7 +; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5 +; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7 +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6 +; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7 +; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]] ; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]]) ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP7]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] -; CHECK: 8: +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]] +; CHECK: 31: ; CHECK-NEXT: call void @__msan_warning_noreturn() ; CHECK-NEXT: unreachable -; CHECK: 9: +; CHECK: 32: ; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x float> [[RES1]]