diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll new file mode 100644 index 0000000000000..f9b223dc420b9 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vadd.ll @@ -0,0 +1,3824 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=msan -S | FileCheck %s +; +; Handled heuristically but incorrectly: +; - llvm.aarch64.neon.{addp, faddp} +; +; Unknown instructions handled by visitInstruction: +; - Incorrectly: llvm.aarch64.neon.{addhn, raddhn} +; - Suboptimally: llvm.aarch64.neon.{saddlp, uaddlp} +; +; Forked from llvm/test/CodeGen/AArch64/arm64-vadd.ll + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android9001" + +define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i8> @addhn8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label 
[[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i16>, ptr %B + %tmpvar3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmpvar1, <8 x i16> %tmpvar2) + ret <8 x i8> %tmpvar3 +} + +define <4 x i16> @addhn4h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @addhn4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i32>, ptr %B + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmpvar1, <4 x i32> %tmpvar2) + ret <4 x i16> %tmpvar3 +} + +define <2 x i32> 
@addhn2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @addhn2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> [[TMPVAR1]], <2 x i64> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmpvar1, <2 x i64> %tmpvar2) + ret <2 x i32> %tmpvar3 +} + +define <16 x i8> @addhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind #0 { +; CHECK-LABEL: define <16 x i8> @addhn2_16b( +; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VADDHN2_I:%.*]] = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], 
!prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VADDHN_HIGH2_I:%.*]] = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[VADDHN2_I]], <8 x i8> [[VADDHN_HIGH2_I]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %vaddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vaddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %res = shufflevector <8 x i8> %vaddhn2.i, <8 x i8> %vaddhn_high2.i, <16 x i32> + ret <16 x i8> %res +} + +define <8 x i16> @addhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @addhn2_8h( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VADDHN2_I:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> [[A]], <4 x i32> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne 
i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VADDHN_HIGH3_I:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> [[A]], <4 x i32> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[VADDHN2_I]], <4 x i16> [[VADDHN_HIGH3_I]], <8 x i32> +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %vaddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vaddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %res = shufflevector <4 x i16> %vaddhn2.i, <4 x i16> %vaddhn_high3.i, <8 x i32> + ret <8 x i16> %res +} + +define <4 x i32> @addhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @addhn2_4s( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() 
#[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VADDHN2_I:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> [[A]], <2 x i64> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VADDHN_HIGH3_I:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> [[A]], <2 x i64> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[VADDHN2_I]], <2 x i32> [[VADDHN_HIGH3_I]], <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %vaddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vaddhn_high3.i = tail call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %res = shufflevector <2 x i32> %vaddhn2.i, <2 x i32> %vaddhn_high3.i, <4 x i32> + ret <4 x i32> %res +} + +declare <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i8> @raddhn8b(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i8> @raddhn8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmpvar1 
= load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i16>, ptr %B + %tmpvar3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmpvar1, <8 x i16> %tmpvar2) + ret <8 x i8> %tmpvar3 +} + +define <4 x i16> @raddhn4h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @raddhn4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne 
i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i32>, ptr %B + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmpvar1, <4 x i32> %tmpvar2) + ret <4 x i16> %tmpvar3 +} + +define <2 x i32> @raddhn2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @raddhn2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: 
[[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[TMPVAR1]], <2 x i64> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmpvar1, <2 x i64> %tmpvar2) + ret <2 x i32> %tmpvar3 +} + +define <16 x i8> @raddhn2_16b(<8 x i16> %a, <8 x i16> %b) nounwind #0 { +; CHECK-LABEL: define <16 x i8> @raddhn2_16b( +; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: 
[[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VRADDHN2_I:%.*]] = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VRADDHN_HIGH2_I:%.*]] = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[VRADDHN2_I]], <8 x i8> [[VRADDHN_HIGH2_I]], <16 x i32> +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %vraddhn2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %vraddhn_high2.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) nounwind + %res = shufflevector <8 x i8> %vraddhn2.i, <8 x i8> %vraddhn_high2.i, <16 x i32> + ret <16 x i8> %res +} + +define <8 x i16> @raddhn2_8h(<4 x i32> %a, <4 x i32> %b) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @raddhn2_8h( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VRADDHN2_I:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[A]], <4 x i32> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VRADDHN_HIGH3_I:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> [[A]], <4 x i32> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[VRADDHN2_I]], <4 x i16> [[VRADDHN_HIGH3_I]], <8 x i32> +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %vraddhn2.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %vraddhn_high3.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) nounwind + %res = shufflevector <4 x i16> %vraddhn2.i, <4 x i16> %vraddhn_high3.i, <8 x i32> + ret <8 x i16> %res +} + +define <4 x i32> @raddhn2_4s(<2 x i64> %a, <2 x i64> %b) nounwind #0 { +; CHECK-LABEL: define <4 x i32> 
@raddhn2_4s( +; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[VRADDHN2_I:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[A]], <2 x i64> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[VRADDHN_HIGH3_I:%.*]] = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> [[A]], <2 x i64> [[B]]) #[[ATTR3]] +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[VRADDHN2_I]], <2 x i32> [[VRADDHN_HIGH3_I]], <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %vraddhn2.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %vraddhn_high3.i = tail 
call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) nounwind + %res = shufflevector <2 x i32> %vraddhn2.i, <2 x i32> %vraddhn_high3.i, <4 x i32> + ret <4 x i32> %res +} + +declare <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone +declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone + +define <8 x i16> @saddl8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @saddl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 
x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <8 x i8> [[TMPVAR1]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <8 x i8> [[_MSLD1]] to <8 x i16> +; CHECK-NEXT: [[TMPVAR4:%.*]] = sext <8 x i8> [[TMPVAR2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar2 = load <8 x i8>, ptr %B + %tmpvar3 = sext <8 x i8> %tmpvar1 to <8 x i16> + %tmpvar4 = sext <8 x i8> %tmpvar2 to <8 x i16> + %tmpvar5 = add <8 x i16> %tmpvar3, %tmpvar4 + ret <8 x i16> %tmpvar5 +} + +define <4 x i32> @saddl4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @saddl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <4 x i16> [[TMPVAR1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <4 x i16> [[_MSLD1]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR4:%.*]] = sext <4 x i16> [[TMPVAR2]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar2 = load <4 x i16>, ptr %B + %tmpvar3 = sext <4 x i16> %tmpvar1 to <4 x i32> + %tmpvar4 = sext <4 x i16> %tmpvar2 to <4 x i32> + %tmpvar5 = add <4 x i32> %tmpvar3, %tmpvar4 + ret <4 x i32> %tmpvar5 +} + +define <2 x i64> @saddl2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @saddl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; 
CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <2 x i32> [[TMPVAR1]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <2 x i32> [[_MSLD1]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR4:%.*]] = sext <2 x i32> [[TMPVAR2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] +; + %tmpvar1 = load <2 x i32>, ptr %A + %tmpvar2 = load <2 x i32>, ptr %B + %tmpvar3 = sext <2 x i32> %tmpvar1 to <2 x i64> + %tmpvar4 = sext <2 x i32> %tmpvar2 to <2 x i64> + %tmpvar5 = add <2 x i64> %tmpvar3, %tmpvar4 + ret <2 x i64> %tmpvar5 +} + +define <8 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @saddl2_8h( +; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i16> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = sext <8 x i8> [[TMPVAR1]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <8 x i8> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <8 x i8> [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = sext <8 x i8> [[TMPVAR3]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i16> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[ADD_I]] +; + %tmpvar = bitcast <16 x i8> %a to <2 x i64> + 
%shuffle.i.i.i = shufflevector <2 x i64> %tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8> + %vmovl.i.i.i = sext <8 x i8> %tmpvar1 to <8 x i16> + %tmpvar2 = bitcast <16 x i8> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8> + %vmovl.i.i5.i = sext <8 x i8> %tmpvar3 to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i + ret <8 x i16> %add.i +} + +define <4 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @saddl2_4s( +; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = sext <4 x i16> [[TMPVAR1]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = 
bitcast <8 x i16> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <4 x i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = sext <4 x i16> [[TMPVAR3]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[ADD_I]] +; + %tmpvar = bitcast <8 x i16> %a to <2 x i64> + %shuffle.i.i.i = shufflevector <2 x i64> %tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16> + %vmovl.i.i.i = sext <4 x i16> %tmpvar1 to <4 x i32> + %tmpvar2 = bitcast <8 x i16> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16> + %vmovl.i.i5.i = sext <4 x i16> %tmpvar3 to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i + ret <4 x i32> %add.i +} + +define <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @saddl2_2d( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, 
ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = sext <2 x i32> [[TMPVAR1]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <2 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <2 x i32> [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = sext <2 x i32> [[TMPVAR3]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[ADD_I]] +; + %tmpvar = bitcast <4 x i32> %a to <2 x i64> + %shuffle.i.i.i = shufflevector <2 x i64> %tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> 
%shuffle.i.i.i to <2 x i32> + %vmovl.i.i.i = sext <2 x i32> %tmpvar1 to <2 x i64> + %tmpvar2 = bitcast <4 x i32> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32> + %vmovl.i.i5.i = sext <2 x i32> %tmpvar3 to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i + ret <2 x i64> %add.i +} + +define <8 x i16> @uaddl8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uaddl8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: 
[[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <8 x i8> [[TMPVAR1]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <8 x i8> [[_MSLD1]] to <8 x i16> +; CHECK-NEXT: [[TMPVAR4:%.*]] = zext <8 x i8> [[TMPVAR2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar2 = load <8 x i8>, ptr %B + %tmpvar3 = zext <8 x i8> %tmpvar1 to <8 x i16> + %tmpvar4 = zext <8 x i8> %tmpvar2 to <8 x i16> + %tmpvar5 = add <8 x i16> %tmpvar3, %tmpvar4 + ret <8 x i16> %tmpvar5 +} + +define <4 x i32> @uaddl4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uaddl4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: 
unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <4 x i16> [[TMPVAR1]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <4 x i16> [[_MSLD1]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR4:%.*]] = zext <4 x i16> [[TMPVAR2]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar2 = load <4 x i16>, ptr %B + %tmpvar3 = zext <4 x i16> %tmpvar1 to <4 x i32> + %tmpvar4 = zext <4 x i16> %tmpvar2 to <4 x i32> + %tmpvar5 = add <4 x i32> %tmpvar3, %tmpvar4 + ret <4 x i32> %tmpvar5 +} + +define <2 x i64> @uaddl2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uaddl2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 
193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <2 x i32> [[TMPVAR1]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <2 x i32> [[_MSLD1]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR4:%.*]] = zext <2 x i32> [[TMPVAR2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSPROP2]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] +; + %tmpvar1 = load <2 x i32>, ptr %A + %tmpvar2 = load <2 x i32>, ptr %B + %tmpvar3 = zext <2 x i32> %tmpvar1 to <2 x i64> + %tmpvar4 = zext <2 x i32> %tmpvar2 to <2 x i64> + %tmpvar5 = add <2 x i64> %tmpvar3, %tmpvar4 + ret <2 x i64> %tmpvar5 +} + + +define <8 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uaddl2_8h( +; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i16> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[TMPVAR1]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <8 x i8> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = zext <8 x i8> [[TMPVAR3]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i16> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[ADD_I]] +; + %tmpvar = bitcast <16 x i8> %a to <2 x i64> + %shuffle.i.i.i = shufflevector <2 x i64> 
%tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8> + %vmovl.i.i.i = zext <8 x i8> %tmpvar1 to <8 x i16> + %tmpvar2 = bitcast <16 x i8> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <8 x i8> + %vmovl.i.i5.i = zext <8 x i8> %tmpvar3 to <8 x i16> + %add.i = add <8 x i16> %vmovl.i.i.i, %vmovl.i.i5.i + ret <8 x i16> %add.i +} + +define <4 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uaddl2_4s( +; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMPVAR1]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64> +; 
CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <4 x i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <4 x i16> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = zext <4 x i16> [[TMPVAR3]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[ADD_I]] +; + %tmpvar = bitcast <8 x i16> %a to <2 x i64> + %shuffle.i.i.i = shufflevector <2 x i64> %tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16> + %vmovl.i.i.i = zext <4 x i16> %tmpvar1 to <4 x i32> + %tmpvar2 = bitcast <8 x i16> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <4 x i16> + %vmovl.i.i5.i = zext <4 x i16> %tmpvar3 to <4 x i32> + %add.i = add <4 x i32> %vmovl.i.i.i, %vmovl.i.i5.i + ret <4 x i32> %add.i +} + +define <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b, <2 x i64> %param1, <2 x i64> %param2) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uaddl2_2d( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i64> [[PARAM1:%.*]], <2 x i64> [[PARAM2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP7]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> [[PARAM1]], <1 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMPVAR1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> +; CHECK-NEXT: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMPVAR1]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP8]], <1 x i32> +; CHECK-NEXT: [[SHUFFLE_I_I4_I:%.*]] = shufflevector <2 x i64> [[TMPVAR2]], <2 x i64> [[PARAM2]], <1 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <2 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I4_I]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <2 x i32> [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[VMOVL_I_I5_I:%.*]] = zext <2 x i32> [[TMPVAR3]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSPROP1]], [[_MSPROP3]] +; CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I_I]], [[VMOVL_I_I5_I]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[ADD_I]] +; + %tmpvar = bitcast <4 x i32> %a to <2 x i64> + %shuffle.i.i.i = shufflevector <2 x i64> %tmpvar, <2 x i64> %param1, <1 x i32> + %tmpvar1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32> + %vmovl.i.i.i 
= zext <2 x i32> %tmpvar1 to <2 x i64> + %tmpvar2 = bitcast <4 x i32> %b to <2 x i64> + %shuffle.i.i4.i = shufflevector <2 x i64> %tmpvar2, <2 x i64> %param2, <1 x i32> + %tmpvar3 = bitcast <1 x i64> %shuffle.i.i4.i to <2 x i32> + %vmovl.i.i5.i = zext <2 x i32> %tmpvar3 to <2 x i64> + %add.i = add <2 x i64> %vmovl.i.i.i, %vmovl.i.i5.i + ret <2 x i64> %add.i +} + +define <8 x i16> @uaddw8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uaddw8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD1]] to <8 x 
i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <8 x i8> [[TMPVAR2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i8>, ptr %B + %tmpvar3 = zext <8 x i8> %tmpvar2 to <8 x i16> + %tmpvar4 = add <8 x i16> %tmpvar1, %tmpvar3 + ret <8 x i16> %tmpvar4 +} + +define <4 x i32> @uaddw4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uaddw4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = 
inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD1]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <4 x i16> [[TMPVAR2]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i16>, ptr %B + %tmpvar3 = zext <4 x i16> %tmpvar2 to <4 x i32> + %tmpvar4 = add <4 x i32> %tmpvar1, %tmpvar3 + ret <4 x i32> %tmpvar4 +} + +define <2 x i64> @uaddw2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uaddw2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i32>, 
ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD1]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR3:%.*]] = zext <2 x i32> [[TMPVAR2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i32>, ptr %B + %tmpvar3 = zext <2 x i32> %tmpvar2 to <2 x i64> + %tmpvar4 = add <2 x i64> %tmpvar1, %tmpvar3 + ret <2 x i64> %tmpvar4 +} + +define <8 x i16> @uaddw2_8h(ptr %A, ptr %B, <16 x i8> %param1) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uaddw2_8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <16 x i8> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: 
[[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD1]], <16 x i8> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[HIGH2:%.*]] = shufflevector <16 x i8> [[TMPVAR2]], <16 x i8> [[PARAM1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16> +; CHECK-NEXT: [[EXT2:%.*]] = zext <8 x i8> [[HIGH2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <8 x i16> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %tmpvar1 = load <8 x i16>, ptr %A + + %tmpvar2 = load <16 x i8>, ptr %B + %high2 = shufflevector <16 x i8> %tmpvar2, <16 x i8> %param1, <8 x i32> + %ext2 = zext <8 x i8> %high2 to <8 x i16> + + %res = add <8 x i16> %tmpvar1, %ext2 + ret <8 x i16> %res +} + +define <4 x i32> @uaddw2_4s(ptr %A, ptr %B, <8 x i16> %param1) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uaddw2_4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <8 x i16> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD1]], <8 x i16> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[HIGH2:%.*]] = shufflevector <8 x i16> [[TMPVAR2]], <8 x i16> [[PARAM1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32> +; CHECK-NEXT: [[EXT2:%.*]] = zext <4 x i16> [[HIGH2]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <4 x i32> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %tmpvar1 = load <4 x i32>, ptr %A + + %tmpvar2 = load <8 x i16>, ptr %B + %high2 = shufflevector <8 x i16> %tmpvar2, <8 x i16> %param1, <4 
x i32> + %ext2 = zext <4 x i16> %high2 to <4 x i32> + + %res = add <4 x i32> %tmpvar1, %ext2 + ret <4 x i32> %res +} + +define <2 x i64> @uaddw2_2d(ptr %A, ptr %B, <4 x i32> %param1) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uaddw2_2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <4 x i32> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD1]], <4 x i32> [[TMP3]], <2 x i32> +; CHECK-NEXT: 
[[HIGH2:%.*]] = shufflevector <4 x i32> [[TMPVAR2]], <4 x i32> [[PARAM1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64> +; CHECK-NEXT: [[EXT2:%.*]] = zext <2 x i32> [[HIGH2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %tmpvar1 = load <2 x i64>, ptr %A + + %tmpvar2 = load <4 x i32>, ptr %B + %high2 = shufflevector <4 x i32> %tmpvar2, <4 x i32> %param1, <2 x i32> + %ext2 = zext <2 x i32> %high2 to <2 x i64> + + %res = add <2 x i64> %tmpvar1, %ext2 + ret <2 x i64> %res +} + +define <8 x i16> @saddw8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @saddw8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; 
CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD1]] to <8 x i16> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <8 x i8> [[TMPVAR2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i16> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP4]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i8>, ptr %B + %tmpvar3 = sext <8 x i8> %tmpvar2 to <8 x i16> + %tmpvar4 = add <8 x i16> %tmpvar1, %tmpvar3 + ret <8 x i16> %tmpvar4 +} + +define <4 x i32> @saddw4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @saddw4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label 
[[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD1]] to <4 x i32> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <4 x i16> [[TMPVAR2]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP4]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i16>, ptr %B + %tmpvar3 = sext <4 x i16> %tmpvar2 to <4 x i32> + %tmpvar4 = add <4 x i32> %tmpvar1, %tmpvar3 + ret <4 x i32> %tmpvar4 +} + +define <2 x i64> @saddw2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @saddw2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to 
ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD1]] to <2 x i64> +; CHECK-NEXT: [[TMPVAR3:%.*]] = sext <2 x i32> [[TMPVAR2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSLD]], [[_MSPROP]] +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> [[TMPVAR1]], [[TMPVAR3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP4]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i32>, ptr %B + %tmpvar3 = sext <2 x i32> %tmpvar2 to <2 x i64> + %tmpvar4 = add <2 x i64> %tmpvar1, %tmpvar3 + ret <2 x i64> %tmpvar4 +} + +define <8 x i16> @saddw2_8h(ptr %A, ptr %B, <16 x i8> %param1) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @saddw2_8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <16 x i8> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; 
CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD1]], <16 x i8> [[TMP3]], <8 x i32> +; CHECK-NEXT: [[HIGH2:%.*]] = shufflevector <16 x i8> [[TMPVAR2]], <16 x i8> [[PARAM1]], <8 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16> +; CHECK-NEXT: [[EXT2:%.*]] = sext <8 x i8> [[HIGH2]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <8 x i16> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %tmpvar1 = load <8 x i16>, ptr %A + + %tmpvar2 = load <16 x i8>, ptr %B + %high2 = shufflevector <16 x i8> %tmpvar2, <16 x i8> %param1, <8 x i32> + %ext2 = sext <8 x i8> %high2 to <8 x i16> + + %res = add <8 x i16> %tmpvar1, %ext2 + ret <8 x i16> %res +} + +define <4 x i32> @saddw2_4s(ptr %A, ptr %B, <8 x i16> %param1) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @saddw2_4s( +; 
CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <8 x i16> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD1]], <8 x i16> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[HIGH2:%.*]] = shufflevector <8 x i16> [[TMPVAR2]], <8 x i16> [[PARAM1]], <4 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32> +; CHECK-NEXT: [[EXT2:%.*]] = sext <4 x i16> [[HIGH2]] to <4 x i32> +; CHECK-NEXT: 
[[_MSPROP3:%.*]] = or <4 x i32> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <4 x i32> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %tmpvar1 = load <4 x i32>, ptr %A + + %tmpvar2 = load <8 x i16>, ptr %B + %high2 = shufflevector <8 x i16> %tmpvar2, <8 x i16> %param1, <4 x i32> + %ext2 = sext <4 x i16> %high2 to <4 x i32> + + %res = add <4 x i32> %tmpvar1, %ext2 + ret <4 x i32> %res +} + +define <2 x i64> @saddw2_2d(ptr %A, ptr %B, <4 x i32> %param1) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @saddw2_2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], <4 x i32> [[PARAM1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 
16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD1]], <4 x i32> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[HIGH2:%.*]] = shufflevector <4 x i32> [[TMPVAR2]], <4 x i32> [[PARAM1]], <2 x i32> +; CHECK-NEXT: [[_MSPROP2:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64> +; CHECK-NEXT: [[EXT2:%.*]] = sext <2 x i32> [[HIGH2]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSLD]], [[_MSPROP2]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[TMPVAR1]], [[EXT2]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %tmpvar1 = load <2 x i64>, ptr %A + + %tmpvar2 = load <4 x i32>, ptr %B + %high2 = shufflevector <4 x i32> %tmpvar2, <4 x i32> %param1, <2 x i32> + %ext2 = sext <2 x i32> %high2 to <2 x i64> + + %res = add <2 x i64> %tmpvar1, %ext2 + ret <2 x i64> %res +} + +define <4 x i16> @saddlp4h(ptr %A) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @saddlp4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to 
i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmpvar1) + ret <4 x i16> %tmpvar3 +} + +define <2 x i32> @saddlp2s(ptr %A) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @saddlp2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: 
ret <2 x i32> [[TMP3]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmpvar1) + ret <2 x i32> %tmpvar3 +} + +define <1 x i64> @saddlp1d(ptr %A) nounwind #0 { +; CHECK-LABEL: define <1 x i64> @saddlp1d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]]) +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmpvar1 = load <2 x i32>, ptr %A + %tmpvar3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmpvar1) + ret <1 x i64> %tmpvar3 +} + +define <8 x i16> @saddlp8h(ptr %A) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @saddlp8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = 
icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmpvar1 = load <16 x i8>, ptr %A + %tmpvar3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmpvar1) + ret <8 x i16> %tmpvar3 +} + +define <4 x i32> @saddlp4s(ptr %A) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @saddlp4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr 
i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmpvar1) + ret <4 x i32> %tmpvar3 +} + +define <2 x i64> @saddlp2d(ptr %A) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @saddlp2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x 
i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmpvar1) + ret <2 x i64> %tmpvar3 +} + +declare <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind readnone + +define <4 x i16> @uaddlp4h(ptr %A) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @uaddlp4h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable 
+; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmpvar1) + ret <4 x i16> %tmpvar3 +} + +define <2 x i32> @uaddlp2s(ptr %A) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @uaddlp2s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmpvar1) + ret <2 x i32> %tmpvar3 +} + +define <1 x i64> @uaddlp1d(ptr %A) nounwind #0 { +; CHECK-LABEL: define <1 
x i64> @uaddlp1d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[TMPVAR1]]) +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP3]] +; + %tmpvar1 = load <2 x i32>, ptr %A + %tmpvar3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmpvar1) + ret <1 x i64> %tmpvar3 +} + +define <8 x i16> @uaddlp8h(ptr %A) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uaddlp8h( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: 
[[TMPVAR1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmpvar1 = load <16 x i8>, ptr %A + %tmpvar3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmpvar1) + ret <8 x i16> %tmpvar3 +} + +define <4 x i32> @uaddlp4s(ptr %A) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uaddlp4s( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 
[[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmpvar1) + ret <4 x i32> %tmpvar3 +} + +define <2 x i64> @uaddlp2d(ptr %A) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uaddlp2d( +; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar3 
= call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmpvar1) + ret <2 x i64> %tmpvar3 +} + +declare <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16>) nounwind readnone +declare <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind readnone + +define <4 x i16> @sadalp4h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @sadalp4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i16> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmpvar1) + %tmpvar4 = load <4 x i16>, ptr %B + %tmpvar5 = add <4 x i16> %tmpvar3, %tmpvar4 + ret <4 x i16> %tmpvar5 +} + +define <2 x i32> @sadalp2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @sadalp2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; 
CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i32> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMPVAR5]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmpvar1) + %tmpvar4 = load <2 x i32>, ptr %B + %tmpvar5 = add <2 x i32> %tmpvar3, %tmpvar4 + ret <2 x i32> %tmpvar5 +} + +define <8 x i16> @sadalp8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @sadalp8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <16 x i8>, ptr %A + %tmpvar3 = call <8 x i16> 
@llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmpvar1) + %tmpvar4 = load <8 x i16>, ptr %B + %tmpvar5 = add <8 x i16> %tmpvar3, %tmpvar4 + ret <8 x i16> %tmpvar5 +} + +define <4 x i32> @sadalp4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @sadalp4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; 
CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmpvar1) + %tmpvar4 = load <4 x i32>, ptr %B + %tmpvar5 = add <4 x i32> %tmpvar3, %tmpvar4 + ret <4 x i32> %tmpvar5 +} + +define <2 x i64> @sadalp2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @sadalp2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; 
CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmpvar1) + %tmpvar4 = load <2 x i64>, ptr %B + %tmpvar5 = add <2 x i64> %tmpvar3, %tmpvar4 + ret <2 x i64> %tmpvar5 +} + +define <4 x i16> @uadalp4h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @uadalp4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; 
CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i16> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmpvar1) + %tmpvar4 = load <4 x i16>, ptr %B + %tmpvar5 = add <4 x i16> %tmpvar3, %tmpvar4 + ret <4 x i16> %tmpvar5 +} + +define <2 x i32> @uadalp2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @uadalp2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add 
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i32> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMPVAR5]] +; + %tmpvar1 = load 
<4 x i16>, ptr %A + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmpvar1) + %tmpvar4 = load <2 x i32>, ptr %B + %tmpvar5 = add <2 x i32> %tmpvar3, %tmpvar4 + ret <2 x i32> %tmpvar5 +} + +define <8 x i16> @uadalp8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @uadalp8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: 
[[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <8 x i16> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMPVAR5]] +; + %tmpvar1 = load <16 x i8>, ptr %A + %tmpvar3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmpvar1) + %tmpvar4 = load <8 x i16>, ptr %B + %tmpvar5 = add <8 x i16> %tmpvar3, %tmpvar4 + ret <8 x i16> %tmpvar5 +} + +define <4 x i32> @uadalp4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @uadalp4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <4 x i32> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMPVAR5]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmpvar1) + %tmpvar4 = load <4 x i32>, ptr %B + %tmpvar5 = add <4 x i32> %tmpvar3, %tmpvar4 + ret <4 x i32> %tmpvar5 +} + +define <2 x i64> @uadalp2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @uadalp2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr 
[[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[TMPVAR1]]) +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 193514046488576 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP15]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] +; CHECK-NEXT: [[TMPVAR5:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMP4]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMPVAR5]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmpvar1) + %tmpvar4 = load <2 x i64>, ptr %B + %tmpvar5 = add <2 x i64> %tmpvar3, %tmpvar4 + ret <2 x i64> %tmpvar5 +} + +define <8 x i8> @addp_8b(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i8> @addp_8b( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i8>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i8>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> [[TMPVAR1]], <8 x i8> [[TMPVAR2]]) +; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[TMP3]] +; + %tmpvar1 = load <8 x i8>, ptr %A + %tmpvar2 = load <8 x i8>, ptr %B + %tmpvar3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmpvar1, <8 x i8> %tmpvar2) + ret <8 x i8> %tmpvar3 +} + +define <16 x i8> @addp_16b(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <16 x i8> @addp_16b( +; CHECK-SAME: ptr [[A:%.*]], ptr 
[[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <16 x i8>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <16 x i8>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> [[TMPVAR1]], <16 x i8> [[TMPVAR2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[TMP3]] +; + %tmpvar1 = load <16 x i8>, ptr %A + %tmpvar2 = load <16 x i8>, ptr %B + %tmpvar3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmpvar1, <16 x i8> %tmpvar2) + ret <16 x i8> %tmpvar3 +} + +define <4 x i16> @addp_4h(ptr %A, ptr %B) nounwind #0 
{ +; CHECK-LABEL: define <4 x i16> @addp_4h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i16>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i16>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> [[TMPVAR1]], <4 x i16> [[TMPVAR2]]) +; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[TMP3]] +; + %tmpvar1 = load <4 x i16>, ptr %A + %tmpvar2 = load <4 x i16>, ptr %B + %tmpvar3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmpvar1, <4 x i16> %tmpvar2) + ret <4 x 
i16> %tmpvar3 +} + +define <8 x i16> @addp_8h(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @addp_8h( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> [[TMPVAR1]], <8 x i16> [[TMPVAR2]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[TMP3]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i16>, ptr %B + %tmpvar3 = call <8 x i16> 
@llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmpvar1, <8 x i16> %tmpvar2) + ret <8 x i16> %tmpvar3 +} + +define <2 x i32> @addp_2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @addp_2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i32>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i32>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> [[TMPVAR1]], <2 x i32> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %tmpvar1 = load <2 x 
i32>, ptr %A + %tmpvar2 = load <2 x i32>, ptr %B + %tmpvar3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmpvar1, <2 x i32> %tmpvar2) + ret <2 x i32> %tmpvar3 +} + +define <4 x i32> @addp_4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @addp_4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> [[TMPVAR1]], <4 x i32> [[TMPVAR2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i32>, ptr %B + %tmpvar3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmpvar1, <4 x i32> %tmpvar2) + ret <4 x i32> %tmpvar3 +} + +define <2 x i64> @addp_2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i64> @addp_2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> 
[[TMPVAR1]], <2 x i64> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %tmpvar3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmpvar1, <2 x i64> %tmpvar2) + ret <2 x i64> %tmpvar3 +} + +declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone +declare <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone +declare <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone +declare <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone +declare <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone +declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind readnone + +define <2 x float> @faddp_2s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x float> @faddp_2s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x float>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = 
icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x float>, ptr [[B]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> [[TMPVAR1]], <2 x float> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x float> [[TMP3]] +; + %tmpvar1 = load <2 x float>, ptr %A + %tmpvar2 = load <2 x float>, ptr %B + %tmpvar3 = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %tmpvar1, <2 x float> %tmpvar2) + ret <2 x float> %tmpvar3 +} + +define <4 x float> @faddp_4s(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x float> @faddp_4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] 
to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x float>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> [[TMPVAR1]], <4 x float> [[TMPVAR2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP3]] +; + %tmpvar1 = load <4 x float>, ptr %A + %tmpvar2 = load <4 x float>, ptr %B + %tmpvar3 = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %tmpvar1, <4 x float> %tmpvar2) + ret <4 x float> %tmpvar3 +} + +define <2 x double> @faddp_2d(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x double> @faddp_2d( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x double>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; 
CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x double>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> [[TMPVAR1]], <2 x double> [[TMPVAR2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP3]] +; + %tmpvar1 = load <2 x double>, ptr %A + %tmpvar2 = load <2 x double>, ptr %B + %tmpvar3 = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %tmpvar1, <2 x double> %tmpvar2) + ret <2 x double> %tmpvar3 +} + +declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>) nounwind readnone +declare <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float>, <4 x float>) nounwind readnone +declare <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double>, <2 x double>) nounwind readnone + +define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @uaddl_duprhs( +; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[RHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[RHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = zext <2 x i32> [[LHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = zext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = zext <2 x i32> [[RHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %rhsvec.tmp = insertelement <2 x i32> %param1, i32 %rhs, i32 0 + %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 + + %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64> + %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64> + + %res = add <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs, <2 x i32> %param1, <4 x i32> 
%param2) #0 { +; CHECK-LABEL: define <2 x i64> @uaddl2_duprhs( +; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[RHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[RHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = zext <2 x i32> [[LHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = zext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = zext <2 x i32> [[RHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %rhsvec.tmp = insertelement <2 x i32> %param1, i32 %rhs, i32 0 + %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 + + %lhs.high = shufflevector <4 x i32> 
%lhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64> + %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64> + + %res = add <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @saddl_duplhs(i32 %lhs, <4 x i32> %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @saddl_duplhs( +; CHECK-SAME: i32 [[LHS:%.*]], <4 x i32> [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[LHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[LHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[LHSVEC:%.*]] = insertelement <2 x i32> [[LHSVEC_TMP]], i32 [[LHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = sext <2 x i32> [[LHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = sext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = sext <2 x i32> [[RHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[LHS_EXT]], 
[[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %lhsvec.tmp = insertelement <2 x i32> %param1, i32 %lhs, i32 0 + %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1 + + %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64> + %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64> + + %res = add <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @saddl2_duplhs( +; CHECK-SAME: i32 [[LHS:%.*]], <4 x i32> [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[LHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[LHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[LHSVEC:%.*]] = insertelement <2 x i32> [[LHSVEC_TMP]], i32 [[LHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = sext <2 x i32> [[LHSVEC]] 
to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = sext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = sext <2 x i32> [[RHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = add <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %lhsvec.tmp = insertelement <2 x i32> %param1, i32 %lhs, i32 0 + %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1 + + %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64> + %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64> + + %res = add <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @usubl_duprhs(<4 x i32> %lhs, i32 %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @usubl_duprhs( +; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[RHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[RHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = 
shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = zext <2 x i32> [[LHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = zext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = zext <2 x i32> [[RHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = sub <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %rhsvec.tmp = insertelement <2 x i32> %param1, i32 %rhs, i32 0 + %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 + + %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64> + %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64> + + %res = sub <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @usubl2_duprhs( +; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: 
[[RHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[RHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = zext <2 x i32> [[LHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = zext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = zext <2 x i32> [[RHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = sub <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %rhsvec.tmp = insertelement <2 x i32> %param1, i32 %rhs, i32 0 + %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 + + %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = zext <2 x i32> %lhs.high to <2 x i64> + %rhs.ext = zext <2 x i32> %rhsvec to <2 x i64> + + %res = sub <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @ssubl_duplhs(i32 %lhs, <4 x i32> %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @ssubl_duplhs( +; CHECK-SAME: i32 [[LHS:%.*]], <4 x i32> [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), 
align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[LHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[LHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[LHSVEC:%.*]] = insertelement <2 x i32> [[LHSVEC_TMP]], i32 [[LHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = sext <2 x i32> [[LHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = sext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = sext <2 x i32> [[RHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = sub <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %lhsvec.tmp = insertelement <2 x i32> %param1, i32 %lhs, i32 0 + %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1 + + %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64> + %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64> + + %res = sub <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs, <2 x i32> %param1, <4 x i32> %param2) #0 { +; CHECK-LABEL: define <2 x i64> @ssubl2_duplhs( +; CHECK-SAME: i32 [[LHS:%.*]], <4 x i32> [[RHS:%.*]], <2 x i32> [[PARAM1:%.*]], <4 x i32> [[PARAM2:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[LHSVEC_TMP:%.*]] = insertelement <2 x i32> [[PARAM1]], i32 [[LHS]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i32> [[_MSPROP]], i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[LHSVEC:%.*]] = insertelement <2 x i32> [[LHSVEC_TMP]], i32 [[LHS]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> [[PARAM2]], <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = sext <2 x i32> [[_MSPROP1]] to <2 x i64> +; CHECK-NEXT: [[LHS_EXT:%.*]] = sext <2 x i32> [[LHSVEC]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP4:%.*]] = sext <2 x i32> [[_MSPROP2]] to <2 x i64> +; CHECK-NEXT: [[RHS_EXT:%.*]] = sext <2 x i32> [[RHS_HIGH]] to <2 x i64> +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES:%.*]] = sub <2 x i64> [[LHS_EXT]], [[RHS_EXT]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %lhsvec.tmp = insertelement <2 x i32> %param1, i32 %lhs, i32 0 + %lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1 + + %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> %param2, <2 x i32> + + %lhs.ext = sext <2 x i32> %lhsvec to <2 x i64> + %rhs.ext = sext <2 x i32> %rhs.high to <2 x i64> + + %res = sub <2 x i64> %lhs.ext, %rhs.ext + ret <2 x i64> %res +} + +define <8 x i8> 
@addhn8b_natural(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i8> @addhn8b_natural( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <8 x i16> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSPROP]], splat (i16 8) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <8 x i16> [[SUM]], splat (i16 8) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8> +; 
CHECK-NEXT: [[NARROWED:%.*]] = trunc <8 x i16> [[HIGH_BITS]] to <8 x i8> +; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[NARROWED]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i16>, ptr %B + %sum = add <8 x i16> %tmpvar1, %tmpvar2 + %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %narrowed = trunc <8 x i16> %high_bits to <8 x i8> + ret <8 x i8> %narrowed +} + +define <4 x i16> @addhn4h_natural(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i16> @addhn4h_natural( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr 
[[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <4 x i32> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSPROP]], splat (i32 16) +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <4 x i32> [[SUM]], splat (i32 16) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> +; CHECK-NEXT: [[NARROWED:%.*]] = trunc <4 x i32> [[HIGH_BITS]] to <4 x i16> +; CHECK-NEXT: store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i16> [[NARROWED]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i32>, ptr %B + %sum = add <4 x i32> %tmpvar1, %tmpvar2 + %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> + %narrowed = trunc <4 x i32> %high_bits to <4 x i16> + ret <4 x i16> %narrowed +} + +define <2 x i32> @addhn2s_natural(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <2 x i32> @addhn2s_natural( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label 
[[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <2 x i64> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <2 x i64> [[SUM]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32> +; CHECK-NEXT: [[NARROWED:%.*]] = trunc <2 x i64> [[HIGH_BITS]] to <2 x i32> +; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i32> [[NARROWED]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %sum = add <2 x i64> %tmpvar1, %tmpvar2 + %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32> + %narrowed = trunc <2 x i64> %high_bits to <2 x i32> + ret <2 x i32> %narrowed +} + +define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <16 x i8> @addhn2_16b_natural( +; CHECK-SAME: <8 x i8> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; 
CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP13]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <8 x i16> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[_MSPROP]], splat (i16 8) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i16> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <8 x i16> [[SUM]], splat (i16 8) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[NARROWED:%.*]] = trunc <8 x i16> [[HIGH_BITS]] to <8 x i8> +; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[_MSPROP2]], <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x i8> [[LOW]], <8 x i8> [[NARROWED]], <16 x i32> +; CHECK-NEXT: store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %tmpvar1 = load <8 x i16>, ptr %A + %tmpvar2 = load <8 x i16>, ptr %B + 
%sum = add <8 x i16> %tmpvar1, %tmpvar2 + %high_bits = lshr <8 x i16> %sum, + %narrowed = trunc <8 x i16> %high_bits to <8 x i8> + %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> + ret <16 x i8> %res +} + +define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i16> @addhn2_8h_natural( +; CHECK-SAME: <4 x i16> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 +; 
CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <4 x i32> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[_MSPROP]], splat (i32 16) +; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <4 x i32> [[SUM]], splat (i32 16) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[NARROWED:%.*]] = trunc <4 x i32> [[HIGH_BITS]] to <4 x i16> +; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[_MSPROP2]], <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i16> [[LOW]], <4 x i16> [[NARROWED]], <8 x i32> +; CHECK-NEXT: store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %tmpvar1 = load <4 x i32>, ptr %A + %tmpvar2 = load <4 x i32>, ptr %B + %sum = add <4 x i32> %tmpvar1, %tmpvar2 + %high_bits = lshr <4 x i32> %sum, + %narrowed = trunc <4 x i32> %high_bits to <4 x i16> + %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> + ret <8 x i16> %res +} + +define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @addhn2_4s_natural( +; CHECK-SAME: <2 x i32> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: 
[[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP13]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; CHECK-NEXT: [[SUM:%.*]] = add <2 x i64> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP14:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <2 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <2 x i64> [[SUM]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <2 x i64> [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[NARROWED:%.*]] = trunc <2 x i64> [[HIGH_BITS]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[_MSPROP2]], <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[LOW]], <2 x i32> [[NARROWED]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %sum = add <2 x i64> %tmpvar1, %tmpvar2 + %high_bits = lshr <2 x i64> %sum, + %narrowed = trunc <2 x i64> %high_bits to <2 x i32> + %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> + ret <4 x 
i32> %res +} + +define <4 x i32> @addhn_addhn2_4s(ptr %A, ptr %B, ptr %C, ptr %D) nounwind #0 { +; CHECK-LABEL: define <4 x i32> @addhn_addhn2_4s( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 193514046488576 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 16 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] +; 
CHECK-NEXT: [[SUM1:%.*]] = add <2 x i64> [[TMPVAR1]], [[TMPVAR2]] +; CHECK-NEXT: [[TMP15:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <2 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[LOW_BITS:%.*]] = lshr <2 x i64> [[SUM1]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = trunc <2 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[NARROWED1:%.*]] = trunc <2 x i64> [[LOW_BITS]] to <2 x i32> +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP9]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[TMPVAR3:%.*]] = load <2 x i64>, ptr [[C]], align 16 +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[C]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = xor i64 [[TMP19]], 193514046488576 +; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr +; CHECK-NEXT: [[_MSLD3:%.*]] = load <2 x i64>, ptr [[TMP21]], align 16 +; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP10]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]] +; CHECK: 22: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 23: +; CHECK-NEXT: [[TMPVAR4:%.*]] = load <2 x i64>, ptr [[D]], align 16 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[D]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], 193514046488576 +; CHECK-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr +; CHECK-NEXT: [[_MSLD4:%.*]] = load <2 x i64>, ptr [[TMP26]], align 16 +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[_MSLD3]], [[_MSLD4]] +; CHECK-NEXT: [[SUM2:%.*]] = add <2 x i64> [[TMPVAR3]], [[TMPVAR4]] +; CHECK-NEXT: [[TMP27:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32) +; CHECK-NEXT: [[TMP28:%.*]] = or <2 x i64> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[HIGH_BITS:%.*]] = lshr <2 x i64> [[SUM1]], splat (i64 32) +; CHECK-NEXT: 
[[_MSPROP6:%.*]] = trunc <2 x i64> [[TMP28]] to <2 x i32> +; CHECK-NEXT: [[NARROWED2:%.*]] = trunc <2 x i64> [[HIGH_BITS]] to <2 x i32> +; CHECK-NEXT: [[_MSPROP7:%.*]] = shufflevector <2 x i32> [[_MSPROP2]], <2 x i32> [[_MSPROP6]], <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i32> [[NARROWED1]], <2 x i32> [[NARROWED2]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %tmpvar1 = load <2 x i64>, ptr %A + %tmpvar2 = load <2 x i64>, ptr %B + %sum1 = add <2 x i64> %tmpvar1, %tmpvar2 + %low_bits = lshr <2 x i64> %sum1, + %narrowed1 = trunc <2 x i64> %low_bits to <2 x i32> + %tmpvar3 = load <2 x i64>, ptr %C + %tmpvar4 = load <2 x i64>, ptr %D + %sum2 = add <2 x i64> %tmpvar3, %tmpvar4 + %high_bits = lshr <2 x i64> %sum1, + %narrowed2 = trunc <2 x i64> %high_bits to <2 x i32> + %res = shufflevector <2 x i32> %narrowed1, <2 x i32> %narrowed2, <4 x i32> + ret <4 x i32> %res +} + +define <8 x i8> @subhn8b_natural(ptr %A, ptr %B) nounwind #0 { +; CHECK-LABEL: define <8 x i8> @subhn8b_natural( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: 
[[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       9:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <8 x i16> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSPROP]], splat (i16 8)
+; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <8 x i16> [[DIFF]], splat (i16 8)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <8 x i16> [[HIGH_BITS]] to <8 x i8>
+; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i8> [[NARROWED]]
+;
+  %tmpvar1 = load <8 x i16>, ptr %A
+  %tmpvar2 = load <8 x i16>, ptr %B
+  %diff = sub <8 x i16> %tmpvar1, %tmpvar2
+  ; NOTE(review): shift-amount vector restored from the `splat (i16 8)` CHECK line above.
+  %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+  ret <8 x i8> %narrowed
+}
+
+define <4 x i16> @subhn4h_natural(ptr %A, ptr %B) nounwind #0 {
+; CHECK-LABEL: define <4 x i16> @subhn4h_natural(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       9:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <4 x i32> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSPROP]], splat (i32 16)
+; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <4 x i32> [[DIFF]], splat (i32 16)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <4 x i32> [[HIGH_BITS]] to <4 x i16>
+; CHECK-NEXT:    store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i16> [[NARROWED]]
+;
+  %tmpvar1 = load <4 x i32>, ptr %A
+  %tmpvar2 = load <4 x i32>, ptr %B
+  %diff = sub <4 x i32> %tmpvar1, %tmpvar2
+  %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
+  %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+  ret <4 x i16> %narrowed
+}
+
+define <2 x i32> @subhn2s_natural(ptr %A, ptr %B) nounwind #0 {
+; CHECK-LABEL: define <2 x i32> @subhn2s_natural(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK:       3:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       4:
+; CHECK-NEXT:    [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK:       8:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       9:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <2 x i64> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32)
+; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <2 x i64> [[DIFF]], splat (i64 32)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <2 x i64> [[HIGH_BITS]] to <2 x i32>
+; CHECK-NEXT:    store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i32> [[NARROWED]]
+;
+  %tmpvar1 = load <2 x i64>, ptr %A
+  %tmpvar2 = load <2 x i64>, ptr %B
+  %diff = sub <2 x i64> %tmpvar1, %tmpvar2
+  %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
+  %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+  ret <2 x i32> %narrowed
+}
+
+define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind #0 {
+; CHECK-LABEL: define <16 x i8> @subhn2_16b_natural(
+; CHECK-SAME: <8 x i8> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    [[TMPVAR1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP8]], align 16
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK:       9:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       10:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP13]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <8 x i16> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr <8 x i16> [[_MSPROP]], splat (i16 8)
+; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i16> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <8 x i16> [[DIFF]], splat (i16 8)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <8 x i16> [[HIGH_BITS]] to <8 x i8>
+; CHECK-NEXT:    [[_MSPROP3:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[_MSPROP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x i8> [[LOW]], <8 x i8> [[NARROWED]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    store <16 x i8> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <16 x i8> [[RES]]
+;
+  %tmpvar1 = load <8 x i16>, ptr %A
+  %tmpvar2 = load <8 x i16>, ptr %B
+  %diff = sub <8 x i16> %tmpvar1, %tmpvar2
+  %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
+  ; NOTE(review): identity concat mask restored (low half ++ narrowed high half).
+  %res = shufflevector <8 x i8> %low, <8 x i8> %narrowed, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind #0 {
+; CHECK-LABEL: define <8 x i16> @subhn2_8h_natural(
+; CHECK-SAME: <4 x i16> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    [[TMPVAR1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK:       9:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       10:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <4 x i32> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr <4 x i32> [[_MSPROP]], splat (i32 16)
+; CHECK-NEXT:    [[TMP15:%.*]] = or <4 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <4 x i32> [[DIFF]], splat (i32 16)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <4 x i32> [[HIGH_BITS]] to <4 x i16>
+; CHECK-NEXT:    [[_MSPROP3:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[_MSPROP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i16> [[LOW]], <4 x i16> [[NARROWED]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i16> [[RES]]
+;
+  %tmpvar1 = load <4 x i32>, ptr %A
+  %tmpvar2 = load <4 x i32>, ptr %B
+  %diff = sub <4 x i32> %tmpvar1, %tmpvar2
+  %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
+  %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
+  %res = shufflevector <4 x i16> %low, <4 x i16> %narrowed, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind #0 {
+; CHECK-LABEL: define <4 x i32> @subhn2_4s_natural(
+; CHECK-SAME: <2 x i32> [[LOW:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK:       4:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       5:
+; CHECK-NEXT:    [[TMPVAR1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576
+; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16
+; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP4]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK:       9:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
+; CHECK-NEXT:    unreachable
+; CHECK:       10:
+; CHECK-NEXT:    [[TMPVAR2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP13]], align 16
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub <2 x i64> [[TMPVAR1]], [[TMPVAR2]]
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr <2 x i64> [[_MSPROP]], splat (i64 32)
+; CHECK-NEXT:    [[TMP15:%.*]] = or <2 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[HIGH_BITS:%.*]] = lshr <2 x i64> [[DIFF]], splat (i64 32)
+; CHECK-NEXT:    [[_MSPROP2:%.*]] = trunc <2 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT:    [[NARROWED:%.*]] = trunc <2 x i64> [[HIGH_BITS]] to <2 x i32>
+; CHECK-NEXT:    [[_MSPROP3:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[_MSPROP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i32> [[LOW]], <2 x i32> [[NARROWED]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  %tmpvar1 = load <2 x i64>, ptr %A
+  %tmpvar2 = load <2 x i64>, ptr %B
+  %diff = sub <2 x i64> %tmpvar1, %tmpvar2
+  %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
+  %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
+  %res = shufflevector <2 x i32> %low, <2 x i32> %narrowed, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %res
+}
+
+attributes #0 = { sanitize_memory }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.