diff --git a/llvm/test/Instrumentation/MemorySanitizer/ARM32/vararg-arm32.ll b/llvm/test/Instrumentation/MemorySanitizer/ARM32/vararg-arm32.ll new file mode 100644 index 0000000000000..8c23d95422426 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/ARM32/vararg-arm32.ll @@ -0,0 +1,92 @@ +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" +target triple = "mips64--linux" + +define i32 @foo(i32 %guard, ...) { + %vl = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 32, ptr %vl) + call void @llvm.va_start(ptr %vl) + call void @llvm.va_end(ptr %vl) + call void @llvm.lifetime.end.p0(i64 32, ptr %vl) + ret i32 0 +} + +; First, check allocation of the save area. + +; CHECK-LABEL: @foo +; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK: [[B:%.*]] = add i64 0, [[A]] +; CHECK: [[C:%.*]] = alloca {{.*}} [[B]] + +; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false) + +; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800) +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 +declare void @llvm.va_start(ptr) #2 +declare void @llvm.va_end(ptr) #2 +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 + +define i32 @bar() { + %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; Save the incoming shadow value from the arguments in the __msan_va_arg_tls +; array. The first argument is stored at position 4, since it's right +; justified. +; CHECK-LABEL: @bar +; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check multiple fixed arguments. +declare i32 @foo2(i32 %g1, i32 %g2, ...) +define i32 @bar2() { + %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; CHECK-LABEL: @bar2 +; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) 
@sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...) + +; CHECK: declare void @__msan_maybe_warning_1(i8 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_1(i8 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_2(i16 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_2(i16 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_4(i32 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_4(i32 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_8(i64 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_8(i64 signext, ptr, i32 signext) diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mips.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mips.ll new file mode 100644 index 0000000000000..8c23d95422426 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mips.ll @@ -0,0 +1,92 @@ +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" +target triple = "mips64--linux" + +define i32 @foo(i32 %guard, ...) { + %vl = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 32, ptr %vl) + call void @llvm.va_start(ptr %vl) + call void @llvm.va_end(ptr %vl) + call void @llvm.lifetime.end.p0(i64 32, ptr %vl) + ret i32 0 +} + +; First, check allocation of the save area. + +; CHECK-LABEL: @foo +; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK: [[B:%.*]] = add i64 0, [[A]] +; CHECK: [[C:%.*]] = alloca {{.*}} [[B]] + +; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false) + +; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800) +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 +declare void @llvm.va_start(ptr) #2 +declare void @llvm.va_end(ptr) #2 +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 + +define i32 @bar() { + %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; Save the incoming shadow value from the arguments in the __msan_va_arg_tls +; array. The first argument is stored at position 4, since it's right +; justified. 
+; CHECK-LABEL: @bar +; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check multiple fixed arguments. +declare i32 @foo2(i32 %g1, i32 %g2, ...) +define i32 @bar2() { + %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; CHECK-LABEL: @bar2 +; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) @sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...) + +; CHECK: declare void @__msan_maybe_warning_1(i8 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_1(i8 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_2(i16 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_2(i16 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_4(i32 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_4(i32 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_8(i64 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_8(i64 signext, ptr, i32 signext) diff --git a/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mipsel.ll b/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mipsel.ll new file mode 100644 index 0000000000000..17f4b826be0be --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/Mips32/vararg-mipsel.ll @@ -0,0 +1,82 @@ +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" +target triple = "mips64el--linux" + +define i32 @foo(i32 %guard, ...) 
{ + %vl = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 32, ptr %vl) + call void @llvm.va_start(ptr %vl) + call void @llvm.va_end(ptr %vl) + call void @llvm.lifetime.end.p0(i64 32, ptr %vl) + ret i32 0 +} + +; First, check allocation of the save area. + +; CHECK-LABEL: @foo +; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK: [[B:%.*]] = add i64 0, [[A]] +; CHECK: [[C:%.*]] = alloca {{.*}} [[B]] + +; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false) + +; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800) +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 +declare void @llvm.va_start(ptr) #2 +declare void @llvm.va_end(ptr) #2 +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 + +define i32 @bar() { + %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; Save the incoming shadow value from the arguments in the __msan_va_arg_tls +; array. +; CHECK-LABEL: @bar +; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check multiple fixed arguments. +declare i32 @foo2(i32 %g1, i32 %g2, ...) +define i32 @bar2() { + %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; CHECK-LABEL: @bar2 +; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) @sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...) 
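
[Reviewer context — not part of the patch.] Every vararg test above checks the same contract: at each variadic call site the caller mirrors the arguments' shadow into __msan_va_arg_tls (and the total shadow size into __msan_va_arg_overflow_size_tls), and the instrumented callee copies that TLS area into a local save area so later va_arg reads stay checked. A minimal C sketch of the program shape being modeled, assuming a build with -fsanitize=memory (names are illustrative, not from the patch):

#include <stdarg.h>

/* Callee: under MSan, each va_arg read is checked against the shadow
   that the caller mirrored through __msan_va_arg_tls. */
static int sum_ints(int n, ...) {
  va_list ap;
  int total = 0;
  va_start(ap, n);
  for (int i = 0; i < n; i++)
    total += va_arg(ap, int);
  va_end(ap);
  return total;
}

int main(void) {
  int uninit;                    /* deliberately left uninitialized */
  return sum_ints(2, 1, uninit); /* MSan should report this use */
}
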
diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/kernel-ppcle.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/kernel-ppcle.ll new file mode 100644 index 0000000000000..b4e472a134abd --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/kernel-ppcle.ll @@ -0,0 +1,149 @@ +; RUN: opt < %s -S -msan-kernel=1 -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le--linux" + +define void @Store1(ptr %p, i8 %x) sanitize_memory { +entry: + store i8 %x, ptr %p + ret void +} + +; CHECK-LABEL: define {{[^@]+}}@Store1( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_1(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: store i8 {{.+}}, ptr [[SHADOW]] +; CHECK: ret void + +define void @Store2(ptr %p, i16 %x) sanitize_memory { +entry: + store i16 %x, ptr %p + ret void +} + +; CHECK-LABEL: define {{[^@]+}}@Store2( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_2(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: store i16 {{.+}}, ptr [[SHADOW]] +; CHECK: ret void + +define void @Store4(ptr %p, i32 %x) sanitize_memory { +entry: + store i32 %x, ptr %p + ret void +} + +; CHECK-LABEL: define {{[^@]+}}@Store4( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_4(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: store i32 {{.+}}, ptr [[SHADOW]] +; CHECK: ret void + +define void @Store8(ptr %p, i64 %x) sanitize_memory { +entry: + store i64 %x, ptr %p + ret void +} + +; CHECK-LABEL: define {{[^@]+}}@Store8( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: store i64 {{.+}}, ptr [[SHADOW]] +; CHECK: ret void + +define void @Store16(ptr %p, i128 %x) sanitize_memory { +entry: + store i128 %x, ptr %p + ret void +} + +; CHECK-LABEL: define {{[^@]+}}@Store16( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_store_n(ptr %p, i64 16) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: store i128 {{.+}}, ptr [[SHADOW]] +; CHECK: ret void + +define i8 @Load1(ptr %p) sanitize_memory { +entry: + %0 = load i8, ptr %p + ret i8 %0 +} + +; CHECK-LABEL: define {{[^@]+}}@Load1( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_1(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: [[SHADOW_VAL:%[a-z0-9_]+]] = load i8, ptr [[SHADOW]] +; CHECK: [[ORIGIN_VAL:%[a-z0-9_]+]] = load i32, ptr [[ORIGIN]] +; CHECK: store i8 [[SHADOW_VAL]], ptr %retval_shadow +; CHECK: store i32 [[ORIGIN_VAL]], ptr %retval_origin +; CHECK: ret i8 {{.+}} + +define i16 @Load2(ptr %p) sanitize_memory { +entry: + %0 = load i16, ptr %p + ret i16 %0 +} + +; CHECK-LABEL: define {{[^@]+}}@Load2( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_2(ptr 
%p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: [[SHADOW_VAL:%[a-z0-9_]+]] = load i16, ptr [[SHADOW]] +; CHECK: [[ORIGIN_VAL:%[a-z0-9_]+]] = load i32, ptr [[ORIGIN]] +; CHECK: store i16 [[SHADOW_VAL]], ptr %retval_shadow +; CHECK: store i32 [[ORIGIN_VAL]], ptr %retval_origin +; CHECK: ret i16 {{.+}} + +define i32 @Load4(ptr %p) sanitize_memory { +entry: + %0 = load i32, ptr %p + ret i32 %0 +} + +; CHECK-LABEL: define {{[^@]+}}@Load4( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_4(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: [[SHADOW_VAL:%[a-z0-9_]+]] = load i32, ptr [[SHADOW]] +; CHECK: [[ORIGIN_VAL:%[a-z0-9_]+]] = load i32, ptr [[ORIGIN]] +; CHECK: store i32 [[SHADOW_VAL]], ptr %retval_shadow +; CHECK: store i32 [[ORIGIN_VAL]], ptr %retval_origin +; CHECK: ret i32 {{.+}} + +define i64 @Load8(ptr %p) sanitize_memory { +entry: + %0 = load i64, ptr %p + ret i64 %0 +} + +; CHECK-LABEL: define {{[^@]+}}@Load8( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr %p) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: [[SHADOW_VAL:%[a-z0-9_]+]] = load i64, ptr [[SHADOW]] +; CHECK: [[ORIGIN_VAL:%[a-z0-9_]+]] = load i32, ptr [[ORIGIN]] +; CHECK: store i64 [[SHADOW_VAL]], ptr %retval_shadow +; CHECK: store i32 [[ORIGIN_VAL]], ptr %retval_origin +; CHECK: ret i64 {{.+}} + +define i128 @Load16(ptr %p) sanitize_memory { +entry: + %0 = load i128, ptr %p + ret i128 %0 +} + +; CHECK-LABEL: define {{[^@]+}}@Load16( +; CHECK: [[META:%[a-z0-9_]+]] = call { ptr, ptr } @__msan_metadata_ptr_for_load_n(ptr %p, i64 16) +; CHECK: [[SHADOW:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 0 +; CHECK: [[ORIGIN:%[a-z0-9_]+]] = extractvalue { ptr, ptr } [[META]], 1 +; CHECK: [[SHADOW_VAL:%[a-z0-9_]+]] = load i128, ptr [[SHADOW]] +; CHECK: [[ORIGIN_VAL:%[a-z0-9_]+]] = load i32, ptr [[ORIGIN]] +; CHECK: store i128 [[SHADOW_VAL]], ptr %retval_shadow +; CHECK: store i32 [[ORIGIN_VAL]], ptr %retval_origin +; CHECK: ret i128 {{.+}} diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppc.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppc.ll new file mode 100644 index 0000000000000..db09c5a477186 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppc.ll @@ -0,0 +1,125 @@ +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64--linux" + +define i32 @foo(i32 %guard, ...) { + %vl = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 32, ptr %vl) + call void @llvm.va_start(ptr %vl) + call void @llvm.va_end(ptr %vl) + call void @llvm.lifetime.end.p0(i64 32, ptr %vl) + ret i32 0 +} + +; First, check allocation of the save area. 
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 0, [[A]]
+; CHECK: [[C:%.*]] = alloca {{.*}} [[B]]
+
+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false)
+
+; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800)
+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
+
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
+declare void @llvm.va_start(ptr) #2
+declare void @llvm.va_end(ptr) #2
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
+
+define i32 @bar() {
+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+ ret i32 %1
+}
+
+; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
+; array. The first argument is stored at position 4, since it's right
+; justified.
+; CHECK-LABEL: @bar
+; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check vector argument.
+define i32 @bar2() {
+ %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+ ret i32 %1
+}
+
+; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls
+; corresponds to offset 8+ of parameter save area - so the offset from
+; __msan_va_arg_tls is actually misaligned.
+; CHECK-LABEL: @bar2
+; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i64 array.
+define i32 @bar4() {
+ %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2])
+ ret i32 %1
+}
+
+; CHECK-LABEL: @bar4
+; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check i128 array.
+define i32 @bar5() {
+ %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2])
+ ret i32 %1
+}
+
+; CHECK-LABEL: @bar5
+; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 8-aligned byval.
+define i32 @bar6(ptr %arg) {
+ %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @bar6
+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false)
+; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check 16-aligned byval.
+define i32 @bar7(ptr %arg) {
+ %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg)
+ ret i32 %1
+}
+
+; CHECK-LABEL: @bar7
+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false)
+; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls
+
+
+; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are
+; passed to a variadic function.
+define dso_local i64 @many_args() {
+entry:
+ %ret = call i64 (i64, ...)
@sum(i64 120,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1,
+ i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1
+ )
+ ret i64 %ret
+}
+
+; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed.
+; CHECK-LABEL: @many_args
+; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792)
+; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800)
+declare i64 @sum(i64 %n, ...)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppcle.ll b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppcle.ll
new file mode 100644
index 0000000000000..63e11dc7cadd0
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/PowerPC32/vararg-ppcle.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le--linux"
+
+define i32 @foo(i32 %guard, ...) {
+ %vl = alloca ptr, align 8
+ call void @llvm.lifetime.start.p0(i64 32, ptr %vl)
+ call void @llvm.va_start(ptr %vl)
+ call void @llvm.va_end(ptr %vl)
+ call void @llvm.lifetime.end.p0(i64 32, ptr %vl)
+ ret i32 0
+}
+
+; First, check allocation of the save area.
+
+; CHECK-LABEL: @foo
+; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls
+; CHECK: [[B:%.*]] = add i64 0, [[A]]
+; CHECK: [[C:%.*]] = alloca {{.*}} [[B]]
+
+; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false)
+
+; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800)
+; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false)
+
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1
+declare void @llvm.va_start(ptr) #2
+declare void @llvm.va_end(ptr) #2
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1
+
+define i32 @bar() {
+ %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00)
+ ret i32 %1
+}
+
+; Save the incoming shadow value from the arguments in the __msan_va_arg_tls
+; array.
+; CHECK-LABEL: @bar
+; CHECK: store i32 0, ptr @__msan_va_arg_tls, align 8
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls
+
+; Check vector argument.
+define i32 @bar2() {
+ %1 = call i32 (i32, ...) @foo(i32 0, <2 x i64> <i64 1, i64 2>)
+ ret i32 %1
+}
+
+; The vector is at offset 16 of parameter save area, but __msan_va_arg_tls
+; corresponds to offset 8+ of parameter save area - so the offset from
+; __msan_va_arg_tls is actually misaligned.
+; CHECK-LABEL: @bar2 +; CHECK: store <2 x i64> zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check i64 array. +define i32 @bar4() { + %1 = call i32 (i32, ...) @foo(i32 0, [2 x i64] [i64 1, i64 2]) + ret i32 %1 +} + +; CHECK-LABEL: @bar4 +; CHECK: store [2 x i64] zeroinitializer, ptr @__msan_va_arg_tls, align 8 +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check i128 array. +define i32 @bar5() { + %1 = call i32 (i32, ...) @foo(i32 0, [2 x i128] [i128 1, i128 2]) + ret i32 %1 +} + +; CHECK-LABEL: @bar5 +; CHECK: store [2 x i128] zeroinitializer, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check 8-aligned byval. +define i32 @bar6(ptr %arg) { + %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([2 x i64]) align 8 %arg) + ret i32 %1 +} + +; CHECK-LABEL: @bar6 +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_va_arg_tls, ptr align 8 {{.*}}, i64 16, i1 false) +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check 16-aligned byval. +define i32 @bar7(ptr %arg) { + %1 = call i32 (i32, ...) @foo(i32 0, ptr byval([4 x i64]) align 16 %arg) + ret i32 %1 +} + +; CHECK-LABEL: @bar7 +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), ptr align 8 {{.*}}, i64 32, i1 false) +; CHECK: store {{.*}} 40, {{.*}} @__msan_va_arg_overflow_size_tls + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) @sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...) 
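
[Reviewer context — not part of the patch.] The many_args tests pin behavior at the edge of the va-arg shadow area: __msan_va_arg_tls is kParamTLSSize = 800 bytes (MemorySanitizer.cpp), i.e. 800 / 8 = 100 eight-byte shadow slots at offsets 0, 8, ..., 99 * 8 = 792, while 120 i64 varargs would need 960 shadow bytes. The CHECK on offset 792 plus the CHECK-NOT on offset 800 therefore verify that shadow stores stop at the last in-bounds slot, and the @llvm.umin.i64({{.*}}, 800) clamp checked in @foo bounds the callee-side copy out of the TLS area the same way.
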
diff --git a/llvm/test/Instrumentation/MemorySanitizer/RISCV32/vararg-riscv32.ll b/llvm/test/Instrumentation/MemorySanitizer/RISCV32/vararg-riscv32.ll new file mode 100644 index 0000000000000..8c23d95422426 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/RISCV32/vararg-riscv32.ll @@ -0,0 +1,92 @@ +; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128" +target triple = "mips64--linux" + +define i32 @foo(i32 %guard, ...) { + %vl = alloca ptr, align 8 + call void @llvm.lifetime.start.p0(i64 32, ptr %vl) + call void @llvm.va_start(ptr %vl) + call void @llvm.va_end(ptr %vl) + call void @llvm.lifetime.end.p0(i64 32, ptr %vl) + ret i32 0 +} + +; First, check allocation of the save area. + +; CHECK-LABEL: @foo +; CHECK: [[A:%.*]] = load {{.*}} @__msan_va_arg_overflow_size_tls +; CHECK: [[B:%.*]] = add i64 0, [[A]] +; CHECK: [[C:%.*]] = alloca {{.*}} [[B]] + +; CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[C]], i8 0, i64 [[B]], i1 false) + +; CHECK: [[D:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 800) +; CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C]], ptr align 8 @__msan_va_arg_tls, i64 [[D]], i1 false) + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 +declare void @llvm.va_start(ptr) #2 +declare void @llvm.va_end(ptr) #2 +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 + +define i32 @bar() { + %1 = call i32 (i32, ...) @foo(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; Save the incoming shadow value from the arguments in the __msan_va_arg_tls +; array. The first argument is stored at position 4, since it's right +; justified. +; CHECK-LABEL: @bar +; CHECK: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 4) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK: store {{.*}} 24, {{.*}} @__msan_va_arg_overflow_size_tls + +; Check multiple fixed arguments. +declare i32 @foo2(i32 %g1, i32 %g2, ...) +define i32 @bar2() { + %1 = call i32 (i32, i32, ...) @foo2(i32 0, i32 1, i64 2, double 3.000000e+00) + ret i32 %1 +} + +; CHECK-LABEL: @bar2 +; CHECK: store i64 0, ptr @__msan_va_arg_tls, align 8 +; CHECK: store i64 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8 +; CHECK: store {{.*}} 16, {{.*}} @__msan_va_arg_overflow_size_tls + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) 
@sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...) + +; CHECK: declare void @__msan_maybe_warning_1(i8 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_1(i8 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_2(i16 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_2(i16 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_4(i32 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_4(i32 signext, ptr, i32 signext) +; CHECK: declare void @__msan_maybe_warning_8(i64 signext, i32 signext) +; CHECK: declare void @__msan_maybe_store_origin_8(i64 signext, ptr, i32 signext) diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll new file mode 100644 index 0000000000000..3d5e4005de5b1 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-x86.ll @@ -0,0 +1,1457 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_addsub_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_addsub_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = 
load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
+; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x float> [[RES]]
+;
+ %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx_blendv_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i64> [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP12]], [[TMP2]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i64> [[TMP13]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP14]], <4 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0]], <4 x double> [[A1]], <4 x double> [[A2]])
+; CHECK-NEXT: store <4 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES]]
+;
+ %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx_blendv_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: [[TMP8:%.*]]
= trunc <8 x i32> [[TMP7]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i32> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i32> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[TMP14]], <8 x i32> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0]], <8 x float> [[A1]], <8 x float> [[A2]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + + +define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} + +define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_cmp_ps_256_pseudo_op( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i32> +; CHECK-NEXT: [[A2:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 0) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP1]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <8 x i32> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <8 x i1> [[TMP7]] to <8 x i32> +; CHECK-NEXT: [[A3:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A2]], i8 1) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP1]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <8 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i32> +; CHECK-NEXT: [[A4:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A3]], i8 2) +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i32> [[TMP1]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <8 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = sext <8 x i1> [[TMP13]] to <8 x i32> +; CHECK-NEXT: [[A5:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A4]], i8 3) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i32> [[TMP1]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <8 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <8 x i1> [[TMP16]] to <8 x i32> +; CHECK-NEXT: [[A6:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A5]], i8 4) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i32> [[TMP1]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32> +; CHECK-NEXT: [[A7:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A6]], i8 5) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i32> [[TMP1]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne <8 x i32> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP22]] to <8 x i32> +; CHECK-NEXT: [[A8:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A7]], i8 6) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i32> [[TMP1]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i32> +; CHECK-NEXT: [[A9:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A8]], i8 7) +; CHECK-NEXT: [[TMP27:%.*]] = or <8 x i32> [[TMP1]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne <8 x i32> [[TMP27]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP28]] to <8 x i32> +; CHECK-NEXT: [[A10:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A9]], i8 8) +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i32> [[TMP1]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <8 x i32> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = sext <8 x i1> [[TMP31]] to <8 x i32> +; CHECK-NEXT: [[A11:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A10]], i8 9) +; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i32> [[TMP1]], [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i32> [[TMP33]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i32> +; CHECK-NEXT: [[A12:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], 
<8 x float> [[A11]], i8 10) +; CHECK-NEXT: [[TMP36:%.*]] = or <8 x i32> [[TMP1]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <8 x i32> [[TMP36]], zeroinitializer +; CHECK-NEXT: [[TMP38:%.*]] = sext <8 x i1> [[TMP37]] to <8 x i32> +; CHECK-NEXT: [[A13:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A12]], i8 11) +; CHECK-NEXT: [[TMP39:%.*]] = or <8 x i32> [[TMP1]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = icmp ne <8 x i32> [[TMP39]], zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = sext <8 x i1> [[TMP40]] to <8 x i32> +; CHECK-NEXT: [[A14:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A13]], i8 12) +; CHECK-NEXT: [[TMP42:%.*]] = or <8 x i32> [[TMP1]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <8 x i32> [[TMP42]], zeroinitializer +; CHECK-NEXT: [[TMP44:%.*]] = sext <8 x i1> [[TMP43]] to <8 x i32> +; CHECK-NEXT: [[A15:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A14]], i8 13) +; CHECK-NEXT: [[TMP45:%.*]] = or <8 x i32> [[TMP1]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer +; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32> +; CHECK-NEXT: [[A16:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A15]], i8 14) +; CHECK-NEXT: [[TMP48:%.*]] = or <8 x i32> [[TMP1]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp ne <8 x i32> [[TMP48]], zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = sext <8 x i1> [[TMP49]] to <8 x i32> +; CHECK-NEXT: [[A17:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A16]], i8 15) +; CHECK-NEXT: [[TMP51:%.*]] = or <8 x i32> [[TMP1]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp ne <8 x i32> [[TMP51]], zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = sext <8 x i1> [[TMP52]] to <8 x i32> +; CHECK-NEXT: [[A18:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A17]], i8 16) +; CHECK-NEXT: [[TMP54:%.*]] = or <8 x i32> [[TMP1]], [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = icmp ne <8 x i32> [[TMP54]], zeroinitializer +; CHECK-NEXT: [[TMP56:%.*]] = sext <8 x i1> [[TMP55]] to <8 x i32> +; CHECK-NEXT: [[A19:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A18]], i8 17) +; CHECK-NEXT: [[TMP57:%.*]] = or <8 x i32> [[TMP1]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = icmp ne <8 x i32> [[TMP57]], zeroinitializer +; CHECK-NEXT: [[TMP59:%.*]] = sext <8 x i1> [[TMP58]] to <8 x i32> +; CHECK-NEXT: [[A20:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A19]], i8 18) +; CHECK-NEXT: [[TMP60:%.*]] = or <8 x i32> [[TMP1]], [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = icmp ne <8 x i32> [[TMP60]], zeroinitializer +; CHECK-NEXT: [[TMP62:%.*]] = sext <8 x i1> [[TMP61]] to <8 x i32> +; CHECK-NEXT: [[A21:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A20]], i8 19) +; CHECK-NEXT: [[TMP63:%.*]] = or <8 x i32> [[TMP1]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i32> [[TMP63]], zeroinitializer +; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i32> +; CHECK-NEXT: [[A22:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A21]], i8 20) +; CHECK-NEXT: [[TMP66:%.*]] = or <8 x i32> [[TMP1]], [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = icmp ne <8 x i32> [[TMP66]], zeroinitializer +; CHECK-NEXT: [[TMP68:%.*]] = sext <8 x i1> [[TMP67]] to <8 x i32> +; CHECK-NEXT: [[A23:%.*]] = call <8 x float> 
@llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A22]], i8 21) +; CHECK-NEXT: [[TMP69:%.*]] = or <8 x i32> [[TMP1]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = icmp ne <8 x i32> [[TMP69]], zeroinitializer +; CHECK-NEXT: [[TMP71:%.*]] = sext <8 x i1> [[TMP70]] to <8 x i32> +; CHECK-NEXT: [[A24:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A23]], i8 22) +; CHECK-NEXT: [[TMP72:%.*]] = or <8 x i32> [[TMP1]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <8 x i32> [[TMP72]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = sext <8 x i1> [[TMP73]] to <8 x i32> +; CHECK-NEXT: [[A25:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A24]], i8 23) +; CHECK-NEXT: [[TMP75:%.*]] = or <8 x i32> [[TMP1]], [[TMP74]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp ne <8 x i32> [[TMP75]], zeroinitializer +; CHECK-NEXT: [[TMP77:%.*]] = sext <8 x i1> [[TMP76]] to <8 x i32> +; CHECK-NEXT: [[A26:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A25]], i8 24) +; CHECK-NEXT: [[TMP78:%.*]] = or <8 x i32> [[TMP1]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = icmp ne <8 x i32> [[TMP78]], zeroinitializer +; CHECK-NEXT: [[TMP80:%.*]] = sext <8 x i1> [[TMP79]] to <8 x i32> +; CHECK-NEXT: [[A27:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A26]], i8 25) +; CHECK-NEXT: [[TMP81:%.*]] = or <8 x i32> [[TMP1]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = icmp ne <8 x i32> [[TMP81]], zeroinitializer +; CHECK-NEXT: [[TMP83:%.*]] = sext <8 x i1> [[TMP82]] to <8 x i32> +; CHECK-NEXT: [[A28:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A27]], i8 26) +; CHECK-NEXT: [[TMP84:%.*]] = or <8 x i32> [[TMP1]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = icmp ne <8 x i32> [[TMP84]], zeroinitializer +; CHECK-NEXT: [[TMP86:%.*]] = sext <8 x i1> [[TMP85]] to <8 x i32> +; CHECK-NEXT: [[A29:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A28]], i8 27) +; CHECK-NEXT: [[TMP87:%.*]] = or <8 x i32> [[TMP1]], [[TMP86]] +; CHECK-NEXT: [[TMP88:%.*]] = icmp ne <8 x i32> [[TMP87]], zeroinitializer +; CHECK-NEXT: [[TMP89:%.*]] = sext <8 x i1> [[TMP88]] to <8 x i32> +; CHECK-NEXT: [[A30:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A29]], i8 28) +; CHECK-NEXT: [[TMP90:%.*]] = or <8 x i32> [[TMP1]], [[TMP89]] +; CHECK-NEXT: [[TMP91:%.*]] = icmp ne <8 x i32> [[TMP90]], zeroinitializer +; CHECK-NEXT: [[TMP92:%.*]] = sext <8 x i1> [[TMP91]] to <8 x i32> +; CHECK-NEXT: [[A31:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A30]], i8 29) +; CHECK-NEXT: [[TMP93:%.*]] = or <8 x i32> [[TMP1]], [[TMP92]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne <8 x i32> [[TMP93]], zeroinitializer +; CHECK-NEXT: [[TMP95:%.*]] = sext <8 x i1> [[TMP94]] to <8 x i32> +; CHECK-NEXT: [[A32:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A31]], i8 30) +; CHECK-NEXT: [[TMP96:%.*]] = or <8 x i32> [[TMP1]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = icmp ne <8 x i32> [[TMP96]], zeroinitializer +; CHECK-NEXT: [[TMP98:%.*]] = sext <8 x i1> [[TMP97]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A32]], i8 31) +; CHECK-NEXT: store <8 x i32> [[TMP98]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 
x float>> [#uses=1] + %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1] + %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1] + %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1] + %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1] + %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1] + %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1] + %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1] + %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1] + %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1] + %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1] + %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1] + %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1] + %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1] + %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1] + %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1] + %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1] + %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1] + %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1] + %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1] + %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1] + %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1] + %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1] + %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1] + %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1] + %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1] + %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1] + %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1] + %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1] + %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1] + %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x 
float>> [#uses=1] + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_pd2_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_pd2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvt_ps2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone + + +define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvtt_pd2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; 
CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone + + +define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_cvtt_ps2dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone + + +define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_dp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> , <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <8 x i1> zeroinitializer, <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> , <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP7]]) +; CHECK-NEXT: [[_MSDPP1:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[_MSDPP1]], <8 x i1> zeroinitializer, <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[_MSDPP2:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 -18) +; CHECK-NEXT: store <8 x i32> [[_MSDPP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 -18) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + + +define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hadd_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hadd_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hsub_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_hsub_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 { +; CHECK-LABEL: 
@test_x86_avx_ldu_dq_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr [[A0]]) +; CHECK-NEXT: store <32 x i8> [[_MSLD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr %a0) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr) nounwind readonly + + +define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx.maskload.pd(ptr %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.maskload.pd(ptr, <2 x i64>) nounwind readonly + + +define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.maskload.pd.256(ptr, <4 x i64>) nounwind readonly + 
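+; Note on the maskload tests above and below: as these checks show, MSan has
+; no dedicated handling for the AVX masked-load intrinsics and takes its
+; conservative path instead: each operand's shadow is collapsed to a single
+; wide integer (e.g. bitcast <4 x i64> to i256), compared against zero, the
+; per-operand results are OR-ed together, and an unlikely branch (!prof)
+; reports any set bit via __msan_warning_noreturn(); the loaded value's
+; shadow is then assumed clean (zeroinitializer). A minimal sketch of one
+; such operand check (names illustrative, not from the test):
+;   %flat = bitcast <4 x i64> %shadow to i256
+;   %bad  = icmp ne i256 %flat, 0
+;   br i1 %bad, label %warn, label %cont   ; %warn calls __msan_warning_noreturn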
+ +define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>) nounwind readonly + + +define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 { +; CHECK-LABEL: @test_x86_avx_maskload_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>) nounwind readonly + + +define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], 
!prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]], <2 x double> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd(ptr, <2 x i64>, <2 x double>) nounwind + + +define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]], <4 x double> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.pd.256(ptr %a0, <4 x i64> %mask, <4 x double> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.pd.256(ptr, <4 x i64>, <4 x double>) nounwind + + +define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps(ptr, <4 x i32>, <4 x float>) nounwind + + +define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_avx_maskstore_ps_256( +; CHECK-NEXT: 
[[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]], <8 x float> [[A2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.maskstore.ps.256(ptr %a0, <8 x i32> %mask, <8 x float> %a2) + ret void +} +declare void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>) nounwind + + +define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_max_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_max_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_min_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_min_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_movmsk_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone + + +define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_movmsk_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone + + +define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestc_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestnzc_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_ptestz_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone + + +define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_rcp_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone + + +define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_round_pd_256( +; 
CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_round_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone + + +define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_rsqrt_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone + +define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: 
store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone + + +define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone + +define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[RES]] +; + %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1] + ret <4 x double> %res +} + +define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: 
unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A2:%.*]] = load <4 x i32>, ptr [[A1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a2 = load <4 x i32>, ptr %a1 + %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone + + +define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> 
@llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestc_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = 
zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestnzc_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_pd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x 
double>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx_vtestz_ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone + + +define void @test_x86_avx_vzeroall() #0 { +; CHECK-LABEL: @test_x86_avx_vzeroall( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.avx.vzeroall() +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.vzeroall() + ret void +} +declare void @llvm.x86.avx.vzeroall() nounwind + + +define void @test_x86_avx_vzeroupper() #0 { +; CHECK-LABEL: @test_x86_avx_vzeroupper( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.avx.vzeroupper() +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx.vzeroupper() + ret void +} +declare void @llvm.x86.avx.vzeroupper() nounwind + +define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 { +; CHECK-LABEL: @movnt_dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> , <4 x i32> +; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call 
void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32 +; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META1:![0-9]+]] +; CHECK-NEXT: ret void +; + %a2 = add <2 x i64> %a1, + %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> + tail call void @llvm.x86.avx.movnt.dq.256(ptr %p, <4 x i64> %a3) nounwind + ret void +} +declare void @llvm.x86.avx.movnt.dq.256(ptr, <4 x i64>) nounwind + +define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 { +; CHECK-LABEL: @movnt_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32 +; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META1]] +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind + ret void +} +declare void @llvm.x86.avx.movnt.ps.256(ptr, <8 x float>) nounwind + +define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 { + ; add operation forces the execution domain. 
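+ ; As with movnt_dq and movnt_ps above, the nontemporal store is instrumented
+ ; by first writing the value's shadow to shadow memory; on x86-64 Linux the
+ ; shadow address is the application address XOR-ed with 0x500000000000
+ ; (the 87960930222080 seen in these checks). Only the application store
+ ; carries the !nontemporal metadata; the shadow store is an ordinary store.
+ ; Illustrative shape of the mapping (names not from the test):
+ ;   %addr = ptrtoint ptr %p to i64
+ ;   %shad = xor i64 %addr, 87960930222080   ; app address -> shadow address
+ ;   %sptr = inttoptr i64 %shad to ptr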
+; CHECK-LABEL: @movnt_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn()
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32
+; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META1]]
+; CHECK-NEXT: ret void
+;
+ %a2 = fadd <4 x double> %a1, zeroinitializer
+ tail call void @llvm.x86.avx.movnt.pd.256(ptr %p, <4 x double> %a2) nounwind
+ ret void
+}
+declare void @llvm.x86.avx.movnt.pd.256(ptr, <4 x double>) nounwind
+
+
+define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_pclmulqdq(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], i8 0)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+attributes #0 = { sanitize_memory }
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll
new file mode 100644
index 0000000000000..5efb7eb407898
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-x86.ll
@@ -0,0 +1,2154 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_packssdw(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
+; 
CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_packssdw_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packssdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + +define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packsswb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_packsswb_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packsswb_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> , <16 x i16> zeroinitializer) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> , <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + +define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packuswb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone + + +define <32 x i8> @test_x86_avx2_packuswb_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packuswb_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> , <16 x i16> zeroinitializer) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> , <16 x i16> zeroinitializer) + ret <32 x i8> %res +} + + +define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pavg_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pavg_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_wd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone + + +define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pmovmskb( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmulh_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmulhu_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <4 x i64> 
@test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psad_bw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 
x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psll_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_pslli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x 
i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psra_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind 
readnone + + +define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrai_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 { +; CHECK-LABEL: @test_x86_avx2_psrl_w_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i256 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i256 [[TMP11]] to <16 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP2]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i16> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <16 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a1 = load <8 x i16>, ptr %p + %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + + +define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> 
%a0, i32 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone + + +define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone + + +define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_avx2_psrli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone + + +define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> 
@test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phadd_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_phsub_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone + + 
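+; The pmadd.ub.sw tests below expect the usual multiply-add shadow pattern:
+; OR the operand shadows, bitcast to the result element type, then widen each
+; nonzero element to all-ones via icmp ne and sext.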
+define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone + +define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a0 = load <32 x i8>, ptr %ptr + %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + +define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pmul_hr_sw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1] + 
ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_pshuf_b(
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i8> [[RES]]
+;
+ %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psign_b(
+; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
+; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i8> [[RES]]
+;
+ %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psign_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psign_w(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
+; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i16> [[RES]]
+;
+ %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+} +declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_mpsadbw( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i8> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone + +define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_mpsadbw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %a0 = load <32 x i8>, ptr %ptr + %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} + +define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_packusdw( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = 
load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_packusdw_fold() #0 { +; CHECK-LABEL: @test_x86_avx2_packusdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> ) +; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> ) + ret <16 x i16> %res +} + + +define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], +; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0]], <32 x i8> [[A1]], <32 x i8> [[A2]]) +; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i8> [[RES]] +; + %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1] + ret <32 x i8> %res +} +declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x 
i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[TMP3]] +; + %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone + + +define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendd_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP3]] +; + %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_pblendd_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone + + +define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_permd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly + + +define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_permps( +; CHECK-NEXT: [[TMP1:%.*]] = 
load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly + + +define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_q( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly + + +define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx2_maskload_q_256( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[RES]] +; + %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr 
+define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskload_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskload_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskload_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskload_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) nounwind readonly
+
+
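+; The maskstore intrinsics write to memory, so the shadows of all three
+; operands (pointer, mask and stored value) are checked up front; the
+; intrinsic itself is then emitted unchanged.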
+define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskstore_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]], <2 x i64> [[A2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx2.maskstore.q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskstore_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]], <4 x i64> [[A2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx2.maskstore.q.256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskstore_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx2.maskstore.d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2) #0 {
+; CHECK-LABEL: @test_x86_avx2_maskstore_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> [[A2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx2.maskstore.d.256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>) nounwind
+
+
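+; For the variable shift intrinsics (psllv/psrlv/psrav), the checks below
+; show the result shadow computed in two parts: the first operand's shadow
+; is shifted by the same per-element amounts, and every element whose shift
+; amount is itself poisoned (icmp ne + sext) is fully poisoned via an `or`.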
+define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_x86_avx2_psllv_d_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_d_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> )
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES2]]
+;
+ %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
+ %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
+ %res2 = add <4 x i32> %res0, %res1
+ ret <4 x i32> %res2
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_x86_avx2_psllv_d_256_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_d_256_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> )
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
+ %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
+ %res2 = add <8 x i32> %res0, %res1
+ ret <8 x i32> %res2
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+define <2 x i64> @test_x86_avx2_psllv_q_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_q_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> zeroinitializer, <2 x i64> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> )
+; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> )
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
+; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_x86_avx2_psllv_q_256_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psllv_q_256_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> zeroinitializer, <4 x i64> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> )
+; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> )
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_x86_avx2_psrlv_d_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_d_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> )
+; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES2]]
+;
+ %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
+ %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
+ %res2 = add <4 x i32> %res0, %res1
+ ret <4 x i32> %res2
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_d_256_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> )
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
+ %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
+ %res2 = add <8 x i32> %res0, %res1
+ ret <8 x i32> %res2
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_q_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> )
+; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> )
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
+; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrlv_q_256_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> )
+; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> )
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrav_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_x86_avx2_psrav_d_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrav_d_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> zeroinitializer, <4 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> )
+; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> )
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx2_psrav_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
+; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_x86_avx2_psrav_d_256_const() #0 {
+; CHECK-LABEL: @test_x86_avx2_psrav_d_256_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> zeroinitializer, <8 x i32> )
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> )
+; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> )
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
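+; The gather intrinsics are handled conservatively: the shadows of all four
+; operands (pass-through vector, base pointer, index vector and mask) are
+; checked, and the gathered result's shadow is set to zero.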
+define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i32> %idx, <2 x double> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
+ ptr %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr,
+ <4 x i32>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 x i32> %idx, <4 x double> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES]]
+;
+ %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
+ ptr %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr,
+ <4 x i32>, <4 x double>, i8) nounwind readonly
+
+define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i64> %idx, <2 x double> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
+ ptr %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr,
+ <2 x i64>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 x i64> %idx, <4 x double> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x double> [[RES]]
+;
+ %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
+ ptr %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr,
+ <4 x i64>, <4 x double>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32> %idx, <4 x float> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
+ ptr %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr,
+ <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x i32> %idx, <8 x float> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x float> [[RES]]
+;
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+ ptr %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr,
+ <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64> %idx, <4 x float> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
+ ptr %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr,
+ <2 x i64>, <4 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x i64> %idx, <4 x float> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
+ ptr %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr,
+ <4 x i64>, <4 x float>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %idx, <2 x i64> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
+ ptr %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr,
+ <4 x i32>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> %idx, <4 x i64> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
+ ptr %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr,
+ <4 x i32>, <4 x i64>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i64> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_q(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
+ ptr %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr,
+ <2 x i64>, <2 x i64>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> %idx, <4 x i64> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_q_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i64> [[RES]]
+;
+ %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
+ ptr %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr,
+ <4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %idx, <4 x i32> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
+ ptr %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr,
+ <4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> %idx, <8 x i32> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_d_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x i32> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
+ ptr %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr,
+ <8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %idx, <4 x i32> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
+ ptr %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr,
+ <2 x i64>, <4 x i32>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> %idx, <4 x i32> %mask) #0 {
+; CHECK-LABEL: @test_x86_avx2_gather_q_d_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
+ ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr,
+ <4 x i64>, <4 x i32>, i8) nounwind readonly
+
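+; Besides the gather itself, the test below spills the mask to %out; the
+; checks show the mask's shadow being written to the corresponding shadow
+; memory (the address xor'ed with the constant 87960930222080) before the
+; application store.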
[[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, + ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr, + <4 x i64>, <4 x i32>, i8) nounwind readonly + +define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <8 x float> %mask, ptr nocapture %out) #0 { +; CHECK-LABEL: @test_gather_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4) +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[OUT:%.*]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080 +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <8 x float> [[MASK]], ptr [[OUT]], align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES]] +; + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + ptr %a, <8 x i32> %idx, <8 x float> %mask, i8 4) ; + + store <8 x float> %mask, ptr %out, align 4 + + ret <8 x float> %res +} + +define <2 x i64> 
@test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i1> %mask) #0 { +; CHECK-LABEL: @test_mask_demanded_bits( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[MASK1:%.*]] = sext <2 x i1> [[MASK:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK1]], i8 2) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %mask1 = sext <2 x i1> %mask to <2 x i64> + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + ptr %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ; + ret <2 x i64> %res +} + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll new file mode 100644 index 0000000000000..1d2e38eb5e63d --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -0,0 +1,3626 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test1( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = 
bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test88( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> 
%2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test87( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test86( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: 
[[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test85( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test84( +; CHECK-SAME: <1 x i64> [[A:%.*]], 
<1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test83( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> 
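+; (Shadow note: [[_MSPROP]] above is the OR of the two operand shadows, and it
+; is bitcast through <8 x i8> in lock-step with the pcmpeq.b result, so the i64
+; shadow finally stored to @__msan_retval_tls stays aligned with the returned
+; value.)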
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+  %0 = bitcast <1 x i64> %b to <8 x i8>
+  %1 = bitcast <1 x i64> %a to <8 x i8>
+  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+  %3 = bitcast <1 x i64> %2 to <8 x i8>
+  %4 = bitcast <8 x i8> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test82(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+  %0 = bitcast <1 x i64> %b to <2 x i32>
+  %1 = bitcast <1 x i64> %a to <2 x i32>
+  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+  %3 = bitcast <1 x i64> %2 to <2 x i32>
+  %4 = bitcast <2 x i32> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test81(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+  %0 = bitcast <1 x i64> %b to <4 x i16>
+  %1 = bitcast <1 x i64> %a to <4 x i16>
+  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+  %3 = bitcast <1 x i64> %2 to <4 x i16>
+  %4 = bitcast <4 x i16> %3 to <1 x i64>
+  %5 = extractelement <1 x i64> %4, i32 0
+  ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test80(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
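+  ; As with the compares above, MSan propagates shadow for the MMX interleaves
+  ; (punpckl*, punpckh*) by OR-ing the two operand shadows into [[_MSPROP]]
+  ; before the intrinsic call, as the assertions for this function show.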
+ %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test79( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test78( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 
x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test77( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 
= bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test76( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]]) +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test75( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = 
bitcast <1 x i64> [[TMP16]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]]) +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test74( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = 
bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]]) +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP22]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone + +define i64 @test73(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test73( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone + +define 
i64 @test72(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test72_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test72_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone + +define i64 @test71(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test71( +; CHECK-SAME: <1 x i64> 
[[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[TMP2]], i32 3) +; CHECK-NEXT: [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP4]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to i64 + ret i64 %2 +} + +declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone + +define i64 @test70(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +define i64 @test70_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test70_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> 
@llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind + %2 = bitcast <1 x i64> %1 to <2 x i32> + %3 = bitcast <2 x i32> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone + +define i64 @test69(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test69( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64> + %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind + %2 = bitcast <1 x i64> %1 to <4 x i16> + %3 = bitcast <4 x i16> %2 to <1 x i64> + %4 = extractelement <1 x i64> %3, i32 0 + ret i64 %4 +} + +declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone + +define i64 @test68(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test68( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x 
i64> [[TMP2]], i32 3)
+; CHECK-NEXT: [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP4]]
+;
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to i64
+ ret i64 %2
+}
+
+declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone
+
+define i64 @test67(<1 x i64> %a) #0 {
+; CHECK-LABEL: define i64 @test67(
+; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[TMP1]], i32 3)
+; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone
+
+define i64 @test66(<1 x i64> %a) #0 {
+; CHECK-LABEL: define i64 @test66(
+; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 3)
+; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+define i64 @test66_2(<1 x i64> %a) #0 {
+; CHECK-LABEL: define i64 @test66_2(
+; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
+ %2 = bitcast <1 x i64> %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
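+; For the variable-count shifts (psll/psrl/psra with a <1 x i64> count
+; operand), the count's shadow is collapsed to all-ones when any of its
+; bits is uninitialized (icmp ne + sext) and OR'd into the shifted operand
+; shadow, so an uninitialized count poisons the whole result.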
+declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test65(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test64(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test63(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
+; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to i64
+ ret i64 %3
+}
+
+declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test62(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test61(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test60(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
+; CHECK-NEXT: store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to i64
+ ret i64 %3
+}
+
+declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test59(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test58(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
+; CHECK-NEXT: [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP17]]
+;
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
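+; Bitwise operations (pxor, por, pandn, pand): the result shadow is simply
+; the OR of the two operand shadows ([[_MSPROP]]).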
+declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test56(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test55(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
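+; pandn negates its first operand, but that does not affect the shadow:
+; it is propagated exactly like the other bitwise operations.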
+declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test54(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test53(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
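+; Multiplies (pmull.w, pmulh.w) also propagate shadow as the OR of the
+; operand shadows.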
+declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test52(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test51(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test50(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
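+; pmadd.wd widens pairs of i16 into i32 results, so the OR'd shadow is
+; reinterpreted as <2 x i32> and each 32-bit element is fully poisoned
+; (icmp ne + sext) if any of its source bits is.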
+declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test49(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP18]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
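+; Saturating subtractions (psubus.*, psubs.*): OR of the operand shadows;
+; the saturation itself is not modeled in the shadow.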
+declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test48(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test47(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test46(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test45(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
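+; Wrapping subtractions (psub.b/w/d/q) propagate shadow the same way.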
+define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test44(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]])
+; CHECK-NEXT: [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
+; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to <1 x i64>
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
+ %3 = bitcast <1 x i64> %2 to i64
+ ret i64 %3
+}
+
+declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
+
+declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test43(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test42(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone
+
+define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 {
+; CHECK-LABEL: define i64 @test41(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
+; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP13]]
+;
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
+ %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
+ %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
+ %3 = bitcast <1 x i64> %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
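+; The addition variants (paddus.*, padds.*, padd.*) mirror the subtraction
+; tests above: the result shadow is the OR of the operand shadows.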
<1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test39( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test38( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x 
i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test37( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test36( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]]) +; CHECK-NEXT: [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var = bitcast i64 %0 to <1 x i64> + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1) + %3 = bitcast <1 x i64> %2 to i64 + ret i64 %3 +} + +declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test35( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> 
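+ ; Note: the checks above show MSan's handling of an MMX binary op. The operand
+ ; shadows are loaded from @__msan_param_tls (the second at offset 8), combined
+ ; with a bitwise `or` ([[_MSPROP]]) alongside the real padd.d call, and the
+ ; resulting shadow is stored to @__msan_retval_tls next to the returned value.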
+ %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test34( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test33( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast 
<8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test32( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP16]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 48 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64 [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <1 x i64> [[TMP17]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP14]] to i64 +; CHECK-NEXT: store i64 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 + ret i64 %3 +} + +declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test31( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> 
[[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test30( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: 
[[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test29( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test28( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast 
<1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test27( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> 
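+ ; Shadow propagation for pavg.w follows the same pattern as the padd/psub
+ ; tests above: the operand shadows are OR'ed ([[_MSPROP]]) and the combined
+ ; shadow is carried through the same bitcast chain as the intrinsic result.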
+ %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test26( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind + +define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test25( +; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: 
[[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store <1 x i64> [[TMP3]], ptr [[TMP6]], align 1 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], <1 x i64> [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = extractelement <1 x i64> %a, i32 0 + %mmx_var.i = bitcast i64 %0 to <1 x i64> + tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind + ret void +} + +declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone + +define i32 @test24(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test24( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> [[MMX_VAR_I]]) #[[ATTR2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64> + %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind + ret i32 %1 +} + +declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind + +define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 { +; CHECK-LABEL: define void @test23( +; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP9]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 
[[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: tail call void @llvm.x86.mmx.maskmovq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]] +; CHECK-NEXT: ret void +; +entry: + %0 = bitcast <1 x i64> %n to <8 x i8> + %1 = bitcast <1 x i64> %d to <8 x i8> + %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64> + %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64> + tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind + ret void +} + +declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test22( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP13]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64> + %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone + +define i64 @test21(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test21( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; 
CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +define i32 @test21_2(<1 x i64> %a) #0 { +; CHECK-LABEL: define i32 @test21_2( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP5]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <2 x i32> + %5 = extractelement <2 x i32> %4, i32 0 + ret i32 %5 +} + +declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test20( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[_MSPROP:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64> + %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind + %3 = bitcast <1 x i64> %2 to i64 + ret i64 %3 +} + +declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone + +define <2 x double> @test19(<1 x i64> %a) #0 { +; CHECK-LABEL: define <2 x double> @test19( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP7]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> [[TMP8]]) #[[ATTR5]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP2]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone + ret <2 x double> %2 +} + +declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone + +define i64 @test18(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test18( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store 
i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone + +define i64 @test17(<2 x double> %a) #0 { +; CHECK-LABEL: define i64 @test17( +; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone + %1 = bitcast <1 x i64> %0 to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = extractelement <1 x i64> %2, i32 0 + ret i64 %3 +} + +declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone + +define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test16( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64> +; CHECK-NEXT: [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP4]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP5]] to i64 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]], i8 16) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64 +; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP3]] +; +entry: + %0 = extractelement <1 x i64> 
%a, i32 0 + %mmx_var = bitcast i64 %0 to <1 x i64> + %1 = extractelement <1 x i64> %b, i32 0 + %mmx_var1 = bitcast i64 %1 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16) + %3 = bitcast <1 x i64> %2 to i64 + ret i64 %3 +} + +declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone + +define i64 @test15(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test15( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <2 x i32> + %1 = bitcast <2 x i32> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <2 x i32> + %4 = bitcast <2 x i32> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone + +define i64 @test14(<1 x i64> %a) #0 { +; CHECK-LABEL: define i64 @test14( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <4 x i16> + %1 = bitcast <4 x i16> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone + +define i64 @test13(<1 x i64> %a) #0 { +; CHECK-LABEL: define 
i64 @test13( +; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP10]] +; +entry: + %0 = bitcast <1 x i64> %a to <8 x i8> + %1 = bitcast <8 x i8> %0 to <1 x i64> + %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone + %3 = bitcast <1 x i64> %2 to <8 x i8> + %4 = bitcast <8 x i8> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test12( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> + %6 = 
bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test11( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test10( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> 
[[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test9( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test8( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test7( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast 
<1 x i64> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP20]] +; +entry: + %0 = bitcast <1 x i64> %b to <8 x i8> + %1 = bitcast <1 x i64> %a to <8 x i8> + %2 = bitcast <8 x i8> %1 to <1 x i64> + %3 = bitcast <8 x i8> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <8 x i8> + %6 = bitcast <8 x i8> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test6( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test5( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test4( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = 
bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test3( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %2 = bitcast <4 x i16> %1 to <1 x i64> + %3 = bitcast <4 x i16> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <4 x i16> + %6 = bitcast <4 x i16> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone + +define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: define i64 @test2( +; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to 
<1 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[TMP15]] +; +entry: + %0 = bitcast <1 x i64> %b to <2 x i32> + %1 = bitcast <1 x i64> %a to <2 x i32> + %2 = bitcast <2 x i32> %1 to <1 x i64> + %3 = bitcast <2 x i32> %0 to <1 x i64> + %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone + %5 = bitcast <1 x i64> %4 to <2 x i32> + %6 = bitcast <2 x i32> %5 to <1 x i64> + %7 = extractelement <1 x i64> %6, i32 0 + ret i64 %7 +} + +define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind #0 { +; ALL-LABEL: test89: +; ALL: # %bb.0: +; ALL-NEXT: cvtpi2ps %mm0, %xmm0 +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define <4 x float> @test89( +; CHECK-SAME: <4 x float> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP4]] to i64 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], <1 x i64> [[B]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[C]] +; + %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b) + ret <4 x float> %c +} + +declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone + +define void @test90() #0 { +; ALL-LABEL: test90: +; ALL: # %bb.0: +; ALL-NEXT: emms +; ALL-NEXT: ret{{[l|q]}} +; CHECK-LABEL: define void @test90( +; CHECK-SAME: ) #[[ATTR1]] { +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @llvm.x86.mmx.emms() +; CHECK-NEXT: ret void +; + call void @llvm.x86.mmx.emms() + ret void +} + +declare void @llvm.x86.mmx.emms() + +define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 { +; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> [[A_COERCE]], i32 [[D]], i32 2) +; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <1 x i64> [[TMP9]] +; +entry: + %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2) + ret <1 x i64> %1 +} + +declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg) + +define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 { +; CHECK-LABEL: define i32 @test_mm_extract_pi16( +; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> [[A_COERCE]], i32 2) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2) + ret i32 %1 +} + +declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg) + +attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86_bts_asm.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86_bts_asm.ll new file mode 100644 index 0000000000000..dd2fecb081be2 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86_bts_asm.ll @@ -0,0 +1,89 @@ +; Test for the conservative assembly handling mode used by KMSAN. +; RUN: opt < %s -msan-kernel=1 -msan-check-access-address=0 \ +; RUN: -msan-handle-asm-conservative=0 -S -passes=msan 2>&1 | FileCheck \ +; RUN: "-check-prefix=CHECK" %s +; RUN: opt < %s -msan-kernel=1 -msan-check-access-address=0 \ +; RUN: -msan-handle-asm-conservative=1 -S -passes=msan 2>&1 | FileCheck \ +; RUN: "-check-prefixes=CHECK,CHECK-CONS" %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The IR below was generated from the following source: +; int main() { +; bool bit; +; unsigned long value = 2; +; long nr = 0; +; unsigned long *addr = &value; +; asm("btsq %2, %1; setc %0" : "=qm" (bit), "=m" (addr): "Ir" (nr)); +; if (bit) +; return 0; +; else +; return 1; +; } +; +; In the regular instrumentation mode MSan is unable to understand that |bit| +; is initialized by the asm() call, and therefore reports a false positive on +; the if-statement. +; The conservative assembly handling mode initializes every memory location +; passed by pointer into an asm() call. 
This prevents false positive reports, +; but may introduce false negatives. +; +; This test makes sure that the conservative mode unpoisons the shadow of |bit| +; by writing 0 to it. + +define dso_local i32 @main() sanitize_memory { +entry: + %retval = alloca i32, align 4 + %bit = alloca i8, align 1 + %value = alloca i64, align 8 + %nr = alloca i64, align 8 + %addr = alloca ptr, align 8 + store i32 0, ptr %retval, align 4 + store i64 2, ptr %value, align 8 + store i64 0, ptr %nr, align 8 + store ptr %value, ptr %addr, align 8 + %0 = load i64, ptr %nr, align 8 + call void asm "btsq $2, $1; setc $0", "=*qm,=*m,Ir,~{dirflag},~{fpsr},~{flags}"(ptr elementtype(i8) %bit, ptr elementtype(ptr) %addr, i64 %0) + %1 = load i8, ptr %bit, align 1 + %tobool = trunc i8 %1 to i1 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + ret i32 0 + +if.else: ; preds = %entry + ret i32 1 +} + +; %nr is first poisoned, then unpoisoned (written to). Need to optimize this in the future. +; CHECK: call void @__msan_poison_alloca(ptr %nr{{.*}}) +; CHECK: call { ptr, ptr } @__msan_metadata_ptr_for_store_8(ptr %nr) + +; Hooks for inputs usually go before the assembly statement. But here we have none, +; because %nr is passed by value. However we check %nr for being initialized. +; CHECK-CONS: call { ptr, ptr } @__msan_metadata_ptr_for_load_8(ptr %nr) + +; In the conservative mode, call the store hooks for %bit and %addr: +; CHECK-CONS: call void @__msan_instrument_asm_store(ptr %bit, i64 1) +; CHECK-CONS: call void @__msan_instrument_asm_store(ptr %addr, i64 8) + +; Landing pad for the %nr check above. +; CHECK-CONS: call void @__msan_warning + +; CHECK: call void asm "btsq $2, $1; setc $0" + +; CHECK: [[META:%.*]] = call {{.*}} @__msan_metadata_ptr_for_load_1(ptr %bit) +; CHECK: [[SHADOW:%.*]] = extractvalue { ptr, ptr } [[META]], 0 + +; Now load the shadow value for the boolean. +; CHECK: [[MSLD:%.*]] = load {{.*}} [[SHADOW]] +; CHECK: [[MSPROP:%.*]] = trunc i8 [[MSLD]] to i1 + +; Is the shadow poisoned? +; CHECK: br i1 [[MSPROP]], label %[[IFTRUE:.*]], label {{.*}} + +; If yes, raise a warning. +; CHECK: [[IFTRUE]]: +; CHECK: call void @__msan_warning + diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86intrinsics.ll new file mode 100644 index 0000000000000..a83a94a06b98f --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/msan_x86intrinsics.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck \ +; RUN: %s +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S \ +; RUN: -passes=msan 2>&1 | FileCheck -check-prefix=CHECK \ +; RUN: -check-prefix=CHECK-ORIGINS %s +; REQUIRES: x86-registered-target + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Store intrinsic. + +define void @StoreIntrinsic(ptr %p, <4 x float> %x) nounwind uwtable sanitize_memory { + call void @llvm.x86.sse.storeu.ps(ptr %p, <4 x float> %x) + ret void +} + +declare void @llvm.x86.sse.storeu.ps(ptr, <4 x float>) nounwind + +; CHECK-LABEL: @StoreIntrinsic +; CHECK-NOT: br +; CHECK-NOT: = or +; CHECK: store <4 x i32> {{.*}} align 1 +; CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 1{{$}} +; CHECK: ret void + + +; Load intrinsic. 
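+; A minimal sketch of how MSan locates the shadow for the wide load in
+; @LoadIntrinsic below, assuming the default Linux x86_64 mapping, which XORs
+; the application address with 0x500000000000 (87960930222080 -- the same
+; constant the SSE tests later in this patch match explicitly):
+;   %int        = ptrtoint ptr %p to i64
+;   %shadow_int = xor i64 %int, 87960930222080
+;   %shadow_ptr = inttoptr i64 %shadow_int to ptr
+;   %shadow     = load <16 x i8>, ptr %shadow_ptr, align 1
+; The %int/%shadow_* names are illustrative only, not the exact temporaries
+; MSan emits; the CHECK lines below match the load loosely for that reason.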
+ +define <16 x i8> @LoadIntrinsic(ptr %p) nounwind uwtable sanitize_memory { + %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(ptr %p) + ret <16 x i8> %call +} + +declare <16 x i8> @llvm.x86.sse3.ldu.dq(ptr %p) nounwind + +; CHECK-LABEL: @LoadIntrinsic +; CHECK: load <16 x i8>, ptr {{.*}} align 1 +; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32, ptr {{.*}} +; CHECK-NOT: br +; CHECK-NOT: = or +; CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq +; CHECK: store <16 x i8> {{.*}} @__msan_retval_tls +; CHECK-ORIGINS: store i32 {{.*}}[[ORIGIN]], ptr @__msan_retval_origin_tls +; CHECK: ret <16 x i8> + + +; Simple NoMem intrinsic +; Check that shadow is OR'ed, and origin is Select'ed +; And no shadow checks! + +define <8 x i16> @Pmulhuw128(<8 x i16> %a, <8 x i16> %b) nounwind uwtable sanitize_memory { + %call = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a, <8 x i16> %b) + ret <8 x i16> %call +} + +declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a, <8 x i16> %b) nounwind + +; CHECK-LABEL: @Pmulhuw128 +; CHECK-NEXT: load <8 x i16>, ptr @__msan_param_tls +; CHECK-ORIGINS: load i32, ptr @__msan_param_origin_tls +; CHECK-NEXT: load <8 x i16>, ptr {{.*}} @__msan_param_tls +; CHECK-ORIGINS: load i32, ptr {{.*}} @__msan_param_origin_tls +; CHECK-NEXT: call void @llvm.donothing +; CHECK-NEXT: = or <8 x i16> +; CHECK-ORIGINS: = bitcast <8 x i16> {{.*}} to i128 +; CHECK-ORIGINS-NEXT: = icmp ne i128 {{.*}}, 0 +; CHECK-ORIGINS-NEXT: = select i1 {{.*}}, i32 {{.*}}, i32 +; CHECK-NEXT: call <8 x i16> @llvm.x86.sse2.pmulhu.w +; CHECK-NEXT: store <8 x i16> {{.*}} @__msan_retval_tls +; CHECK-ORIGINS: store i32 {{.*}} @__msan_retval_origin_tls +; CHECK-NEXT: ret <8 x i16> diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse-intrinsics-x86.ll new file mode 100644 index 0000000000000..9d7763a6ef589 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse-intrinsics-x86.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_cmp_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_cmp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, 
ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone + + +define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comieq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comige_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comigt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 
[[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comile_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comilt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_comineq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) #0 { +; CHECK-LABEL: 
@test_x86_sse_cvtss2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_cvttss2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone + + +define void @test_x86_sse_ldmxcsr(ptr %a0) #0 { +; CHECK-LABEL: @test_x86_sse_ldmxcsr( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_LDMXCSR:%.*]] = load i32, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_LDMXCSR]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.sse.ldmxcsr(ptr [[A0]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.sse.ldmxcsr(ptr %a0) + ret void +} +declare void @llvm.x86.sse.ldmxcsr(ptr) nounwind + + + +define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_max_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> 
%res +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_min_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_min_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_movmsk_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 
@llvm.x86.sse.movmsk.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone + + + +define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rcp_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rcp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rsqrt_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse_rsqrt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_stmxcsr(ptr %a0) #0 { +; CHECK-LABEL: @test_x86_sse_stmxcsr( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: store i32 0, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.sse.stmxcsr(ptr 
[[A0]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.sse.stmxcsr(ptr %a0) + ret void +} +declare void @llvm.x86.sse.stmxcsr(ptr) nounwind + + +define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomieq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomige_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomigt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomile_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomilt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone + + +define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse_ucomineq_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @sfence() nounwind { +; CHECK-LABEL: @sfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse.sfence() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse.sfence() + ret void +} +declare void @llvm.x86.sse.sfence() nounwind + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll new file mode 100644 index 0000000000000..e9323f6dd3308 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-x86.ll @@ -0,0 +1,1381 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = 
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cmp_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i1> [[TMP4]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cmp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comieq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comige_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x 
i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comigt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comile_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_comilt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
+; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_sse2_comineq_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
+; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+
+define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC]]
+;
+ %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+ %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ %bc = bitcast <4 x i32> %res to <2 x i64>
+ ret <2 x i64> %bc
+}
+
+
+define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
+; CHECK-LABEL: @test_mm_cvtpd_epi32_zext_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC]]
+;
+ %a0 = load <2 x double>, ptr %p0
+ %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+ %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ %bc = bitcast <4 x i32> %res to <2 x i64>
+ ret <2 x i64> %bc
+}
+
+
+define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_sse2_cvtpd2ps(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
+
+define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 {
+; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:
store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) + %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res +} + +define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 { +; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a0 = load <2 x double>, ptr %p0 + %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) + %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> + ret <4 x float> %res +} + +define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtps2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> 
[[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1 = load <2 x double>, ptr %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + +define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1) optsize #0 { +; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load_optsize( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1 = load <2 x double>, ptr %p1 + %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} + + +define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttpd2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone + + +define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvttpd_epi32_zext( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) 
+ %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 { +; CHECK-LABEL: @test_mm_cvttpd_epi32_zext_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]]) +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64> +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[BC]] +; + %a0 = load <2 x double>, ptr %p0 + %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) + %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> + %bc = bitcast <4 x i32> %res to <2 x i64> + ret <2 x i64> %bc +} + + +define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttps2dq( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]]) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone + + +define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_cvttsd2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_max_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_min_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone + + +define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_min_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_movmsk_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packssdw_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_packssdw_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packssdw_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + +define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packsswb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packsswb_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packsswb_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> , <8 x i16> zeroinitializer) + ret <16 x i8> %res +} + + +define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_packuswb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone + + +define <16 x i8> @test_x86_sse2_packuswb_128_fold() #0 { +; CHECK-LABEL: @test_x86_sse2_packuswb_128_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> zeroinitializer) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> , <8 x i16> 
zeroinitializer) + ret <16 x i8> %res +} + + +define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pavg_b( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pavg_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_pmadd_wd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone + + +define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pmovmskb_128( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]]) +; CHECK-NEXT: store i32 0, ptr 
@__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
+; CHECK-LABEL: @test_x86_sse2_pmulh_w(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES]]
+;
+ %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
+; CHECK-LABEL: @test_x86_sse2_pmulhu_w(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES]]
+;
+ %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 {
+; CHECK-LABEL: @test_x86_sse2_psad_bw(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], <i64 48, i64 48>
+; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
+; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[RES]]
+;
+ %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_sse2_psll_d(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]]
to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psll_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_pslli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: 
@test_x86_sse2_psra_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrai_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone + + +define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: 
[[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 { +; CHECK-LABEL: @test_x86_sse2_psrl_w_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128 +; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i128 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <8 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP2]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) +; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %a1 = load <8 x i16>, ptr %p + %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} + + +define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone + + +define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x i64> [[RES]] +; + %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse2_psrli_w( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} 
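+; Note on the shift tests above: MSan propagates shadow through these
+; vector shift intrinsics by applying the same shift to the first
+; operand's shadow and then OR-ing in a mask derived from the shadow of
+; the shift count: that shadow is truncated to i64, compared against
+; zero, and sign-extended across the whole vector, so a single
+; uninitialized bit in the count poisons every lane of the result. For
+; the immediate forms (pslli/psrli/psrai) the count is a constant with a
+; clean shadow, which is why the pattern degenerates to an or with
+; zeroinitializer.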
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone + + +define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomieq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomige_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomigt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomile_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomilt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone + + +define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse2_ucomineq_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone + +define void @test_x86_sse2_pause() #0 { +; CHECK-LABEL: @test_x86_sse2_pause( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.pause() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.pause() + ret void +} +declare void @llvm.x86.sse2.pause() nounwind + +define void @lfence() nounwind #0 { +; CHECK-LABEL: @lfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.lfence() +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.lfence() + ret void +} +declare void @llvm.x86.sse2.lfence() nounwind + +define void @mfence() nounwind #0 { +; CHECK-LABEL: @mfence( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: tail call void @llvm.x86.sse2.mfence() +; CHECK-NEXT: ret void +; + tail call void 
@llvm.x86.sse2.mfence() + ret void +} +declare void @llvm.x86.sse2.mfence() nounwind + +define void @clflush(ptr %p) nounwind #0 { +; CHECK-LABEL: @clflush( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]]) +; CHECK-NEXT: ret void +; + tail call void @llvm.x86.sse2.clflush(ptr %p) + ret void +} +declare void @llvm.x86.sse2.clflush(ptr) nounwind + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll new file mode 100644 index 0000000000000..a71455821bd6b --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse41-intrinsics-x86.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_blendvpd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], <i64 63, i64 63> +; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], <i64 63, i64 63> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i64> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[TMP8]], <2 x i64> [[TMP14]], <2 x i64> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]]) +; CHECK-NEXT: store <2 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone + + +define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_blendvps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add
(i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP12]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP14]], <4 x i32> [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone + + +define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_dppd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP3]], <2 x i64> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <2 x i1> zeroinitializer, <2 x i1> <i1 false, i1 true> +; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <2 x i1> [[TMP6]] to <2 x i64> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 -18) +; CHECK-NEXT: store <2 x i64> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 -18) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_dpps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> [[TMP3]], <4 x i32>
zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <4 x i1> zeroinitializer, <4 x i1> <i1 false, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 -18) +; CHECK-NEXT: store <4 x i32> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 -18) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone + + +define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_insertps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 17) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone + + + +define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_mpsadbw( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone + +define <8 x i16>
@test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_mpsadbw_load_op0( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0]], <16 x i8> [[A1:%.*]], i8 7) +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %a0 = load <16 x i8>, ptr %ptr + %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} + +define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_packusdw( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_packusdw_fold() #0 { +; CHECK-LABEL: @test_x86_sse41_packusdw_fold( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> ) +; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> ) + ret <8 x i16> %res +} + + +define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 { +; CHECK-LABEL: @test_x86_sse41_pblendvb( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> +; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0]], <16 x i8> [[A1]], <16 x i8> [[A2]]) +; CHECK-NEXT: store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_phminposuw( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone + + +define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_ptestc( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) #0
{ +; CHECK-LABEL: @test_x86_sse41_ptestnzc( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone + + +define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_ptestz( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 +; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestz(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] + ret i32 %res +} +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_round_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone + + +define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_sse41_round_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.x86.sse41.round.ps(<4 x float> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 1> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone + + +define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_sd_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1B:%.*]] = load <2 x double>, ptr [[A1:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[_MSLD]], <2 x i32> <i32 2, i32 1> +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1B]], i32 7) +; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1b = load <2 x double>, ptr %a1 + %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} + + +define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 { +; CHECK-LABEL: @test_x86_sse41_round_ss_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, ptr [[A1:%.*]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7) +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1b = load <4 x float>, ptr %a1 + %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/vararg-too-large.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg-too-large.ll new file mode 100644 index 0000000000000..adb3e208d8553 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg-too-large.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -msan-check-access-address=0 -S 2>&1 -passes=msan | FileCheck \ +; RUN: %s + +; Test that MSan doesn't generate code overflowing __msan_va_arg_tls when too many arguments are +; passed to a variadic function. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local i64 @many_args() { +entry: + %ret = call i64 (i64, ...) @sum(i64 120, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, + i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1 + ) + ret i64 %ret +} + +; If the size of __msan_va_arg_tls changes, the second argument of `add` must also be changed. +; CHECK-LABEL: @many_args +; CHECK: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 792) +; CHECK-NOT: i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 800) +declare i64 @sum(i64 %n, ...)
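+
+; The arithmetic behind the two offsets above, assuming MSan's fixed 800-byte
+; va_arg TLS buffer (kParamTLSSize in MemorySanitizer.cpp): shadow for the
+; variadic i64 arguments is laid out in 8-byte slots, and 120 of them would
+; run well past 800 bytes. MSan clamps the stores to the buffer, so the last
+; slot it may still write starts at 800 - 8 = 792, and no store may target
+; offset 800 or beyond, which is exactly what the CHECK/CHECK-NOT pair pins down.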
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/vararg.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg.ll similarity index 100% rename from llvm/test/Instrumentation/MemorySanitizer/X86/vararg.ll rename to llvm/test/Instrumentation/MemorySanitizer/i386/vararg.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_call.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_call.ll new file mode 100644 index 0000000000000..32d43e11fbd9b --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_call.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck \ +; RUN: %s +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S \ +; RUN: -passes=msan 2>&1 | FileCheck %s "--check-prefixes=CHECK,CHECK-ORIGIN" +; RUN: opt < %s -msan-check-access-address=0 -S \ +; RUN: -passes="msan<track-origins=1>" 2>&1 | FileCheck %s "--check-prefixes=CHECK,CHECK-ORIGIN" +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=2 -S \ +; RUN: -passes=msan 2>&1 | FileCheck %s "--check-prefixes=CHECK,CHECK-ORIGIN" + +; Test that shadow and origin are stored for variadic function params. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.__va_list_tag = type { i32, i32, ptr, ptr } + +define dso_local i32 @test(i32 %a, i32 %b, i32 %c) local_unnamed_addr { +entry: + %call = tail call i32 (i32, ...) @sum(i32 3, i32 %a, i32 %b, i32 %c) + ret i32 %call +} + +; CHECK: store i32 0, {{.*}} @__msan_param_tls {{.*}} i64 8 +; CHECK: store i32 0, {{.*}} @__msan_param_tls {{.*}} i64 16 +; CHECK: store i32 0, {{.*}} @__msan_param_tls {{.*}} i64 24 +; CHECK: store i32 0, {{.*}} @__msan_va_arg_tls {{.*}} i64 8 +; CHECK-ORIGIN: store i32 0, {{.*}} @__msan_va_arg_origin_tls {{.*}} i64 8 +; CHECK: store i32 0, {{.*}} @__msan_va_arg_tls {{.*}} i64 16 +; CHECK-ORIGIN: store i32 0, {{.*}} @__msan_va_arg_origin_tls {{.*}} i64 16 +; CHECK: store i32 0, {{.*}} @__msan_va_arg_tls {{.*}} i64 24 +; CHECK-ORIGIN: store i32 0, {{.*}} @__msan_va_arg_origin_tls {{.*}} i64 24 + +define dso_local i32 @sum(i32 %n, ...)
local_unnamed_addr #0 { +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #2 + call void @llvm.va_start(ptr nonnull %args) + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body.lr.ph, label %for.end + +; CHECK: call void @llvm.memcpy.{{.*}} [[SHADOW_COPY:%[_0-9a-z]+]], {{.*}} @__msan_va_arg_tls +; CHECK-ORIGIN: call void @llvm.memcpy{{.*}} [[ORIGIN_COPY:%[_0-9a-z]+]], {{.*}} @__msan_va_arg_origin_tls + +; CHECK: call void @llvm.va_start +; CHECK: call void @llvm.memcpy.{{.*}}, {{.*}} [[SHADOW_COPY]], i{{.*}} [[REGSAVE:[0-9]+]] +; CHECK-ORIGIN: call void @llvm.memcpy.{{.*}}, {{.*}} [[ORIGIN_COPY]], i{{.*}} [[REGSAVE]] + +; CHECK: [[OVERFLOW_SHADOW:%[_0-9a-z]+]] = getelementptr i8, ptr [[SHADOW_COPY]], i{{.*}} [[REGSAVE]] +; CHECK: call void @llvm.memcpy.{{.*}}[[OVERFLOW_SHADOW]] +; CHECK-ORIGIN: [[OVERFLOW_ORIGIN:%[_0-9a-z]+]] = getelementptr i8, ptr [[ORIGIN_COPY]], i{{.*}} [[REGSAVE]] +; CHECK-ORIGIN: call void @llvm.memcpy.{{.*}}[[OVERFLOW_ORIGIN]] + +for.body.lr.ph: ; preds = %entry + %0 = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %args, i64 0, i64 0, i32 3 + %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %args, i64 0, i64 0, i32 2 + %gp_offset.pre = load i32, ptr %args, align 16 + br label %for.body + +for.body: ; preds = %vaarg.end, %for.body.lr.ph + %gp_offset = phi i32 [ %gp_offset.pre, %for.body.lr.ph ], [ %gp_offset12, %vaarg.end ] + %sum.011 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %vaarg.end ] + %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %vaarg.end ] + %fits_in_gp = icmp ult i32 %gp_offset, 41 + br i1 %fits_in_gp, label %vaarg.in_reg, label %vaarg.in_mem + +vaarg.in_reg: ; preds = %for.body + %reg_save_area = load ptr, ptr %0, align 16 + %1 = sext i32 %gp_offset to i64 + %2 = getelementptr i8, ptr %reg_save_area, i64 %1 + %3 = add i32 %gp_offset, 8 + store i32 %3, ptr %args, align 16 + br label %vaarg.end + +vaarg.in_mem: ; preds = %for.body + %overflow_arg_area = load ptr, ptr %overflow_arg_area_p, align 8 + %overflow_arg_area.next = getelementptr i8, ptr %overflow_arg_area, i64 8 + store ptr %overflow_arg_area.next, ptr %overflow_arg_area_p, align 8 + br label %vaarg.end + +vaarg.end: ; preds = %vaarg.in_mem, %vaarg.in_reg + %gp_offset12 = phi i32 [ %3, %vaarg.in_reg ], [ %gp_offset, %vaarg.in_mem ] + %vaarg.addr.in = phi ptr [ %2, %vaarg.in_reg ], [ %overflow_arg_area, %vaarg.in_mem ] + %4 = load i32, ptr %vaarg.addr.in, align 4 + %add = add nsw i32 %4, %sum.011 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %vaarg.end, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %vaarg.end ] + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #2 + ret i32 %sum.0.lcssa +} + + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #1 + +; Function Attrs: nounwind +declare void @llvm.va_start(ptr) #2 + +; Function Attrs: nounwind +declare void @llvm.va_end(ptr) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #1 + +declare dso_local i80 @sum_i80(i32, ...) local_unnamed_addr + +; Unaligned types like i80 should also work. +define dso_local i80 @test_i80(i80 %a, i80 %b, i80 %c) local_unnamed_addr { +entry: + %call = tail call i80 (i32, ...) 
@sum_i80(i32 3, i80 %a, i80 %b, i80 %c) + ret i80 %call +} + diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_shadow.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_shadow.ll new file mode 100644 index 0000000000000..205101564dfe0 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/vararg_shadow.ll @@ -0,0 +1,1315 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.Double4 = type { [4 x double] } +%struct.LongDouble2 = type { [2 x x86_fp80] } +%struct.LongDouble4 = type { [4 x x86_fp80] } +%struct.IntInt = type { i32, i32 } +%struct.Int64Int64 = type { i64, i64 } +%struct.DoubleDouble = type { double, double } +%struct.DoubleFloat = type { double, float } +%struct.__va_list_tag = type { i32, i32, ptr, ptr } + +define linkonce_odr dso_local void @_Z4testIcEvT_(i8 noundef signext %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testIcEvT_( +; CHECK-SAME: i8 noundef signext [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP3]], i8 -1, i64 1, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i8 [[TMP0]], ptr [[TMP6]], align 1 +; CHECK-NEXT: store i8 [[ARG]], ptr [[ARG_ADDR]], align 1 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG_ADDR]]) +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARG_ADDR]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[_MSPROP:%.*]] = sext i8 [[_MSLD]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +; CHECK-NEXT: store i8 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSPROP]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSPROP]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (i8, i32, ...) @_Z5test2IcEvT_iz(i8 noundef signext [[TMP7]], i32 noundef 1, i32 noundef [[CONV]]) +; CHECK-NEXT: ret void +; +entry: + %arg.addr = alloca i8, align 1 + store i8 %arg, ptr %arg.addr, align 1 + call void @_Z3usePv(ptr noundef nonnull %arg.addr) + %0 = load i8, ptr %arg.addr, align 1 + %conv = sext i8 %0 to i32 + call void (i8, i32, ...) 
@_Z5test2IcEvT_iz(i8 noundef signext %0, i32 noundef 1, i32 noundef %conv) + ret void +} + +define linkonce_odr dso_local void @_Z4testIiEvT_(i32 noundef %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testIiEvT_( +; CHECK-SAME: i32 noundef [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP3]], i8 -1, i64 4, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4 +; CHECK-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG_ADDR]]) +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (i32, i32, ...) @_Z5test2IiEvT_iz(i32 noundef [[TMP7]], i32 noundef 1, i32 noundef [[TMP7]]) +; CHECK-NEXT: ret void +; +entry: + %arg.addr = alloca i32, align 4 + store i32 %arg, ptr %arg.addr, align 4 + call void @_Z3usePv(ptr noundef nonnull %arg.addr) + %0 = load i32, ptr %arg.addr, align 4 + call void (i32, i32, ...) 
@_Z5test2IiEvT_iz(i32 noundef %0, i32 noundef 1, i32 noundef %0) + ret void +} + +define linkonce_odr dso_local void @_Z4testIfEvT_(float noundef %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testIfEvT_( +; CHECK-SAME: float noundef [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[TMP3]], i8 -1, i64 4, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i32 [[TMP0]], ptr [[TMP6]], align 4 +; CHECK-NEXT: store float [[ARG]], ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG_ADDR]]) +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARG_ADDR]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[_MSLD]] to i64 +; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP7]] to double +; CHECK-NEXT: store i32 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 [[TMP11]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 [[TMP11]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (float, i32, ...) @_Z5test2IfEvT_iz(float noundef [[TMP7]], i32 noundef 1, double noundef [[CONV]]) +; CHECK-NEXT: ret void +; +entry: + %arg.addr = alloca float, align 4 + store float %arg, ptr %arg.addr, align 4 + call void @_Z3usePv(ptr noundef nonnull %arg.addr) + %0 = load float, ptr %arg.addr, align 4 + %conv = fpext float %0 to double + call void (float, i32, ...) 
@_Z5test2IfEvT_iz(float noundef %0, i32 noundef 1, double noundef %conv) + ret void +} + +define linkonce_odr dso_local void @_Z4testIdEvT_(double noundef %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testIdEvT_( +; CHECK-SAME: double noundef [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 -1, i64 8, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i64 [[TMP0]], ptr [[TMP6]], align 8 +; CHECK-NEXT: store double [[ARG]], ptr [[ARG_ADDR]], align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG_ADDR]]) +; CHECK-NEXT: [[TMP7:%.*]] = load double, ptr [[ARG_ADDR]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP10]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (double, i32, ...) @_Z5test2IdEvT_iz(double noundef [[TMP7]], i32 noundef 1, double noundef [[TMP7]]) +; CHECK-NEXT: ret void +; +entry: + %arg.addr = alloca double, align 8 + store double %arg, ptr %arg.addr, align 8 + call void @_Z3usePv(ptr noundef nonnull %arg.addr) + %0 = load double, ptr %arg.addr, align 8 + call void (double, i32, ...) 
@_Z5test2IdEvT_iz(double noundef %0, i32 noundef 1, double noundef %0) + ret void +} + +define linkonce_odr dso_local void @_Z4testIeEvT_(x86_fp80 noundef %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testIeEvT_( +; CHECK-SAME: x86_fp80 noundef [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i80, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca x86_fp80, align 16 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP3]], i8 -1, i64 16, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i80 [[TMP0]], ptr [[TMP6]], align 16 +; CHECK-NEXT: store x86_fp80 [[ARG]], ptr [[ARG_ADDR]], align 16 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG_ADDR]]) +; CHECK-NEXT: [[TMP7:%.*]] = load x86_fp80, ptr [[ARG_ADDR]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i80, ptr [[TMP10]], align 16 +; CHECK-NEXT: store i80 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i80 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i80 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), align 8 +; CHECK-NEXT: store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (x86_fp80, i32, ...) @_Z5test2IeEvT_iz(x86_fp80 noundef [[TMP7]], i32 noundef 1, x86_fp80 noundef [[TMP7]]) +; CHECK-NEXT: ret void +; +entry: + %arg.addr = alloca x86_fp80, align 16 + store x86_fp80 %arg, ptr %arg.addr, align 16 + call void @_Z3usePv(ptr noundef nonnull %arg.addr) + %0 = load x86_fp80, ptr %arg.addr, align 16 + call void (x86_fp80, i32, ...) 
@_Z5test2IeEvT_iz(x86_fp80 noundef %0, i32 noundef 1, x86_fp80 noundef %0) + ret void +} + +define linkonce_odr dso_local void @_Z4testI6IntIntEvT_(i64 %arg.coerce) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI6IntIntEvT_( +; CHECK-SAME: i64 [[ARG_COERCE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG:%.*]] = alloca [[STRUCT_INTINT:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 -1, i64 8, i1 false) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store i64 [[TMP0]], ptr [[TMP6]], align 8 +; CHECK-NEXT: store i64 [[ARG_COERCE]], ptr [[ARG]], align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (i64, i32, ...) @_Z5test2I6IntIntEvT_iz(i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i32 noundef 1, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]]) +; CHECK-NEXT: ret void +; +entry: + %arg = alloca %struct.IntInt, align 8 + store i64 %arg.coerce, ptr %arg, align 8 + call void @_Z3usePv(ptr noundef nonnull %arg) + %agg.tmp.sroa.0.0.copyload = load i64, ptr %arg, align 8 + call void (i64, i32, ...) 
@_Z5test2I6IntIntEvT_iz(i64 %agg.tmp.sroa.0.0.copyload, i32 noundef 1, i64 %agg.tmp.sroa.0.0.copyload) + ret void +} + +define linkonce_odr dso_local void @_Z4testI10Int64Int64EvT_(i64 %arg.coerce0, i64 %arg.coerce1) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI10Int64Int64EvT_( +; CHECK-SAME: i64 [[ARG_COERCE0:%.*]], i64 [[ARG_COERCE1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG:%.*]] = alloca [[STRUCT_INT64INT64:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store i64 [[ARG_COERCE0]], ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds { i64, i64 }, ptr [[ARG]], i64 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +; CHECK-NEXT: store i64 [[ARG_COERCE1]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP17]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (i64, i64, i32, ...) 
@_Z5test2I10Int64Int64EvT_iz(i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i32 noundef 1, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]]) +; CHECK-NEXT: ret void +; +entry: + %arg = alloca %struct.Int64Int64, align 8 + store i64 %arg.coerce0, ptr %arg, align 8 + %0 = getelementptr inbounds { i64, i64 }, ptr %arg, i64 0, i32 1 + store i64 %arg.coerce1, ptr %0, align 8 + call void @_Z3usePv(ptr noundef nonnull %arg) + %agg.tmp.sroa.0.0.copyload = load i64, ptr %arg, align 8 + %agg.tmp.sroa.2.0.copyload = load i64, ptr %0, align 8 + call void (i64, i64, i32, ...) @_Z5test2I10Int64Int64EvT_iz(i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, i32 noundef 1, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload) + ret void +} + +define linkonce_odr dso_local void @_Z4testI12DoubleDoubleEvT_(double %arg.coerce0, double %arg.coerce1) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI12DoubleDoubleEvT_( +; CHECK-SAME: double [[ARG_COERCE0:%.*]], double [[ARG_COERCE1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG:%.*]] = alloca [[STRUCT_DOUBLEDOUBLE:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store double [[ARG_COERCE0]], ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds { double, double }, ptr [[ARG]], i64 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +; CHECK-NEXT: store double [[ARG_COERCE1]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load double, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP17]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 
[[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (double, double, i32, ...) @_Z5test2I12DoubleDoubleEvT_iz(double [[AGG_TMP_SROA_0_0_COPYLOAD]], double [[AGG_TMP_SROA_2_0_COPYLOAD]], i32 noundef 1, double [[AGG_TMP_SROA_0_0_COPYLOAD]], double [[AGG_TMP_SROA_2_0_COPYLOAD]]) +; CHECK-NEXT: ret void +; +entry: + %arg = alloca %struct.DoubleDouble, align 8 + store double %arg.coerce0, ptr %arg, align 8 + %0 = getelementptr inbounds { double, double }, ptr %arg, i64 0, i32 1 + store double %arg.coerce1, ptr %0, align 8 + call void @_Z3usePv(ptr noundef nonnull %arg) + %agg.tmp.sroa.0.0.copyload = load double, ptr %arg, align 8 + %agg.tmp.sroa.2.0.copyload = load double, ptr %0, align 8 + call void (double, double, i32, ...) @_Z5test2I12DoubleDoubleEvT_iz(double %agg.tmp.sroa.0.0.copyload, double %agg.tmp.sroa.2.0.copyload, i32 noundef 1, double %agg.tmp.sroa.0.0.copyload, double %agg.tmp.sroa.2.0.copyload) + ret void +} + +; Double4 is passed byval: its shadow is memcpy'd into the overflow area of +; __msan_va_arg_tls (starting at offset 176, past the register-save area), +; and __msan_va_arg_overflow_size_tls is set to the aggregate's size. +define linkonce_odr dso_local void @_Z4testI7Double4EvT_(ptr noundef byval(%struct.Double4) align 8 %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI7Double4EvT_( +; CHECK-SAME: ptr noundef byval([[STRUCT_DOUBLE4:%.*]]) align 8 [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_param_tls, i64 32, i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_param_tls, ptr align 8 [[TMP5]], i64 32, i1 false) +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP8]], i64 32, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 32, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (ptr, i32, ...)
@_Z5test2I7Double4EvT_iz(ptr noundef nonnull byval([[STRUCT_DOUBLE4]]) align 8 [[ARG]], i32 noundef 1, ptr noundef nonnull byval([[STRUCT_DOUBLE4]]) align 8 [[ARG]]) +; CHECK-NEXT: ret void +; +entry: + call void @_Z3usePv(ptr noundef nonnull %arg) + call void (ptr, i32, ...) @_Z5test2I7Double4EvT_iz(ptr noundef nonnull byval(%struct.Double4) align 8 %arg, i32 noundef 1, ptr noundef nonnull byval(%struct.Double4) align 8 %arg) + ret void +} + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +define linkonce_odr dso_local void @_Z4testI11DoubleFloatEvT_(double %arg.coerce0, float %arg.coerce1) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI11DoubleFloatEvT_( +; CHECK-SAME: double [[ARG_COERCE0:%.*]], float [[ARG_COERCE1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARG:%.*]] = alloca [[STRUCT_DOUBLEFLOAT:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store double [[ARG_COERCE0]], ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds { double, float }, ptr [[ARG]], i64 0, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 8 +; CHECK-NEXT: store float [[ARG_COERCE1]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[ARG]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP14]], align 8 +; CHECK-NEXT: [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load float, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP8]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP17]], align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr @__msan_param_tls, align 8 +; CHECK-NEXT: store i32 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: store i64 [[_MSLD]], ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: store i32 [[_MSLD1]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (double, float, i32, ...) @_Z5test2I11DoubleFloatEvT_iz(double [[AGG_TMP_SROA_0_0_COPYLOAD]], float [[AGG_TMP_SROA_2_0_COPYLOAD]], i32 noundef 1, double [[AGG_TMP_SROA_0_0_COPYLOAD]], float [[AGG_TMP_SROA_2_0_COPYLOAD]]) +; CHECK-NEXT: ret void +; +entry: + %arg = alloca %struct.DoubleFloat, align 8 + store double %arg.coerce0, ptr %arg, align 8 + %0 = getelementptr inbounds { double, float }, ptr %arg, i64 0, i32 1 + store float %arg.coerce1, ptr %0, align 8 + call void @_Z3usePv(ptr noundef nonnull %arg) + %agg.tmp.sroa.0.0.copyload = load double, ptr %arg, align 8 + %agg.tmp.sroa.2.0.copyload = load float, ptr %0, align 8 + call void (double, float, i32, ...) @_Z5test2I11DoubleFloatEvT_iz(double %agg.tmp.sroa.0.0.copyload, float %agg.tmp.sroa.2.0.copyload, i32 noundef 1, double %agg.tmp.sroa.0.0.copyload, float %agg.tmp.sroa.2.0.copyload) + ret void +} + +define linkonce_odr dso_local void @_Z4testI11LongDouble2EvT_(ptr noundef byval(%struct.LongDouble2) align 16 %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI11LongDouble2EvT_( +; CHECK-SAME: ptr noundef byval([[STRUCT_LONGDOUBLE2:%.*]]) align 16 [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_param_tls, i64 32, i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_param_tls, ptr align 8 [[TMP5]], i64 32, i1 false) +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP8]], i64 32, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP11]], i64 32, i1 false) +; CHECK-NEXT: store i64 32, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (ptr, i32, ...) @_Z5test2I11LongDouble2EvT_iz(ptr noundef nonnull byval([[STRUCT_LONGDOUBLE2]]) align 16 [[ARG]], i32 noundef 1, ptr noundef nonnull byval([[STRUCT_LONGDOUBLE2]]) align 16 [[ARG]]) +; CHECK-NEXT: ret void +; +entry: + call void @_Z3usePv(ptr noundef nonnull %arg) + call void (ptr, i32, ...) 
@_Z5test2I11LongDouble2EvT_iz(ptr noundef nonnull byval(%struct.LongDouble2) align 16 %arg, i32 noundef 1, ptr noundef nonnull byval(%struct.LongDouble2) align 16 %arg) + ret void +} + +define linkonce_odr dso_local void @_Z4testI11LongDouble4EvT_(ptr noundef byval(%struct.LongDouble4) align 16 %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4testI11LongDouble4EvT_( +; CHECK-SAME: ptr noundef byval([[STRUCT_LONGDOUBLE4:%.*]]) align 16 [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_param_tls, i64 64, i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_param_tls, ptr align 8 [[TMP5]], i64 64, i1 false) +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), ptr align 8 [[TMP8]], i64 64, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP11]], i64 64, i1 false) +; CHECK-NEXT: store i64 64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (ptr, i32, ...) @_Z5test2I11LongDouble4EvT_iz(ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], i32 noundef 1, ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]]) +; CHECK-NEXT: ret void +; +entry: + call void @_Z3usePv(ptr noundef nonnull %arg) + call void (ptr, i32, ...) @_Z5test2I11LongDouble4EvT_iz(ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, i32 noundef 1, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg) + ret void +} + +declare void @_Z3usePv(ptr noundef) local_unnamed_addr #3 + +; In the variadic callees, check that MSan builds a private copy of the +; va_arg shadow: 176 bytes for the register-save area plus the dynamic +; overflow size, copying at most 800 bytes (kParamTLSSize) out of +; __msan_va_arg_tls, and that after va_start the shadow of the va_list's +; reg_save_area and overflow_arg_area is initialized from that copy. +define linkonce_odr dso_local void @_Z5test2IcEvT_iz(i8 noundef signext %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2IcEvT_iz( +; CHECK-SAME: i8 noundef signext [[T:%.*]], i32 noundef [[N:%.*]], ...)
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4 + +declare void @llvm.va_start(ptr) #5 + +declare void @llvm.va_end(ptr) #5 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4 + +define linkonce_odr dso_local void @_Z5test2IiEvT_iz(i32 noundef %t, i32 noundef %n, ...) 
sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2IiEvT_iz( +; CHECK-SAME: i32 noundef [[T:%.*]], i32 noundef [[N:%.*]], ...) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2IfEvT_iz(float noundef %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2IfEvT_iz( +; CHECK-SAME: float noundef [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2IdEvT_iz(double noundef %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2IdEvT_iz( +; CHECK-SAME: double noundef [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2IeEvT_iz(x86_fp80 noundef %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2IeEvT_iz( +; CHECK-SAME: x86_fp80 noundef [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz(i64 %t.coerce, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I6IntIntEvT_iz( +; CHECK-SAME: i64 [[T_COERCE:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz(i64 %t.coerce0, i64 %t.coerce1, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I10Int64Int64EvT_iz( +; CHECK-SAME: i64 [[T_COERCE0:%.*]], i64 [[T_COERCE1:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz(double %t.coerce0, double %t.coerce1, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I12DoubleDoubleEvT_iz( +; CHECK-SAME: double [[T_COERCE0:%.*]], double [[T_COERCE1:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz(ptr noundef byval(%struct.Double4) align 8 %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I7Double4EvT_iz( +; CHECK-SAME: ptr noundef byval([[STRUCT_DOUBLE4:%.*]]) align 8 [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz(double %t.coerce0, float %t.coerce1, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I11DoubleFloatEvT_iz( +; CHECK-SAME: double [[T_COERCE0:%.*]], float [[T_COERCE1:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz(ptr noundef byval(%struct.LongDouble2) align 16 %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I11LongDouble2EvT_iz( +; CHECK-SAME: ptr noundef byval([[STRUCT_LONGDOUBLE2:%.*]]) align 16 [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz(ptr noundef byval(%struct.LongDouble4) align 16 %t, i32 noundef %n, ...) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z5test2I11LongDouble4EvT_iz( +; CHECK-SAME: ptr noundef byval([[STRUCT_LONGDOUBLE4:%.*]]) align 16 [[T:%.*]], i32 noundef [[N:%.*]], ...) 
#[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 176, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[ARGS:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[TMP6]], i8 -1, i64 24, i1 false) +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP9]], i8 0, i64 24, i1 false) +; CHECK-NEXT: call void @llvm.va_start.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080 +; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP16]], ptr align 16 [[TMP2]], i64 176, i1 false) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARGS]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP24]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.va_end.p0(ptr nonnull [[ARGS]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[ARGS]]) +; CHECK-NEXT: ret void +; +entry: + %args = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %args) #6 + call void @llvm.va_start(ptr nonnull %args) + call void @_Z3usePv(ptr noundef nonnull %args) + call void @llvm.va_end(ptr nonnull %args) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %args) #6 + ret void +} + +; test3 forwards the 64-byte byval aggregate many times; the shadow copies +; should stay within the 800-byte __msan_param_tls and __msan_va_arg_tls +; arrays rather than writing past their end. +define linkonce_odr dso_local void @_Z4test3I11LongDouble4EvT_(ptr noundef byval(%struct.LongDouble4) align 16 %arg) sanitize_memory { +; CHECK-LABEL: define linkonce_odr dso_local void @_Z4test3I11LongDouble4EvT_( +; CHECK-SAME: ptr noundef byval([[STRUCT_LONGDOUBLE4:%.*]]) align 16 [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: 
[[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080 +; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_param_tls, i64 64, i1 false) +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: store i64 0, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @_Z3usePv(ptr noundef nonnull [[ARG]]) +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @__msan_param_tls, ptr align 8 [[TMP5]], i64 64, i1 false) +; CHECK-NEXT: store i32 0, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), ptr align 8 [[TMP8]], i64 64, i1 false) +; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080 +; CHECK-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), ptr align 8 [[TMP11]], i64 64, i1 false) +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), ptr align 8 [[TMP14]], i64 64, i1 false) +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 264) to ptr), ptr align 8 [[TMP17]], i64 64, i1 false) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 87960930222080 +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 328) to ptr), ptr align 8 [[TMP20]], i64 64, i1 false) +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 392) to ptr), ptr align 8 [[TMP23]], i64 64, i1 false) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], 87960930222080 +; CHECK-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 456) to ptr), ptr align 8 [[TMP26]], i64 64, i1 false) +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP28:%.*]] = xor i64 [[TMP27]], 87960930222080 +; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 
[[TMP28]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 520) to ptr), ptr align 8 [[TMP29]], i64 64, i1 false) +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 584) to ptr), ptr align 8 [[TMP32]], i64 64, i1 false) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = xor i64 [[TMP33]], 87960930222080 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 648) to ptr), ptr align 8 [[TMP35]], i64 64, i1 false) +; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP37:%.*]] = xor i64 [[TMP36]], 87960930222080 +; CHECK-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP37]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 712) to ptr), ptr align 8 [[TMP38]], i64 64, i1 false) +; CHECK-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = xor i64 [[TMP39]], 87960930222080 +; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP41]], i64 64, i1 false) +; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP43:%.*]] = xor i64 [[TMP42]], 87960930222080 +; CHECK-NEXT: [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 240) to ptr), ptr align 8 [[TMP44]], i64 64, i1 false) +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP46:%.*]] = xor i64 [[TMP45]], 87960930222080 +; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 304) to ptr), ptr align 8 [[TMP47]], i64 64, i1 false) +; CHECK-NEXT: [[TMP48:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = xor i64 [[TMP48]], 87960930222080 +; CHECK-NEXT: [[TMP50:%.*]] = inttoptr i64 [[TMP49]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 368) to ptr), ptr align 8 [[TMP50]], i64 64, i1 false) +; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP52:%.*]] = xor i64 [[TMP51]], 87960930222080 +; CHECK-NEXT: [[TMP53:%.*]] = inttoptr i64 [[TMP52]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 432) to ptr), ptr align 8 [[TMP53]], i64 64, i1 false) +; CHECK-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP55:%.*]] = xor i64 [[TMP54]], 87960930222080 +; CHECK-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 496) to ptr), ptr align 8 [[TMP56]], i64 64, i1 false) +; CHECK-NEXT: [[TMP57:%.*]] = 
ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = xor i64 [[TMP57]], 87960930222080 +; CHECK-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP58]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 560) to ptr), ptr align 8 [[TMP59]], i64 64, i1 false) +; CHECK-NEXT: [[TMP60:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP61:%.*]] = xor i64 [[TMP60]], 87960930222080 +; CHECK-NEXT: [[TMP62:%.*]] = inttoptr i64 [[TMP61]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 624) to ptr), ptr align 8 [[TMP62]], i64 64, i1 false) +; CHECK-NEXT: [[TMP63:%.*]] = ptrtoint ptr [[ARG]] to i64 +; CHECK-NEXT: [[TMP64:%.*]] = xor i64 [[TMP63]], 87960930222080 +; CHECK-NEXT: [[TMP65:%.*]] = inttoptr i64 [[TMP64]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 688) to ptr), ptr align 8 [[TMP65]], i64 64, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 752) to ptr), i8 0, i32 48, i1 false) +; CHECK-NEXT: store i64 1280, ptr @__msan_va_arg_overflow_size_tls, align 8 +; CHECK-NEXT: call void (ptr, i32, ...) @_Z5test2I11LongDouble4EvT_iz(ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], i32 noundef 20, ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]], ptr noundef nonnull byval([[STRUCT_LONGDOUBLE4]]) align 16 [[ARG]]) +; CHECK-NEXT: ret void +; +entry: + call void @_Z3usePv(ptr noundef nonnull %arg) + call void (ptr, i32, ...) 
@_Z5test2I11LongDouble4EvT_iz(ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, i32 noundef 20, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg, ptr noundef nonnull byval(%struct.LongDouble4) align 16 %arg) + ret void +}
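 +
+; A note on the call above, summarizing what the CHECK lines verify: the 20 byval
+; %struct.LongDouble4 varargs need 64 bytes of shadow each, i.e. 1280 bytes in total
+; (the value stored to __msan_va_arg_overflow_size_tls), which is more than the
+; 800-byte __msan_va_arg_tls region. The caller therefore emits only the shadow
+; copies that fit (the last memcpy into __msan_va_arg_tls ends at offset 752, and
+; the remaining 48 bytes are zero-filled), while the callee clamps its copy out of
+; the region with llvm.umin.i64(..., 800), so neither side touches memory past the
+; end of the TLS buffer.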