Skip to content

Commit 9be30e5

Browse files
authored
[clang][LoongArch] Introduce LASX and LSX conversion intrinsics (#157819)
This patch introduces the LASX and LSX conversion intrinsics: - __m256 __lasx_cast_128_s (__m128) - __m256d __lasx_cast_128_d (__m128d) - __m256i __lasx_cast_128 (__m128i) - __m256 __lasx_concat_128_s (__m128, __m128) - __m256d __lasx_concat_128_d (__m128, __m128d) - __m256i __lasx_concat_128 (__m128, __m128i) - __m128 __lasx_extract_128_lo_s (__m256) - __m128d __lasx_extract_128_lo_d (__m256d) - __m128i __lasx_extract_128_lo (__m256i) - __m128 __lasx_extract_128_hi_s (__m256) - __m128d __lasx_extract_128_hi_d (__m256d) - __m128i __lasx_extract_128_hi (__m256i) - __m256 __lasx_insert_128_lo_s (__m256, __m128) - __m256d __lasx_insert_128_lo_d (__m256d, __m128d) - __m256i __lasx_insert_128_lo (__m256i, __m128i) - __m256 __lasx_insert_128_hi_s (__m256, __m128) - __m256d __lasx_insert_128_hi_d (__m256d, __m128d) - __m256i __lasx_insert_128_hi (__m256i, __m128i) Relevant GCC patch: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c2013267642fea4a6e89b826940c8aa80a76089d
1 parent 4b35ff5 commit 9be30e5

File tree

6 files changed

+482
-0
lines changed

6 files changed

+482
-0
lines changed

clang/include/clang/Basic/BuiltinsLoongArchLASX.def

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -986,3 +986,22 @@ TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx")
986986
TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx")
987987
TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx")
988988
TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx")
989+
990+
TARGET_BUILTIN(__builtin_lasx_cast_128_s, "V8fV4f", "nc", "lasx")
991+
TARGET_BUILTIN(__builtin_lasx_cast_128_d, "V4dV2d", "nc", "lasx")
992+
TARGET_BUILTIN(__builtin_lasx_cast_128, "V4LLiV2LLi", "nc", "lasx")
993+
TARGET_BUILTIN(__builtin_lasx_concat_128_s, "V8fV4fV4f", "nc", "lasx")
994+
TARGET_BUILTIN(__builtin_lasx_concat_128_d, "V4dV2dV2d", "nc", "lasx")
995+
TARGET_BUILTIN(__builtin_lasx_concat_128, "V4LLiV2LLiV2LLi", "nc", "lasx")
996+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_s, "V4fV8f", "nc", "lasx")
997+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo_d, "V2dV4d", "nc", "lasx")
998+
TARGET_BUILTIN(__builtin_lasx_extract_128_lo, "V2LLiV4LLi", "nc", "lasx")
999+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_s, "V4fV8f", "nc", "lasx")
1000+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi_d, "V2dV4d", "nc", "lasx")
1001+
TARGET_BUILTIN(__builtin_lasx_extract_128_hi, "V2LLiV4LLi", "nc", "lasx")
1002+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_s, "V8fV8fV4f", "nc", "lasx")
1003+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo_d, "V4dV4dV2d", "nc", "lasx")
1004+
TARGET_BUILTIN(__builtin_lasx_insert_128_lo, "V4LLiV4LLiV2LLi", "nc", "lasx")
1005+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_s, "V8fV8fV4f", "nc", "lasx")
1006+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi_d, "V4dV4dV2d", "nc", "lasx")
1007+
TARGET_BUILTIN(__builtin_lasx_insert_128_hi, "V4LLiV4LLiV2LLi", "nc", "lasx")

clang/lib/Basic/Targets/LoongArch.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts,
242242
Builder.defineMacro("__loongarch_simd_width", "256");
243243
Builder.defineMacro("__loongarch_sx", Twine(1));
244244
Builder.defineMacro("__loongarch_asx", Twine(1));
245+
Builder.defineMacro("__loongarch_asx_sx_conv", Twine(1));
245246
} else if (HasFeatureLSX) {
246247
Builder.defineMacro("__loongarch_simd_width", "128");
247248
Builder.defineMacro("__loongarch_sx", Twine(1));

clang/lib/Headers/lasxintrin.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#ifndef _LOONGSON_ASXINTRIN_H
1111
#define _LOONGSON_ASXINTRIN_H 1
1212

13+
#include <lsxintrin.h>
14+
1315
#if defined(__loongarch_asx)
1416

1517
typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
@@ -3882,5 +3884,116 @@ extern __inline
38823884

38833885
#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1)))
38843886

3887+
#if defined(__loongarch_asx_sx_conv)
3888+
3889+
extern __inline
3890+
__attribute__((__gnu_inline__, __always_inline__,
3891+
__artificial__)) __m256 __lasx_cast_128_s(__m128 _1) {
3892+
return (__m256)__builtin_lasx_cast_128_s((v4f32)_1);
3893+
}
3894+
3895+
extern __inline
3896+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3897+
__lasx_cast_128_d(__m128d _1) {
3898+
return (__m256d)__builtin_lasx_cast_128_d((v2f64)_1);
3899+
}
3900+
3901+
extern __inline
3902+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3903+
__lasx_cast_128(__m128i _1) {
3904+
return (__m256i)__builtin_lasx_cast_128((v2i64)_1);
3905+
}
3906+
3907+
extern __inline
3908+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3909+
__lasx_concat_128_s(__m128 _1, __m128 _2) {
3910+
return (__m256)__builtin_lasx_concat_128_s((v4f32)_1, (v4f32)_2);
3911+
}
3912+
3913+
extern __inline
3914+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3915+
__lasx_concat_128_d(__m128d _1, __m128d _2) {
3916+
return (__m256d)__builtin_lasx_concat_128_d((v2f64)_1, (v2f64)_2);
3917+
}
3918+
3919+
extern __inline
3920+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3921+
__lasx_concat_128(__m128i _1, __m128i _2) {
3922+
return (__m256i)__builtin_lasx_concat_128((v2i64)_1, (v2i64)_2);
3923+
}
3924+
3925+
extern __inline
3926+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3927+
__lasx_extract_128_lo_s(__m256 _1) {
3928+
return (__m128)__builtin_lasx_extract_128_lo_s((v8f32)_1);
3929+
}
3930+
3931+
extern __inline
3932+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3933+
__lasx_extract_128_lo_d(__m256d _1) {
3934+
return (__m128d)__builtin_lasx_extract_128_lo_d((v4f64)_1);
3935+
}
3936+
3937+
extern __inline
3938+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3939+
__lasx_extract_128_lo(__m256i _1) {
3940+
return (__m128i)__builtin_lasx_extract_128_lo((v4i64)_1);
3941+
}
3942+
3943+
extern __inline
3944+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
3945+
__lasx_extract_128_hi_s(__m256 _1) {
3946+
return (__m128)__builtin_lasx_extract_128_hi_s((v8f32)_1);
3947+
}
3948+
3949+
extern __inline
3950+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
3951+
__lasx_extract_128_hi_d(__m256d _1) {
3952+
return (__m128d)__builtin_lasx_extract_128_hi_d((v4f64)_1);
3953+
}
3954+
3955+
extern __inline
3956+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
3957+
__lasx_extract_128_hi(__m256i _1) {
3958+
return (__m128i)__builtin_lasx_extract_128_hi((v4i64)_1);
3959+
}
3960+
3961+
extern __inline
3962+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3963+
__lasx_insert_128_lo_s(__m256 _1, __m128 _2) {
3964+
return (__m256)__builtin_lasx_insert_128_lo_s((v8f32)_1, (v4f32)_2);
3965+
}
3966+
3967+
extern __inline
3968+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3969+
__lasx_insert_128_lo_d(__m256d _1, __m128d _2) {
3970+
return (__m256d)__builtin_lasx_insert_128_lo_d((v4f64)_1, (v2f64)_2);
3971+
}
3972+
3973+
extern __inline
3974+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3975+
__lasx_insert_128_lo(__m256i _1, __m128i _2) {
3976+
return (__m256i)__builtin_lasx_insert_128_lo((v4i64)_1, (v2i64)_2);
3977+
}
3978+
3979+
extern __inline
3980+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256
3981+
__lasx_insert_128_hi_s(__m256 _1, __m128 _2) {
3982+
return (__m256)__builtin_lasx_insert_128_hi_s((v8f32)_1, (v4f32)_2);
3983+
}
3984+
3985+
extern __inline
3986+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d
3987+
__lasx_insert_128_hi_d(__m256d _1, __m128d _2) {
3988+
return (__m256d)__builtin_lasx_insert_128_hi_d((v4f64)_1, (v2f64)_2);
3989+
}
3990+
3991+
extern __inline
3992+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
3993+
__lasx_insert_128_hi(__m256i _1, __m128i _2) {
3994+
return (__m256i)__builtin_lasx_insert_128_hi((v4i64)_1, (v2i64)_2);
3995+
}
3996+
3997+
#endif /* defined(__loongarch_asx_sx_conv). */
38853998
#endif /* defined(__loongarch_asx). */
38863999
#endif /* _LOONGSON_ASXINTRIN_H. */

clang/test/CodeGen/LoongArch/lasx/builtin-alias.c

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7120,6 +7120,177 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
71207120
// CHECK-NEXT: ret void
71217121
//
71227122
v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
7123+
// CHECK-LABEL: define dso_local void @cast_128_s(
7124+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7125+
// CHECK-NEXT: [[ENTRY:.*:]]
7126+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <4 x float>
7127+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
7128+
// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7129+
// CHECK-NEXT: ret void
7130+
//
7131+
v8f32 cast_128_s(v4f32 _1) { return __lasx_cast_128_s(_1); }
7132+
// CHECK-LABEL: define dso_local void @cast_128_d(
7133+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7134+
// CHECK-NEXT: [[ENTRY:.*:]]
7135+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x double>
7136+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
7137+
// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7138+
// CHECK-NEXT: ret void
7139+
//
7140+
v4f64 cast_128_d(v2f64 _1) { return __lasx_cast_128_d(_1); }
7141+
// CHECK-LABEL: define dso_local void @cast_128(
7142+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7143+
// CHECK-NEXT: [[ENTRY:.*:]]
7144+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x i64>
7145+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> [[TMP0]])
7146+
// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7147+
// CHECK-NEXT: ret void
7148+
//
7149+
v4i64 cast_128(v2i64 _1) { return __lasx_cast_128(_1); }
7150+
// CHECK-LABEL: define dso_local void @concat_128_s(
7151+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7152+
// CHECK-NEXT: [[ENTRY:.*:]]
7153+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <4 x float>
7154+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
7155+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
7156+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7157+
// CHECK-NEXT: ret void
7158+
//
7159+
v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __lasx_concat_128_s(_1, _2); }
7160+
// CHECK-LABEL: define dso_local void @concat_128_d(
7161+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7162+
// CHECK-NEXT: [[ENTRY:.*:]]
7163+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x double>
7164+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
7165+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
7166+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7167+
// CHECK-NEXT: ret void
7168+
//
7169+
v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __lasx_concat_128_d(_1, _2); }
7170+
// CHECK-LABEL: define dso_local void @concat_128(
7171+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], i128 noundef [[_1_COERCE:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
7172+
// CHECK-NEXT: [[ENTRY:.*:]]
7173+
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE]] to <2 x i64>
7174+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
7175+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
7176+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7177+
// CHECK-NEXT: ret void
7178+
//
7179+
v4i64 concat_128(v2i64 _1, v2i64 _2) { return __lasx_concat_128(_1, _2); }
7180+
// CHECK-LABEL: define dso_local i128 @extract_128_lo_s(
7181+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7182+
// CHECK-NEXT: [[ENTRY:.*:]]
7183+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7184+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
7185+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
7186+
// CHECK-NEXT: ret i128 [[TMP2]]
7187+
//
7188+
v4f32 extract_128_lo_s(v8f32 _1) { return __lasx_extract_128_lo_s(_1); }
7189+
// CHECK-LABEL: define dso_local i128 @extract_128_lo_d(
7190+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7191+
// CHECK-NEXT: [[ENTRY:.*:]]
7192+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7193+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
7194+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
7195+
// CHECK-NEXT: ret i128 [[TMP2]]
7196+
//
7197+
v2f64 extract_128_lo_d(v4f64 _1) { return __lasx_extract_128_lo_d(_1); }
7198+
// CHECK-LABEL: define dso_local i128 @extract_128_lo(
7199+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7200+
// CHECK-NEXT: [[ENTRY:.*:]]
7201+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7202+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> [[_1]])
7203+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
7204+
// CHECK-NEXT: ret i128 [[TMP2]]
7205+
//
7206+
v2i64 extract_128_lo(v4i64 _1) { return __lasx_extract_128_lo(_1); }
7207+
// CHECK-LABEL: define dso_local i128 @extract_128_hi_s(
7208+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7209+
// CHECK-NEXT: [[ENTRY:.*:]]
7210+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7211+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
7212+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
7213+
// CHECK-NEXT: ret i128 [[TMP2]]
7214+
//
7215+
v4f32 extract_128_hi_s(v8f32 _1) { return __lasx_extract_128_hi_s(_1); }
7216+
// CHECK-LABEL: define dso_local i128 @extract_128_hi_d(
7217+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7218+
// CHECK-NEXT: [[ENTRY:.*:]]
7219+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7220+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
7221+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
7222+
// CHECK-NEXT: ret i128 [[TMP2]]
7223+
//
7224+
v2f64 extract_128_hi_d(v4f64 _1) { return __lasx_extract_128_hi_d(_1); }
7225+
// CHECK-LABEL: define dso_local i128 @extract_128_hi(
7226+
// CHECK-SAME: ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR7]] {
7227+
// CHECK-NEXT: [[ENTRY:.*:]]
7228+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7229+
// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> [[_1]])
7230+
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
7231+
// CHECK-NEXT: ret i128 [[TMP2]]
7232+
//
7233+
v2i64 extract_128_hi(v4i64 _1) { return __lasx_extract_128_hi(_1); }
7234+
// CHECK-LABEL: define dso_local void @insert_128_lo_s(
7235+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7236+
// CHECK-NEXT: [[ENTRY:.*:]]
7237+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7238+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
7239+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
7240+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7241+
// CHECK-NEXT: ret void
7242+
//
7243+
v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_lo_s(_1, _2); }
7244+
// CHECK-LABEL: define dso_local void @insert_128_lo_d(
7245+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7246+
// CHECK-NEXT: [[ENTRY:.*:]]
7247+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7248+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
7249+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
7250+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7251+
// CHECK-NEXT: ret void
7252+
//
7253+
v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_lo_d(_1, _2); }
7254+
// CHECK-LABEL: define dso_local void @insert_128_lo(
7255+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7256+
// CHECK-NEXT: [[ENTRY:.*:]]
7257+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7258+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
7259+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
7260+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7261+
// CHECK-NEXT: ret void
7262+
//
7263+
v4i64 insert_128_lo(v4i64 _1, v2i64 _2) { return __lasx_insert_128_lo(_1, _2); }
7264+
// CHECK-LABEL: define dso_local void @insert_128_hi_s(
7265+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<8 x float>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7266+
// CHECK-NEXT: [[ENTRY:.*:]]
7267+
// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7268+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <4 x float>
7269+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
7270+
// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7271+
// CHECK-NEXT: ret void
7272+
//
7273+
v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_hi_s(_1, _2); }
7274+
// CHECK-LABEL: define dso_local void @insert_128_hi_d(
7275+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x double>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7276+
// CHECK-NEXT: [[ENTRY:.*:]]
7277+
// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7278+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x double>
7279+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
7280+
// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7281+
// CHECK-NEXT: ret void
7282+
//
7283+
v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_hi_d(_1, _2); }
7284+
// CHECK-LABEL: define dso_local void @insert_128_hi(
7285+
// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<4 x i64>) align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr dead_on_return noundef readonly captures(none) [[TMP0:%.*]], i128 noundef [[_2_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
7286+
// CHECK-NEXT: [[ENTRY:.*:]]
7287+
// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0]], align 32, !tbaa [[CHAR_TBAA6]]
7288+
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE]] to <2 x i64>
7289+
// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
7290+
// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT]], align 32, !tbaa [[CHAR_TBAA6]]
7291+
// CHECK-NEXT: ret void
7292+
//
7293+
v4i64 insert_128_hi(v4i64 _1, v2i64 _2) { return __lasx_insert_128_hi(_1, _2); }
71237294
//.
71247295
// CHECK: [[META4:![0-9]+]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
71257296
// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}

0 commit comments

Comments
 (0)