[LoongArch] Pre-commit tests for tls-le merge base offset. NFC #122998
Conversation
This commit adds relax relocations for the tls_le code sequence. Both handwritten assembly and code generated by Clang are affected. The tls_le code sequence can still be relaxed normally after instruction scheduling, so relax relocations can be attached at code emission according to the instructions' relocs. Other relaxable macros' code sequences cannot be scheduled when relaxation is enabled; attaching relax relocations for them will be implemented in a later commit.
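For illustration, here is a hedged sketch of the LA64 tls_le sequence and the relocations it would carry once relax relocs are attached (relocation names taken from the LoongArch ELF psABI; the exact emission is defined by the commit itself):

```asm
# tls_le address computation for a local-exec TLS symbol (LA64).
# With relaxation enabled, each instruction would carry an R_LARCH_RELAX
# in addition to its TLS LE relocation:
lu12i.w $a0, %le_hi20_r(sym)           # R_LARCH_TLS_LE_HI20_R (+ R_LARCH_RELAX)
add.d   $a0, $a0, $tp, %le_add_r(sym)  # R_LARCH_TLS_LE_ADD_R  (+ R_LARCH_RELAX)
addi.d  $a0, $a0, %le_lo12_r(sym)      # R_LARCH_TLS_LE_LO12_R (+ R_LARCH_RELAX)
# If the symbol's TP offset fits in 12 bits, the linker can relax the
# sequence down to a single: addi.d $a0, $tp, <offset>
```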
… relocs If linker relaxation is enabled, relaxable code sequences expanded from pseudos must not be separated by instruction scheduling. This commit tags them as scheduling boundaries to prevent that (except for `tls_le` and `call36/tail36`: `tls_le` can be scheduled with no influence on relaxation, and `call36/tail36` are expanded later, in the `LoongArchExpandPseudo` pass). A new mask target-flag is added to attach relax relocs to the relaxable code sequences (again unneeded for `tls_le` and `call36/tail36`, for the reasons above). Because of this, the "direct" flags must be extracted when using their target-flags. In addition, a code sequence may no longer be relaxable after the `MergeBaseOffset` pass optimizes it, so the relax "bitmask" flag should be removed there.
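As a hedged illustration of why scheduling boundaries matter here, consider the `la.pcrel` expansion (one of the relaxable sequences; the commit covers the relaxable pseudos generally):

```asm
# la.pcrel expands to a pair that the linker can relax to a single pcaddi:
pcalau12i $a0, %pc_hi20(sym)
addi.d    $a0, $a0, %pc_lo12(sym)
# If the scheduler were allowed to move an unrelated instruction between
# the two, the linker would no longer see the adjacent pair it rewrites,
# so the expanded sequence is tagged as a scheduling boundary.
```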
Similar to tests in `merge-base-offset.ll`, except for tests of blockaddress. A later commit will optimize this.
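For context, the follow-up optimization is expected to fold the low 12 bits of the address into the memory access, roughly along the lines of this hedged sketch (mirroring what `merge-base-offset.ll` checks for non-TLS globals):

```asm
# Current output, as checked in the tests below:
lu12i.w $a0, %le_hi20_r(g_i8)
add.d   $a0, $a0, $tp, %le_add_r(g_i8)
addi.d  $a0, $a0, %le_lo12_r(g_i8)
ld.b    $a0, $a0, 0
# After the later MergeBaseOffset change (sketch), the addi would be
# folded into the load's displacement:
lu12i.w $a0, %le_hi20_r(g_i8)
add.d   $a0, $a0, $tp, %le_add_r(g_i8)
ld.b    $a0, $a0, %le_lo12_r(g_i8)
```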
@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes: Similar to tests in `merge-base-offset.ll`, except for tests of blockaddress. A later commit will optimize this.

Patch is 37.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122998.diff

2 Files Affected:
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
new file mode 100644
index 00000000000000..7e995d224ce1d2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -0,0 +1,971 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA64 %s
+
+@g_i8 = dso_local thread_local(localexec) global i8 0
+
+define dso_local signext i8 @tlsle_load_s8() nounwind {
+; LA32-LABEL: tlsle_load_s8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = load i8, ptr %0
+ ret i8 %1
+}
+
+define dso_local zeroext i8 @tlsle_load_u8() nounwind {
+; LA32-LABEL: tlsle_load_u8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ld.bu $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ld.bu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = load i8, ptr %0
+ ret i8 %1
+}
+
+define dso_local void @tlsle_store_i8() nounwind {
+; LA32-LABEL: tlsle_store_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ store i8 1, ptr %0
+ ret void
+}
+
+@g_i16 = dso_local thread_local(localexec) global i16 0
+
+define dso_local signext i16 @tlsle_load_s16() nounwind {
+; LA32-LABEL: tlsle_load_s16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ %1 = load i16, ptr %0
+ ret i16 %1
+}
+
+define dso_local zeroext i16 @tlsle_load_u16() nounwind {
+; LA32-LABEL: tlsle_load_u16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ %1 = load i16, ptr %0
+ ret i16 %1
+}
+
+define dso_local void @tlsle_store_i16() nounwind {
+; LA32-LABEL: tlsle_store_i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ store i16 1, ptr %0
+ ret void
+}
+
+@g_i32 = dso_local thread_local(localexec) global i32 0
+
+define dso_local signext i32 @tlsle_load_s32() nounwind {
+; LA32-LABEL: tlsle_load_s32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ %1 = load i32, ptr %0
+ ret i32 %1
+}
+
+define dso_local zeroext i32 @tlsle_load_u32() nounwind {
+; LA32-LABEL: tlsle_load_u32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ld.wu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ %1 = load i32, ptr %0
+ ret i32 %1
+}
+
+define dso_local void @tlsle_store_i32() nounwind {
+; LA32-LABEL: tlsle_store_i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ store i32 1, ptr %0
+ ret void
+}
+
+@g_i64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local i64 @tlsle_load_i64() nounwind {
+; LA32-LABEL: tlsle_load_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT: addi.w $a1, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+ %1 = load i64, ptr %0
+ ret i64 %1
+}
+
+define dso_local void @tlsle_store_i64() nounwind {
+; LA32-LABEL: tlsle_store_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+ store i64 1, ptr %0
+ ret void
+}
+
+@g_f32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local float @tlsle_load_f32() nounwind {
+; LA32-LABEL: tlsle_load_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+ %1 = load float, ptr %0
+ ret float %1
+}
+
+define dso_local void @tlsle_store_f32() nounwind {
+; LA32-LABEL: tlsle_store_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT: lu12i.w $a1, 260096
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT: lu12i.w $a1, 260096
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+ store float 1.0, ptr %0
+ ret void
+}
+
+@g_f64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local double @tlsle_load_f64() nounwind {
+; LA32-LABEL: tlsle_load_f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+ %1 = load double, ptr %0
+ ret double %1
+}
+
+define dso_local void @tlsle_store_f64() nounwind {
+; LA32-LABEL: tlsle_store_f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT: vldi $vr0, -912
+; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT: lu52i.d $a1, $zero, 1023
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+ store double 1.0, ptr %0
+ ret void
+}
+
+@g_m64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local void @tlsle_store_multi() nounwind {
+; LA32-LABEL: tlsle_store_multi:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_m64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_m64)
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 2
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_multi:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_m64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_m64)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ori $a1, $zero, 2
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
+ store volatile i64 1, ptr %0
+ store volatile i64 2, ptr %0
+ ret void
+}
+
+@g_sf32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local void @tlsle_store_sf32() nounwind {
+; LA32-LABEL: tlsle_store_sf32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: fst.s $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_sf32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: fst.s $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
+ %1 = load float, ptr %0
+ store volatile float %1, ptr %0
+ ret void
+}
+
+@g_sf64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local void @tlsle_store_sf64() nounwind {
+; LA32-LABEL: tlsle_store_sf64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_sf64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: fst.d $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
+ %1 = load double, ptr %0
+ store volatile double %1, ptr %0
+ ret void
+}
+
+@g_i32x4_src = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+@g_i32x4_dst = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i32x4() nounwind {
+; LA32-LABEL: tlsle_copy_i32x4:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i32x4:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
+ %1 = load <4 x i32>, ptr %0, align 16
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_dst)
+ store <4 x i32> %1, ptr %2, align 16
+ ret void
+}
+
+@g_i32x8_src = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+@g_i32x8_dst = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i32x8() nounwind {
+; LA32-LABEL: tlsle_copy_i32x8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i32x8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
+ %1 = load <8 x i32>, ptr %0, align 32
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_dst)
+ store <8 x i32> %1, ptr %2, align 32
+ ret void
+}
+
+@g_i8x16 = dso_local thread_local(localexec) global <16 x i8> zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x16)
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x16)
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %0, i32 0)
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8x16)
+ store <16 x i8> %1, ptr %2, align 16
+ ret void
+}
+
+@g_i8x32 = dso_local thread_local(localexec) global <32 x i8> zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x32)
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA64-NEXT: addi.d ...
[truncated]
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/7828
…122998) Similar to tests in `merge-base-offset.ll`, except for tests of blockaddress. A later commit will optimize this.