Conversation

zhaoqi5 (Contributor) commented Jan 15, 2025

Similar to the tests in `merge-base-offset.ll`, except for the blockaddress tests.

A later commit will optimize this.

This commit adds relax relocations for the tls_le code sequence. Both
handwritten assembly and code generated by clang from source are
affected.

A scheduled tls_le code sequence can still be relaxed normally, so we
can attach relax relocations during code emission according to the
instructions' relocs. The code sequences of the other relaxable macros
cannot be scheduled while relaxation is enabled; attaching relax
relocations for them will be implemented in a later commit. A sketch
of the tls_le sequence follows.
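
For context, here is a minimal sketch of the relaxable tls_le sequence with relax relocations attached (relocation names follow the LoongArch psABI; the symbol `sym` is illustrative):

    lu12i.w $a0, %le_hi20_r(sym)            # R_LARCH_TLS_LE_HI20_R + R_LARCH_RELAX
    add.d   $a0, $a0, $tp, %le_add_r(sym)   # R_LARCH_TLS_LE_ADD_R  + R_LARCH_RELAX
    addi.d  $a0, $a0, %le_lo12_r(sym)       # R_LARCH_TLS_LE_LO12_R + R_LARCH_RELAX

With the paired R_LARCH_RELAX relocations in place, the linker may shrink this to a single `addi.d $a0, $tp, offset` when the TP-relative offset fits in 12 bits.
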
… relocs

If linker relaxation is enabled, relaxable code sequences expanded
from pseudos must not be split up by instruction scheduling. This
commit marks them as scheduling boundaries so they stay contiguous.
(`tls_le` and `call36/tail36` are excepted: `tls_le` can be scheduled
without affecting relaxation, and `call36/tail36` are expanded later,
in the `LoongArchExpandPseudo` pass.)

A new bitmask target-flag is added to attach relax relocs to the
relaxable code sequences. (It is not needed for `tls_le` and
`call36/tail36`, for the reasons above.) Because of this, the "direct"
flags must be extracted when consuming target-flags. In addition, a
code sequence that has been optimized by the `MergeBaseOffset` pass
may no longer be relaxable, so the relax bitmask flag should be
removed there, as the sketch below illustrates.
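
To see why, here is a hypothetical before/after sketch of the folding that the later MergeBaseOffset commit is expected to perform on the tlsle tests below (the folded form is an assumption, modeled on the existing non-TLS `merge-base-offset.ll` checks):

    # Before: relaxable three-instruction sequence plus a load
    lu12i.w $a0, %le_hi20_r(g_i8)
    add.d   $a0, $a0, $tp, %le_add_r(g_i8)
    addi.w  $a0, $a0, %le_lo12_r(g_i8)
    ld.b    $a0, $a0, 0

    # After: the %le_lo12_r addi is folded into the load's offset
    lu12i.w $a0, %le_hi20_r(g_i8)
    add.d   $a0, $a0, $tp, %le_add_r(g_i8)
    ld.b    $a0, $a0, %le_lo12_r(g_i8)

The folded form no longer matches the three-instruction pattern the linker knows how to rewrite, so carrying a relax flag on it would be incorrect.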

llvmbot commented Jan 15, 2025

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

Similar to the tests in merge-base-offset.ll, except for the blockaddress tests.

A later commit will optimize this.


Patch is 37.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122998.diff

2 Files Affected:

  • (added) llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll (+971)
  • (modified) llvm/test/CodeGen/LoongArch/merge-base-offset.ll (+22-22)
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
new file mode 100644
index 00000000000000..7e995d224ce1d2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -0,0 +1,971 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN:   | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN:   | FileCheck --check-prefix=LA64 %s
+
+@g_i8 = dso_local thread_local(localexec) global i8 0
+
+define dso_local signext i8 @tlsle_load_s8() nounwind {
+; LA32-LABEL: tlsle_load_s8:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT:    ld.b $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_s8:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT:    ld.b $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+  %1 = load i8, ptr %0
+  ret i8 %1
+}
+
+define dso_local zeroext i8 @tlsle_load_u8() nounwind {
+; LA32-LABEL: tlsle_load_u8:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT:    ld.bu $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_u8:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT:    ld.bu $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+  %1 = load i8, ptr %0
+  ret i8 %1
+}
+
+define dso_local void @tlsle_store_i8() nounwind {
+; LA32-LABEL: tlsle_store_i8:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT:    ori $a1, $zero, 1
+; LA32-NEXT:    st.b $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_i8:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT:    ori $a1, $zero, 1
+; LA64-NEXT:    st.b $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+  store i8 1, ptr %0
+  ret void
+}
+
+@g_i16 = dso_local thread_local(localexec) global i16 0
+
+define dso_local signext i16 @tlsle_load_s16() nounwind {
+; LA32-LABEL: tlsle_load_s16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT:    ld.h $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_s16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT:    ld.h $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+  %1 = load i16, ptr %0
+  ret i16 %1
+}
+
+define dso_local zeroext i16 @tlsle_load_u16() nounwind {
+; LA32-LABEL: tlsle_load_u16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_u16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+  %1 = load i16, ptr %0
+  ret i16 %1
+}
+
+define dso_local void @tlsle_store_i16() nounwind {
+; LA32-LABEL: tlsle_store_i16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT:    ori $a1, $zero, 1
+; LA32-NEXT:    st.h $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_i16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT:    ori $a1, $zero, 1
+; LA64-NEXT:    st.h $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+  store i16 1, ptr %0
+  ret void
+}
+
+@g_i32 = dso_local thread_local(localexec) global i32 0
+
+define dso_local signext i32 @tlsle_load_s32() nounwind {
+; LA32-LABEL: tlsle_load_s32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_s32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT:    ld.w $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+  %1 = load i32, ptr %0
+  ret i32 %1
+}
+
+define dso_local zeroext i32 @tlsle_load_u32() nounwind {
+; LA32-LABEL: tlsle_load_u32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_u32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT:    ld.wu $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+  %1 = load i32, ptr %0
+  ret i32 %1
+}
+
+define dso_local void @tlsle_store_i32() nounwind {
+; LA32-LABEL: tlsle_store_i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT:    ori $a1, $zero, 1
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT:    ori $a1, $zero, 1
+; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+  store i32 1, ptr %0
+  ret void
+}
+
+@g_i64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local i64 @tlsle_load_i64() nounwind {
+; LA32-LABEL: tlsle_load_i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT:    addi.w $a1, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT:    ld.w $a0, $a1, 0
+; LA32-NEXT:    ld.w $a1, $a1, 4
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+  %1 = load i64, ptr %0
+  ret i64 %1
+}
+
+define dso_local void @tlsle_store_i64() nounwind {
+; LA32-LABEL: tlsle_store_i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT:    st.w $zero, $a0, 4
+; LA32-NEXT:    ori $a1, $zero, 1
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT:    ori $a1, $zero, 1
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+  store i64 1, ptr %0
+  ret void
+}
+
+@g_f32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local float @tlsle_load_f32() nounwind {
+; LA32-LABEL: tlsle_load_f32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT:    fld.s $fa0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_f32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT:    fld.s $fa0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+  %1 = load float, ptr %0
+  ret float %1
+}
+
+define dso_local void @tlsle_store_f32() nounwind {
+; LA32-LABEL: tlsle_store_f32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT:    lu12i.w $a1, 260096
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_f32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT:    lu12i.w $a1, 260096
+; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+  store float 1.0, ptr %0
+  ret void
+}
+
+@g_f64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local double @tlsle_load_f64() nounwind {
+; LA32-LABEL: tlsle_load_f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT:    fld.d $fa0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_load_f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT:    fld.d $fa0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+  %1 = load double, ptr %0
+  ret double %1
+}
+
+define dso_local void @tlsle_store_f64() nounwind {
+; LA32-LABEL: tlsle_store_f64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT:    vldi $vr0, -912
+; LA32-NEXT:    fst.d $fa0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_f64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT:    lu52i.d $a1, $zero, 1023
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+  store double 1.0, ptr %0
+  ret void
+}
+
+@g_m64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local void @tlsle_store_multi() nounwind {
+; LA32-LABEL: tlsle_store_multi:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_m64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_m64)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_m64)
+; LA32-NEXT:    st.w $zero, $a0, 4
+; LA32-NEXT:    ori $a1, $zero, 1
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $zero, $a0, 4
+; LA32-NEXT:    ori $a1, $zero, 2
+; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_multi:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_m64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_m64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_m64)
+; LA64-NEXT:    ori $a1, $zero, 1
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ori $a1, $zero, 2
+; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
+  store volatile i64 1, ptr %0
+  store volatile i64 2, ptr %0
+  ret void
+}
+
+@g_sf32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local void @tlsle_store_sf32() nounwind {
+; LA32-LABEL: tlsle_store_sf32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT:    fld.s $fa0, $a0, 0
+; LA32-NEXT:    fst.s $fa0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_sf32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT:    fld.s $fa0, $a0, 0
+; LA64-NEXT:    fst.s $fa0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
+  %1 = load float, ptr %0
+  store volatile float %1, ptr %0
+  ret void
+}
+
+@g_sf64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local void @tlsle_store_sf64() nounwind {
+; LA32-LABEL: tlsle_store_sf64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT:    fld.d $fa0, $a0, 0
+; LA32-NEXT:    fst.d $fa0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_store_sf64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT:    fld.d $fa0, $a0, 0
+; LA64-NEXT:    fst.d $fa0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
+  %1 = load double, ptr %0
+  store volatile double %1, ptr %0
+  ret void
+}
+
+@g_i32x4_src = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+@g_i32x4_dst = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i32x4() nounwind {
+; LA32-LABEL: tlsle_copy_i32x4:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_copy_i32x4:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
+  %1 = load <4 x i32>, ptr %0, align 16
+  %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_dst)
+  store <4 x i32> %1, ptr %2, align 16
+  ret void
+}
+
+@g_i32x8_src = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+@g_i32x8_dst = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i32x8() nounwind {
+; LA32-LABEL: tlsle_copy_i32x8:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_copy_i32x8:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
+  %1 = load <8 x i32>, ptr %0, align 32
+  %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_dst)
+  store <8 x i32> %1, ptr %2, align 32
+  ret void
+}
+
+@g_i8x16 = dso_local thread_local(localexec) global <16 x i8> zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT:    vldrepl.b $vr0, $a0, 0
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8x16)
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT:    vldrepl.b $vr0, $a0, 0
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8x16)
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+  %1 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %0, i32 0)
+  %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8x16)
+  store <16 x i8> %1, ptr %2, align 16
+  ret void
+}
+
+@g_i8x32 = dso_local thread_local(localexec) global <32 x i8> zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT:    xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8x32)
+; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT:    xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA64-NEXT:    addi.d ...
[truncated]

Base automatically changed from users/zhaoqi5/avoid-scheduling-and-attach-relax to main January 20, 2025 02:00
zhaoqi5 merged commit 91682da into main Feb 10, 2025
8 checks passed
zhaoqi5 deleted the users/zhaoqi5/pre-commit-tlsle-mergebaseoffset-tests branch February 10, 2025 08:40

llvm-ci commented Feb 10, 2025

LLVM Buildbot has detected a new failure on builder openmp-s390x-linux running on systemz-1 while building llvm at step 6 "test-openmp".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/88/builds/7828

Here is the relevant piece of the build log, for reference:
Step 6 (test-openmp) failure: 1200 seconds without output running [b'ninja', b'-j 4', b'check-openmp'], attempting to kill
...
PASS: ompd-test :: openmp_examples/example_3.c (439 of 449)
PASS: ompd-test :: openmp_examples/example_4.c (440 of 449)
PASS: ompd-test :: openmp_examples/example_5.c (441 of 449)
PASS: ompd-test :: openmp_examples/example_task.c (442 of 449)
UNSUPPORTED: ompd-test :: openmp_examples/ompd_bt.c (443 of 449)
PASS: ompd-test :: openmp_examples/fibonacci.c (444 of 449)
UNSUPPORTED: ompd-test :: openmp_examples/ompd_parallel.c (445 of 449)
PASS: ompd-test :: openmp_examples/parallel.c (446 of 449)
PASS: ompd-test :: openmp_examples/nested.c (447 of 449)
PASS: ompd-test :: openmp_examples/ompd_icvs.c (448 of 449)
command timed out: 1200 seconds without output running [b'ninja', b'-j 4', b'check-openmp'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1315.839599

Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025
…122998)

Similar to tests in `merge-base-offset.ll`, except for tests of
blockaddress.

A later commit will optimize this.
