100 changes: 100 additions & 0 deletions llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
@@ -0,0 +1,100 @@
; Test the generation of asm for the function:
; int foo(_Atomic int *cp, int *old, int c) {
; return atomic_compare_exchange_weak_explicit(cp, old, c, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
; }

; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr8 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
Contributor:
nit: split the run lines since they seem too long
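
For reference, one way to split them, using lit's RUN-line continuation (a sketch of the suggestion only; flags copied from the existing RUN line):

; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr8 \
; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=CHECK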

; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix -mcpu=pwr8 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK64
Contributor:
should also use -ppc-vsr-nums-as-vr
Can we auto-generate the checks via utils/update_llc_test_checks.py? It would make it easier if things change. Otherwise we need to better document exactly which part of the codegen we want to focus on.
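
For reference, a rough sketch of how the checks could be regenerated with the upstream script, assuming an llvm-project checkout with a built llc (the build path is illustrative):

llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
    llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll

The script reads the RUN lines in the test and rewrites the CHECK lines in place, so regenerating them after a codegen change is a single command.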

Contributor Author (@diggerlin, May 27, 2025):
> should also use -ppc-vsr-nums-as-vr

I do not think we need the option since there are no VSR-related instructions in the test case.

Contributor:
That's fair.


define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
entry:
%cp.addr = alloca ptr, align 4
%old.addr = alloca ptr, align 4
%c.addr = alloca i32, align 4
%.atomictmp = alloca i32, align 4
%cmpxchg.bool = alloca i8, align 1
store ptr %cp, ptr %cp.addr, align 4
store ptr %old, ptr %old.addr, align 4
store i32 %c, ptr %c.addr, align 4
%0 = load ptr, ptr %cp.addr, align 4
%1 = load ptr, ptr %old.addr, align 4
%2 = load i32, ptr %c.addr, align 4
store i32 %2, ptr %.atomictmp, align 4
%3 = load i32, ptr %1, align 4
%4 = load i32, ptr %.atomictmp, align 4
%5 = cmpxchg weak ptr %0, i32 %3, i32 %4 monotonic monotonic, align 4
%6 = extractvalue { i32, i1 } %5, 0
%7 = extractvalue { i32, i1 } %5, 1
br i1 %7, label %cmpxchg.continue, label %cmpxchg.store_expected

cmpxchg.store_expected: ; preds = %entry
store i32 %6, ptr %1, align 4
br label %cmpxchg.continue

cmpxchg.continue: ; preds = %cmpxchg.store_expected, %entry
%storedv = zext i1 %7 to i8
store i8 %storedv, ptr %cmpxchg.bool, align 1
%8 = load i8, ptr %cmpxchg.bool, align 1
%loadedv = trunc i8 %8 to i1
%conv = zext i1 %loadedv to i32
ret i32 %conv
}

; CHECK: .foo:
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r3, -4(r1)
; CHECK-NEXT: stw r4, -8(r1)
; CHECK-NEXT: stw r5, -12(r1)
; CHECK-NEXT: stw r5, -16(r1)
; CHECK-NEXT: L..BB0_1: # %entry
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: lwarx r6, 0, r3
; CHECK-NEXT: cmpw cr1, r6, r7
; CHECK-NEXT: bne cr1, L..BB0_3
; CHECK-NEXT: # %bb.2: # %entry
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: bne cr0, L..BB0_1
; CHECK-NEXT: L..BB0_3: # %entry
; CHECK-NEXT: cmplw r6, r7
; CHECK-NEXT: beq cr0, L..BB0_5
; CHECK-NEXT: # %bb.4: # %cmpxchg.store_expected
; CHECK-NEXT: stw r6, 0(r4)
; CHECK-NEXT: L..BB0_5: # %cmpxchg.continue
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 1
; CHECK-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK-NEXT: stb r3, -17(r1)
; CHECK-NEXT: blr

; CHECK64: .foo:
; CHECK64-NEXT: # %bb.0: # %entry
; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: std r3, -8(r1)
; CHECK64-NEXT: std r4, -16(r1)
; CHECK64-NEXT: stw r5, -20(r1)
; CHECK64-NEXT: stw r5, -24(r1)
; CHECK64-NEXT: L..BB0_1: # %entry
; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK64-NEXT: lwarx r6, 0, r3
; CHECK64-NEXT: cmpw cr1, r6, r7
; CHECK64-NEXT: bne cr1, L..BB0_3
; CHECK64-NEXT: # %bb.2: # %entry
; CHECK64-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK64-NEXT: stwcx. r5, 0, r3
; CHECK64-NEXT: bne cr0, L..BB0_1
; CHECK64-NEXT: L..BB0_3: # %entry
; CHECK64-NEXT: cmplw r6, r7
; CHECK64-NEXT: beq cr0, L..BB0_5
; CHECK64-NEXT: # %bb.4: # %cmpxchg.store_expected
; CHECK64-NEXT: stw r6, 0(r4)
; CHECK64-NEXT: L..BB0_5: # %cmpxchg.continue
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: stb r3, -25(r1)
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
; CHECK64-NEXT: blr