Skip to content

Commit cfae42a

Browse files
authored
Merge branch 'main' into vector_combine4
2 parents 104f2e2 + 917d815 commit cfae42a

File tree

20 files changed

+3476
-2012
lines changed

20 files changed

+3476
-2012
lines changed

lldb/source/Target/Process.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3256,6 +3256,7 @@ Status Process::ConnectRemote(llvm::StringRef remote_url) {
32563256
if (state == eStateStopped || state == eStateCrashed) {
32573257
// If we attached and actually have a process on the other end, then
32583258
// this ended up being the equivalent of an attach.
3259+
SetShouldDetach(true);
32593260
CompleteAttach();
32603261

32613262
// This delays passing the stopped event to listeners till
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
"""
2+
Test that ConnectRemote sets ShouldDetach flag correctly.
3+
4+
When connecting to a remote process that stops after connection,
5+
the process should be marked for detach (not kill) on destruction.
6+
"""
7+
8+
import lldb
9+
from lldbsuite.test.lldbtest import *
10+
from lldbsuite.test.decorators import *
11+
from lldbsuite.test.gdbclientutils import *
12+
from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase
13+
from lldbsuite.test import lldbutil
14+
15+
16+
class TestConnectRemoteDetach(GDBRemoteTestBase):
17+
"""Test that ConnectRemote properly sets ShouldDetach flag."""
18+
19+
class StoppedResponder(MockGDBServerResponder):
20+
"""A responder that returns a stopped process."""
21+
22+
def qfThreadInfo(self):
23+
return "m1"
24+
25+
def qsThreadInfo(self):
26+
return "l"
27+
28+
def qC(self):
29+
return "QC1"
30+
31+
def haltReason(self):
32+
# Return that we're stopped
33+
return "T05thread:1;"
34+
35+
def cont(self):
36+
# Stay stopped
37+
return "T05thread:1;"
38+
39+
def D(self):
40+
# Detach packet: this is what we want to verify gets called.
41+
return "OK"
42+
43+
def k(self):
44+
# Kill packet: this is what we want to verify doesn't get called.
45+
raise RuntimeError("should not receive k(ill) packet")
46+
47+
def test_connect_remote_sets_detach(self):
48+
"""Test that ConnectRemote to a stopped process sets ShouldDetach."""
49+
self.server.responder = self.StoppedResponder()
50+
51+
target = self.createTarget("a.yaml")
52+
process = self.connect(target)
53+
54+
# Wait for the process to be in stopped state after connecting.
55+
# When ConnectRemote connects to a remote process that is stopped,
56+
# it should call SetShouldDetach(true) before CompleteAttach().
57+
lldbutil.expect_state_changes(
58+
self, self.dbg.GetListener(), process, [lldb.eStateStopped]
59+
)
60+
61+
# Now destroy the process. Because ShouldDetach was set to true
62+
# during ConnectRemote, this should send a 'D' (detach) packet
63+
# rather than a 'k' (kill) packet when the process is destroyed.
64+
process.Destroy()
65+
66+
# Verify that the (D)etach packet was sent.
67+
self.assertPacketLogReceived(["D"])

llvm/docs/AMDGPUUsage.rst

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,51 @@ is conservatively correct for OpenCL.
11801180
other operations within the same address space.
11811181
======================= ===================================================
11821182

1183+
Target Types
1184+
------------
1185+
1186+
The AMDGPU backend implements some target extension types.
1187+
1188+
.. _amdgpu-types-named-barriers:
1189+
1190+
Named Barriers
1191+
~~~~~~~~~~~~~~
1192+
1193+
Named barriers are fixed-function hardware barrier objects that are available
1194+
in gfx12.5+ in addition to the traditional default barriers.
1195+
1196+
In LLVM IR, named barriers are represented by global variables of type
1197+
``target("amdgcn.named.barrier", 0)`` in the LDS address space. Named barrier
1198+
global variables do not occupy actual LDS memory, but their lifetime and
1199+
allocation scope match those of global variables in LDS. Programs in LLVM IR
1200+
refer to named barriers using pointers.
1201+
1202+
The following named barrier types are supported in global variables, defined
1203+
recursively:
1204+
1205+
* a single, standalone ``target("amdgcn.named.barrier", 0)``
1206+
* an array of supported types
1207+
* a struct containing a single element of supported type
1208+
1209+
.. code-block:: llvm
1210+
1211+
@bar = addrspace(3) global target("amdgcn.named.barrier", 0) undef
1212+
@foo = addrspace(3) global [2 x target("amdgcn.named.barrier", 0)] undef
1213+
@baz = addrspace(3) global { target("amdgcn.named.barrier", 0) } undef
1214+
1215+
...
1216+
1217+
%foo.i = getelementptr [2 x target("amdgcn.named.barrier", 0)], ptr addrspace(3) @foo, i32 0, i32 %i
1218+
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %foo.i, i32 0)
1219+
1220+
Named barrier types may not be used in ``alloca``.
1221+
1222+
Named barriers do not have an underlying byte representation.
1223+
It is undefined behavior to use a pointer to any part of a named barrier object
1224+
as the pointer operand of a regular memory access instruction or intrinsic.
1225+
Pointers to named barrier objects are intended to be used with dedicated
1226+
intrinsics. Reading from or writing to such pointers is undefined behavior.
1227+
11831228
LLVM IR Intrinsics
11841229
------------------
11851230

llvm/lib/Analysis/DependenceAnalysis.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,9 +407,10 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
407407
continue;
408408
Value *Ptr = getLoadStorePointerOperand(&Inst);
409409
const Loop *L = LI.getLoopFor(Inst.getParent());
410+
const Loop *OutermostLoop = L ? L->getOutermostLoop() : nullptr;
410411
const SCEV *PtrSCEV = SE.getSCEVAtScope(Ptr, L);
411412
const SCEV *AccessFn = SE.removePointerBase(PtrSCEV);
412-
SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, L);
413+
SCEVMonotonicity Mon = Checker.checkMonotonicity(AccessFn, OutermostLoop);
413414
OS.indent(2) << "Inst: " << Inst << "\n";
414415
OS.indent(4) << "Expr: " << *AccessFn << "\n";
415416
Mon.print(OS, 4);
@@ -945,6 +946,8 @@ SCEVMonotonicity SCEVMonotonicityChecker::invariantOrUnknown(const SCEV *Expr) {
945946
SCEVMonotonicity
946947
SCEVMonotonicityChecker::checkMonotonicity(const SCEV *Expr,
947948
const Loop *OutermostLoop) {
949+
assert((!OutermostLoop || OutermostLoop->isOutermost()) &&
950+
"OutermostLoop must be outermost");
948951
assert(Expr->getType()->isIntegerTy() && "Expr must be integer type");
949952
this->OutermostLoop = OutermostLoop;
950953
return visit(Expr);

llvm/test/Analysis/DependenceAnalysis/monotonicity-no-wrap-flags.ll

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,8 @@ exit:
298298
}
299299

300300
; The value of step recurrence is not invariant with respect to the outermost
301-
; loop (the i-loop).
301+
; loop (the i-loop). It is theoretically multivariate monotonic by definition,
302+
; but we cannot handle non-affine addrec for now.
302303
;
303304
; offset_i = 0;
304305
; for (int i = 0; i < 100; i++) {
@@ -312,7 +313,8 @@ define void @step_is_variant(ptr %a) {
312313
; CHECK-NEXT: Monotonicity check:
313314
; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1
314315
; CHECK-NEXT: Expr: {%offset.i,+,1}<nuw><nsw><%loop.j>
315-
; CHECK-NEXT: Monotonicity: MultivariateSignedMonotonic
316+
; CHECK-NEXT: Monotonicity: Unknown
317+
; CHECK-NEXT: Reason: %offset.i
316318
; CHECK-EMPTY:
317319
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
318320
; CHECK-NEXT: da analyze - confused!
@@ -346,6 +348,56 @@ exit:
346348
ret void
347349
}
348350

351+
; The value of step recurrence is not invariant with respect to the outermost
352+
; loop (the i-loop). Actually, `offset_i` is not monotonic.
353+
;
354+
; offset_i = 0;
355+
; for (int i = 0; i < 100; i++) {
356+
; for (int j = 0; j < 100; j++)
357+
; a[offset_i + j] = 0;
358+
; offset_i += (i % 2 == 0) ? -1 : 3;
359+
; }
360+
;
361+
define void @step_is_variant2(ptr %a) {
362+
; CHECK-LABEL: 'step_is_variant2'
363+
; CHECK-NEXT: Monotonicity check:
364+
; CHECK-NEXT: Inst: store i8 0, ptr %idx, align 1
365+
; CHECK-NEXT: Expr: {%offset.i,+,1}<nsw><%loop.j>
366+
; CHECK-NEXT: Monotonicity: Unknown
367+
; CHECK-NEXT: Reason: %offset.i
368+
; CHECK-EMPTY:
369+
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
370+
; CHECK-NEXT: da analyze - confused!
371+
;
372+
entry:
373+
br label %loop.i.header
374+
375+
loop.i.header:
376+
%i = phi i64 [ 0, %entry ], [ %i.inc, %loop.i.latch ]
377+
%offset.i = phi i64 [ 0, %entry ], [ %offset.i.next, %loop.i.latch ]
378+
%step.i.0 = phi i64 [ -1, %entry ], [ %step.i.1, %loop.i.latch ]
379+
%step.i.1 = phi i64 [ 3, %entry ], [ %step.i.0, %loop.i.latch ]
380+
br label %loop.j
381+
382+
loop.j:
383+
%j = phi i64 [ 0, %loop.i.header ], [ %j.inc, %loop.j ]
384+
%offset = add nsw i64 %offset.i, %j
385+
%idx = getelementptr inbounds i8, ptr %a, i64 %offset
386+
store i8 0, ptr %idx
387+
%j.inc = add nsw i64 %j, 1
388+
%exitcond.j = icmp eq i64 %j.inc, 100
389+
br i1 %exitcond.j, label %loop.i.latch, label %loop.j
390+
391+
loop.i.latch:
392+
%i.inc = add nsw i64 %i, 1
393+
%offset.i.next = add nsw i64 %offset.i, %step.i.0
394+
%exitcond.i = icmp eq i64 %i.inc, 100
395+
br i1 %exitcond.i, label %exit, label %loop.i.header
396+
397+
exit:
398+
ret void
399+
}
400+
349401
; The AddRec doesn't have nsw flag for the j-loop, since the store may not be
350402
; executed.
351403
;
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
4+
5+
; We've separated this file from call-args-inreg.ll since GlobalISel does not support the bfloat type.
6+
; Ideally, we should merge the two files once that support lands.
7+
8+
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
9+
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
10+
11+
define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
12+
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
13+
; GFX9: ; %bb.0:
14+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15+
; GFX9-NEXT: s_mov_b32 s17, s33
16+
; GFX9-NEXT: s_mov_b32 s33, s32
17+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
18+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
19+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
20+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
21+
; GFX9-NEXT: s_addk_i32 s32, 0x400
22+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
23+
; GFX9-NEXT: s_getpc_b64 s[18:19]
24+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
25+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
26+
; GFX9-NEXT: s_mov_b32 s0, s16
27+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
28+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
29+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
30+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
31+
; GFX9-NEXT: s_mov_b32 s32, s33
32+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
33+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
34+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
35+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
36+
; GFX9-NEXT: s_mov_b32 s33, s4
37+
; GFX9-NEXT: s_waitcnt vmcnt(0)
38+
; GFX9-NEXT: s_setpc_b64 s[30:31]
39+
;
40+
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
41+
; GFX11: ; %bb.0:
42+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43+
; GFX11-NEXT: s_mov_b32 s1, s33
44+
; GFX11-NEXT: s_mov_b32 s33, s32
45+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
46+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
47+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
48+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
49+
; GFX11-NEXT: s_add_i32 s32, s32, 16
50+
; GFX11-NEXT: s_getpc_b64 s[2:3]
51+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
52+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
53+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
54+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
55+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
56+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
57+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
58+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
59+
; GFX11-NEXT: s_mov_b32 s32, s33
60+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
61+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
62+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
63+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
64+
; GFX11-NEXT: s_mov_b32 s33, s0
65+
; GFX11-NEXT: s_waitcnt vmcnt(0)
66+
; GFX11-NEXT: s_setpc_b64 s[30:31]
67+
call void @external_void_func_bf16_inreg(bfloat inreg %arg)
68+
ret void
69+
}
70+
71+
define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
72+
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
73+
; GFX9: ; %bb.0:
74+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75+
; GFX9-NEXT: s_mov_b32 s17, s33
76+
; GFX9-NEXT: s_mov_b32 s33, s32
77+
; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1
78+
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
79+
; GFX9-NEXT: s_mov_b64 exec, s[18:19]
80+
; GFX9-NEXT: v_writelane_b32 v40, s17, 2
81+
; GFX9-NEXT: s_addk_i32 s32, 0x400
82+
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
83+
; GFX9-NEXT: s_getpc_b64 s[18:19]
84+
; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
85+
; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
86+
; GFX9-NEXT: s_mov_b32 s0, s16
87+
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
88+
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
89+
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
90+
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
91+
; GFX9-NEXT: s_mov_b32 s32, s33
92+
; GFX9-NEXT: v_readlane_b32 s4, v40, 2
93+
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
94+
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
95+
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
96+
; GFX9-NEXT: s_mov_b32 s33, s4
97+
; GFX9-NEXT: s_waitcnt vmcnt(0)
98+
; GFX9-NEXT: s_setpc_b64 s[30:31]
99+
;
100+
; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg:
101+
; GFX11: ; %bb.0:
102+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103+
; GFX11-NEXT: s_mov_b32 s1, s33
104+
; GFX11-NEXT: s_mov_b32 s33, s32
105+
; GFX11-NEXT: s_or_saveexec_b32 s2, -1
106+
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
107+
; GFX11-NEXT: s_mov_b32 exec_lo, s2
108+
; GFX11-NEXT: v_writelane_b32 v40, s1, 2
109+
; GFX11-NEXT: s_add_i32 s32, s32, 16
110+
; GFX11-NEXT: s_getpc_b64 s[2:3]
111+
; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
112+
; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
113+
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
114+
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
115+
; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
116+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
117+
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
118+
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
119+
; GFX11-NEXT: s_mov_b32 s32, s33
120+
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
121+
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
122+
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
123+
; GFX11-NEXT: s_mov_b32 exec_lo, s1
124+
; GFX11-NEXT: s_mov_b32 s33, s0
125+
; GFX11-NEXT: s_waitcnt vmcnt(0)
126+
; GFX11-NEXT: s_setpc_b64 s[30:31]
127+
call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
128+
ret void
129+
}
130+

0 commit comments

Comments
 (0)