Skip to content

Commit 27e1d7f

Browse files
Martien de Jong (martien-de-jong)
authored and committed
[AIE] Redirect some key logging to optimization remarks
1 parent 3b8df0d commit 27e1d7f

File tree

6 files changed: 228 additions and 5 deletions

llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
#include "Utils/AIELoopUtils.h"
2323
#include "llvm/ADT/PostOrderIterator.h"
2424
#include "llvm/CodeGen/MachineBasicBlock.h"
25+
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
2526
#include "llvm/CodeGen/MachineScheduler.h"
2627
#include "llvm/Support/ErrorHandling.h"
2728
#include <memory>
@@ -618,6 +619,15 @@ SchedulingStage InterBlockScheduling::updatePipelining(BlockState &BS) {
618619
return BS.FixPoint.Stage = SchedulingStage::Pipelining;
619620
}
620621

622+
auto *BB = BS.TheBlock;
623+
auto DbgLoc = BB->begin()->getDebugLoc();
624+
MachineOptimizationRemarkEmitter More(*BB->getParent(), nullptr);
625+
More.emit([&]() {
626+
return MachineOptimizationRemarkMissed("postpipeliner", "schedule", DbgLoc,
627+
BB)
628+
<< "No schedule found.";
629+
});
630+
621631
// Fall back to the loop schedule. Note that we can only have II != 0
622632
// after the loop schedule has stabilized.
623633
return BS.FixPoint.Stage = SchedulingStage::PipeliningFailed;

llvm/lib/Target/AIE/AIEMachineScheduler.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/ADT/iterator_range.h"
2222
#include "llvm/CodeGen/MachineBasicBlock.h"
2323
#include "llvm/CodeGen/MachineInstr.h"
24+
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
2425
#include "llvm/CodeGen/MachineScheduler.h"
2526
#include "llvm/CodeGen/ResourceScoreboard.h"
2627
#include "llvm/Support/Debug.h"
@@ -1527,7 +1528,9 @@ void AIEScheduleDAGMI::schedule() {
15271528
postProcessDAG();
15281529

15291530
auto &PostSWP = BS.getPostSWP();
1530-
if (PostSWP.schedule(*this, BS.FixPoint.II)) {
1531+
1532+
MachineOptimizationRemarkEmitter More(*getBB()->getParent(), nullptr);
1533+
if (PostSWP.schedule(*this, BS.FixPoint.II, More)) {
15311534
BS.setPipelined();
15321535
LLVM_DEBUG(PostSWP.dump());
15331536
}

llvm/lib/Target/AIE/AIEMachineScheduler.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
7-
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
7+
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
88
//
99
//===----------------------------------------------------------------------===//
1010
//
@@ -20,7 +20,6 @@
2020
#include "AIEPostPipeliner.h"
2121
#include "llvm/CodeGen/MachineBasicBlock.h"
2222
#include "llvm/CodeGen/MachineScheduler.h"
23-
#include <memory>
2423

2524
namespace llvm {
2625

llvm/lib/Target/AIE/AIEPostPipeliner.cpp

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
#include "AIEPostPipeliner.h"
1515
#include "AIESlotCounts.h"
1616
#include "Utils/AIELoopUtils.h"
17+
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
18+
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
1719
#include "llvm/CodeGen/ScheduleDAG.h"
1820
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
1921
#include "llvm/Support/MathExtras.h"
@@ -867,11 +869,21 @@ bool PostPipeliner::tryHeuristics() {
867869
return false;
868870
}
869871

870-
bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) {
872+
bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval,
873+
MachineOptimizationRemarkEmitter &More) {
871874
NTotalInstrs = TheDAG.SUnits.size();
872875
assert(NTotalInstrs % NInstr == 0);
873876
NCopies = NTotalInstrs / NInstr;
877+
878+
auto *BB = TheDAG.getBB();
879+
auto DbgLoc = BB->begin()->getDebugLoc();
880+
874881
if (NCopies == 1) {
882+
More.emit([&]() {
883+
return MachineOptimizationRemarkMissed("postpipeliner", "schedule",
884+
DbgLoc, BB)
885+
<< "Not feasible.";
886+
});
875887
LLVM_DEBUG(dbgs() << "PostPipeliner: Not feasible\n");
876888
return false;
877889
}
@@ -899,6 +911,11 @@ bool PostPipeliner::schedule(ScheduleDAGMI &TheDAG, int InitiationInterval) {
899911
return false;
900912
}
901913

914+
More.emit([&]() {
915+
return MachineOptimizationRemark("postpipeliner", "schedule", DbgLoc, BB)
916+
<< "Schedule found: NS=" << ore::NV("NS", NStages)
917+
<< " II=" << ore::NV("II", II);
918+
});
902919
LLVM_DEBUG(dbgs() << "PostPipeliner: Success\n");
903920
return true;
904921
}

llvm/lib/Target/AIE/AIEPostPipeliner.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
namespace llvm {
2525
class MachineInstr;
2626
class AIEHazardRecognizer;
27+
class MachineOptimizationRemarkEmitter;
2728
} // namespace llvm
2829

2930
namespace llvm::AIE {
@@ -244,7 +245,8 @@ class PostPipeliner {
244245

245246
// Schedule using the given InitiationInterval. Return true when successful.
246247
// In that case calls to the query methods below are legitimate
247-
bool schedule(ScheduleDAGMI &DAG, int InitiationInterval);
248+
bool schedule(ScheduleDAGMI &DAG, int InitiationInterval,
249+
MachineOptimizationRemarkEmitter &More);
248250

249251
// quick query for the stage count
250252
int getStageCount() { return NStages; }
Lines changed: 192 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,192 @@
1+
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
2+
# See https://llvm.org/LICENSE.txt for license information.
3+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
#
5+
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
6+
7+
# RUN: llc --mtriple=aie2p -O2 --start-before=postmisched %s \
8+
# RUN: --aie-addrspace-none-is-safe=1 \
9+
# RUN: -pass-remarks-output=- -pass-remarks-filter=postpipeliner -o /dev/null | FileCheck %s
10+
11+
12+
--- |
13+
define dso_local void @gemm(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
14+
; CHECK:--- !Passed
15+
; CHECK-NEXT:Pass: postpipeliner
16+
; CHECK-NEXT:Name: schedule
17+
; CHECK-NEXT:Function: gemm
18+
; CHECK-NEXT:Args:
19+
; CHECK-NEXT: - String: 'Schedule found: NS='
20+
; CHECK-NEXT: - NS: '5'
21+
; CHECK-NEXT: - String: ' II='
22+
; CHECK-NEXT: - II: '7'
23+
; CHECK-NEXT:...
24+
entry:
25+
%cmp5 = icmp sgt i32 %n, 0
26+
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
27+
28+
for.body.preheader: ; preds = %entry
29+
call void @llvm.set.loop.iterations.i32(i32 %n)
30+
br label %for.body
31+
32+
for.body: ; preds = %for.body.preheader, %for.body
33+
%d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
34+
%s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
35+
%0 = load i32, ptr addrspace(6) %s.addr.06, align 4
36+
%add = add nsw i32 %0, 1
37+
store i32 %add, ptr addrspace(5) %d.addr.07, align 4
38+
%incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
39+
%incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
40+
%1 = call i1 @llvm.loop.decrement.i32(i32 1)
41+
br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !0
42+
43+
for.cond.cleanup: ; preds = %for.body, %entry
44+
ret void
45+
46+
}
47+
48+
49+
define dso_local void @gemm_lowitercount(ptr addrspace(5) noalias nocapture writeonly %d, ptr addrspace(6) noalias nocapture readonly %s, i32 noundef %n) local_unnamed_addr {
50+
; CHECK:--- !Missed
51+
; CHECK-NEXT:Pass: postpipeliner
52+
; CHECK-NEXT:Name: schedule
53+
; CHECK-NEXT:Function: gemm_lowitercount
54+
; CHECK-NEXT:Args:
55+
; CHECK-NEXT: - String: No schedule found.
56+
; CHECK-NEXT:...
57+
entry:
58+
%cmp5 = icmp sgt i32 %n, 0
59+
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup
60+
61+
for.body.preheader: ; preds = %entry
62+
call void @llvm.set.loop.iterations.i32(i32 %n)
63+
br label %for.body
64+
65+
for.body: ; preds = %for.body.preheader, %for.body
66+
%d.addr.07 = phi ptr addrspace(5) [ %incdec.ptr, %for.body ], [ %d, %for.body.preheader ]
67+
%s.addr.06 = phi ptr addrspace(6) [ %incdec.ptr1, %for.body ], [ %s, %for.body.preheader ]
68+
%0 = load i32, ptr addrspace(6) %s.addr.06, align 4
69+
%add = add nsw i32 %0, 1
70+
store i32 %add, ptr addrspace(5) %d.addr.07, align 4
71+
%incdec.ptr = getelementptr inbounds i32, ptr addrspace(5) %d.addr.07, i20 1
72+
%incdec.ptr1 = getelementptr inbounds i32, ptr addrspace(6) %s.addr.06, i20 1
73+
%1 = call i1 @llvm.loop.decrement.i32(i32 1)
74+
br i1 %1, label %for.body, label %for.cond.cleanup, !llvm.loop !3
75+
76+
for.cond.cleanup: ; preds = %for.body, %entry
77+
ret void
78+
79+
}
80+
81+
declare void @llvm.set.loop.iterations.i32(i32)
82+
declare i1 @llvm.loop.decrement.i32(i32)
83+
84+
!0 = distinct !{!0, !1, !2}
85+
!1 = !{!"llvm.loop.mustprogress"}
86+
!2 = !{!"llvm.loop.itercount.range", i64 10}
87+
!3 = distinct !{!3, !1, !4}
88+
!4 = !{!"llvm.loop.itercount.range", i64 2}
89+
90+
...
91+
---
92+
name: gemm
93+
alignment: 16
94+
tracksRegLiveness: true
95+
body: |
96+
bb.0.entry (align 16):
97+
successors: %bb.2
98+
liveins: $p0, $p1, $r0
99+
100+
$lc = ADD_NC_mv_add_ri $r0, 0
101+
$ls = MOVXM %bb.2
102+
$le = MOVXM <mcsymbol .L_LEnd0>
103+
104+
bb.2.for.body (align 16):
105+
successors: %bb.2, %bb.3
106+
liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y5:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
107+
108+
$p5 = MOVS $p6
109+
$x4 = VLDB_dmx_ldb_x_idx_imm $p6, 64 :: (load (<16 x s32>))
110+
$x9, $p6, $dc0, $dc4 = VLDB_3D_dmx_ldb_x $p6, $d0_3d :: (load (<16 x s32>))
111+
$p3 = MOVS $p7
112+
$cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 64 :: (load (<32 x s16>))
113+
$cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf $p7, $d1_3d :: (load (<32 x s16>))
114+
$x8 = VSHUFFLE_vec_shuffle_x $x9, $x4, $r0
115+
$x9 = VSHUFFLE_vec_shuffle_x $x9, $x4, $r1
116+
$p5 = PADDB_pstm_nrm $p5, $m4
117+
$x6 = VLDB_dmx_ldb_x_idx_imm $p5, 0 :: (load (<16 x s32>))
118+
$x1 = VLDB_dmx_ldb_x_idx_imm $p5, 64 :: (load (<16 x s32>))
119+
$ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
120+
$dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y4, $y5, $r2, implicit-def $srfpflags, implicit $crfpmask
121+
$x0 = VSHUFFLE_vec_shuffle_x $x1, $x6, $r0
122+
$x1 = VSHUFFLE_vec_shuffle_x $x1, $x6, $r1
123+
$ex3 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
124+
$dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y0, $y5, $r2, implicit-def $srfpflags, implicit $crfpmask
125+
$p3 = PADDA_pstm_nrm $p3, $m5
126+
$ex5 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
127+
$cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 0 :: (load (<32 x s16>))
128+
$cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 64 :: (load (<32 x s16>))
129+
$ex7 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
130+
$dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm3, $ex2, $ex3, $r3, implicit-def $srfpflags, implicit $crfpmask
131+
$dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm2, $ex2, $ex5, $r3, implicit-def $srfpflags, implicit $crfpmask
132+
$dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm1, $ex7, $ex3, $r3, implicit-def $srfpflags, implicit $crfpmask
133+
$dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm0, $ex7, $ex5, $r3, implicit-def $srfpflags, implicit $crfpmask
134+
135+
PseudoLoopEnd <mcsymbol .L_LEnd2>, %bb.2
136+
137+
bb.3 (align 16):
138+
RET implicit $lr
139+
DelayedSchedBarrier
140+
141+
...
142+
---
143+
name: gemm_lowitercount
144+
alignment: 16
145+
tracksRegLiveness: true
146+
body: |
147+
bb.0.entry (align 16):
148+
successors: %bb.2
149+
liveins: $p0, $p1, $r0
150+
151+
$lc = ADD_NC_mv_add_ri $r0, 0
152+
$ls = MOVXM %bb.2
153+
$le = MOVXM <mcsymbol .L_LEnd1>
154+
155+
bb.2.for.body (align 16):
156+
successors: %bb.2, %bb.3
157+
liveins: $dc0, $dc1, $dc2, $dc3, $dc4, $dc5, $dj0, $dj1, $dj2, $dj3, $dj4, $dj5, $dj7, $dm0:0x000000001800000C, $dm1:0x000000001800000C, $dm2:0x000000001800000C, $dm3:0x000000001800000C, $dn0, $dn1, $dn2, $dn4, $dn5, $m0, $m1, $m2, $m3, $m4, $m5, $p0, $p1, $p2, $p4, $p6, $p7, $r0, $r1, $r2, $r3, $r4, $r8, $y5:0x0000000000000033, $d0_3d:0x0003C00000200E00, $d1_3d:0x0003C00000200E00, $dn3
158+
159+
$p5 = MOVS $p6
160+
$x4 = VLDB_dmx_ldb_x_idx_imm $p6, 64 :: (load (<16 x s32>))
161+
$x9, $p6, $dc0, $dc4 = VLDB_3D_dmx_ldb_x $p6, $d0_3d :: (load (<16 x s32>))
162+
$p3 = MOVS $p7
163+
$cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p7, 64 :: (load (<32 x s16>))
164+
$cml4, $p7, $dc1, $dc5 = VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf $p7, $d1_3d :: (load (<32 x s16>))
165+
$x8 = VSHUFFLE_vec_shuffle_x $x9, $x4, $r0
166+
$x9 = VSHUFFLE_vec_shuffle_x $x9, $x4, $r1
167+
$p5 = PADDB_pstm_nrm $p5, $m4
168+
$x6 = VLDB_dmx_ldb_x_idx_imm $p5, 0 :: (load (<16 x s32>))
169+
$x1 = VLDB_dmx_ldb_x_idx_imm $p5, 64 :: (load (<16 x s32>))
170+
$ex2 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
171+
$dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y4, $y5, $r2, implicit-def $srfpflags, implicit $crfpmask
172+
$x0 = VSHUFFLE_vec_shuffle_x $x1, $x6, $r0
173+
$x1 = VSHUFFLE_vec_shuffle_x $x1, $x6, $r1
174+
$ex3 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
175+
$dm4 = VMUL_f_vmul_bf_vmul_bf_core_Y_Y $y0, $y5, $r2, implicit-def $srfpflags, implicit $crfpmask
176+
$p3 = PADDA_pstm_nrm $p3, $m5
177+
$ex5 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
178+
$cml4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 0 :: (load (<32 x s16>))
179+
$cmh4 = VLDA_CONV_fp32_bf16_dmx_lda_ups_bf_idx_imm $p3, 64 :: (load (<32 x s16>))
180+
$ex7 = VCONV_bfp16ebs8_fp32 $dm4, implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd
181+
$dm3 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm3, $ex2, $ex3, $r3, implicit-def $srfpflags, implicit $crfpmask
182+
$dm2 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm2, $ex2, $ex5, $r3, implicit-def $srfpflags, implicit $crfpmask
183+
$dm1 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm1, $ex7, $ex3, $r3, implicit-def $srfpflags, implicit $crfpmask
184+
$dm0 = VMAC_f_vmac_bfp_vmul_bfp_core_EX_EX $dm0, $ex7, $ex5, $r3, implicit-def $srfpflags, implicit $crfpmask
185+
186+
PseudoLoopEnd <mcsymbol .L_LEnd1>, %bb.2
187+
188+
bb.3 (align 16):
189+
RET implicit $lr
190+
DelayedSchedBarrier
191+
192+
...

0 commit comments

Comments
 (0)