Skip to content

Commit e260667

Browse files
author
chenqian
committed
[Pass] add RISCVESP32P4LoopVectorizeExtractor pass
1 parent 36736a6 commit e260667

10 files changed

+1307
-151
lines changed

llvm/lib/Target/RISCV/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen
4444
RISCVCustomLICM.cpp
4545
RISCVLoopUnrollAndRemainder.cpp
4646
RISCVEsp32P4MemIntrin.cpp
47+
RISCVESP32P4LoopVectorizeExtractor.cpp
4748
RISCVIndirectBranchTracking.cpp
4849
RISCVIntLoopUnrollAndRemainder.cpp
4950
RISCVDotprodSplitter.cpp
Lines changed: 355 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,355 @@
1+
//===-- RISCVESP32P4LoopVectorizeExtractor.cpp -Loop Vectorizer -----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements a pass that prepares loops for ESP32-P4 specific
10+
// vectorization by setting appropriate loop metadata and running vectorization
11+
// passes optimized for ESP32-P4 SIMD capabilities.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "RISCVESP32P4LoopVectorizeExtractor.h"
16+
#include "llvm/ADT/SmallVector.h"
17+
#include "llvm/ADT/Statistic.h"
18+
#include "llvm/Analysis/LoopInfo.h"
19+
#include "llvm/IR/Constants.h"
20+
#include "llvm/IR/DataLayout.h"
21+
#include "llvm/IR/Function.h"
22+
#include "llvm/IR/InstIterator.h"
23+
#include "llvm/IR/Instructions.h"
24+
#include "llvm/IR/LLVMContext.h"
25+
#include "llvm/IR/Metadata.h"
26+
#include "llvm/IR/Module.h"
27+
#include "llvm/IR/PassManager.h"
28+
#include "llvm/IR/Type.h"
29+
#include "llvm/Passes/PassBuilder.h"
30+
#include "llvm/Support/CommandLine.h"
31+
#include "llvm/Support/Debug.h"
32+
#include "llvm/Support/TypeSize.h"
33+
#include "llvm/Support/raw_ostream.h"
34+
#include "llvm/Transforms/IPO/LoopExtractor.h"
35+
#include "llvm/Transforms/Scalar/LoopPassManager.h"
36+
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
37+
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
38+
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
39+
40+
using namespace llvm;
41+
42+
#define DEBUG_TYPE "riscv-esp32p4-loop-vectorize-extractor"
43+
44+
// ---- Numeric parameters for ESP32-P4 vectorization ----

// SIMD width in bits; divided by the loop's element width to derive the
// vectorization factor written into the loop metadata.
static constexpr unsigned ESP32P4_SIMD_BIT_WIDTH = 128;
// Interleave count passed by run() when it invokes the vectorization step.
static constexpr unsigned DEFAULT_INTERLEAVE_COUNT = 1;

// ---- Names of attributes and loop-metadata keys used by this file ----

// Function attribute that must be present for a function to be processed.
static constexpr char TARGET_FEATURES_ATTR_NAME[] = "target-features";
// Loop-metadata key a loop must carry to be treated as a candidate.
static constexpr char MUSTPROGRESS_METADATA_NAME[] = "llvm.loop.mustprogress";

// Loop-metadata keys emitted by createVectorizationMetadata().
static constexpr char VECTORIZE_ENABLE[] = "llvm.loop.vectorize.enable";
static constexpr char VECTORIZE_WIDTH[] = "llvm.loop.vectorize.width";
static constexpr char VECTORIZE_SCALABLE_ENABLE[] =
    "llvm.loop.vectorize.scalable.enable";
static constexpr char INTERLEAVE_COUNT[] = "llvm.loop.interleave.count";
56+
57+
// Command line option to enable/disable RISCVESP32P4LoopVectorizeExtractor
58+
cl::opt<bool> llvm::EnableRISCVESP32P4LoopVectorizeExtractor(
59+
"riscv-esp32p4-loop-vectorize-extractor", cl::init(false),
60+
cl::desc("Enable RISC-V ESP32-P4 loop vectorization extractor for specific "
61+
"loops"));
62+
63+
// Pass statistics.
// Bumped each time the enabled function pass is run on a function.
STATISTIC(NumFunctionsProcessed, "Number of functions processed");
// Bumped each time a loop receives the forced-vectorization loop ID.
STATISTIC(NumLoopsVectorized, "Number of loops prepared for vectorization");
// Bumped each time the conditional wrapper decides to run its wrapped
// module pipeline.
STATISTIC(NumModulesWithExtraction,
          "Number of modules requiring loop extraction");
67+
68+
/// Extract the element type from memory access instructions
69+
static Type *getElementTypeFromInstruction(const Instruction &I) {
70+
if (const auto *LI = dyn_cast<LoadInst>(&I)) {
71+
return LI->getType();
72+
}
73+
if (const auto *SI = dyn_cast<StoreInst>(&I)) {
74+
return SI->getValueOperand()->getType();
75+
}
76+
return nullptr;
77+
}
78+
79+
/// Get the minimum element bit width from loop body memory accesses
80+
static unsigned getLoopBodyElementBitWidth(Loop *L, const DataLayout &DL) {
81+
if (!L || L->getBlocks().empty())
82+
return 0;
83+
84+
TypeSize MinBitWidth = TypeSize::getFixed(UINT_MAX);
85+
86+
for (BasicBlock *BB : L->getBlocks()) {
87+
for (const Instruction &I : *BB) {
88+
Type *ElTy = getElementTypeFromInstruction(I);
89+
if (!ElTy)
90+
continue;
91+
92+
// Handle vector types by extracting element type
93+
if (ElTy->isVectorTy())
94+
ElTy = cast<VectorType>(ElTy)->getElementType();
95+
96+
// Only consider integer and floating-point types
97+
if (ElTy->isIntegerTy() || ElTy->isFloatingPointTy()) {
98+
MinBitWidth = std::min(MinBitWidth, DL.getTypeSizeInBits(ElTy));
99+
}
100+
}
101+
}
102+
103+
return (!MinBitWidth.isScalable() &&
104+
MinBitWidth.getKnownMinValue() != UINT_MAX)
105+
? MinBitWidth.getKnownMinValue()
106+
: 0;
107+
}
108+
109+
/// Check if a loop has the required mustprogress metadata
110+
static bool hasLoopMustProgressMetadata(const Loop *L) {
111+
if (!L)
112+
return false;
113+
114+
const MDNode *LoopID = L->getLoopID();
115+
if (!LoopID)
116+
return false;
117+
118+
for (unsigned I = 1; I < LoopID->getNumOperands(); ++I) {
119+
if (const auto *MD = dyn_cast<MDNode>(LoopID->getOperand(I))) {
120+
if (MD->getNumOperands() >= 1) {
121+
if (const auto *S = dyn_cast<MDString>(MD->getOperand(0))) {
122+
if (S->getString() == MUSTPROGRESS_METADATA_NAME) {
123+
return true;
124+
}
125+
}
126+
}
127+
}
128+
}
129+
return false;
130+
}
131+
132+
/// Determine if a loop is a candidate for vectorization
133+
static bool isLoopVectorizationCandidate(const Loop *L, const DataLayout &DL) {
134+
if (!L)
135+
return false;
136+
137+
// Only process innermost loops
138+
if (!L->isInnermost())
139+
return false;
140+
141+
// Check for required metadata
142+
if (!hasLoopMustProgressMetadata(L))
143+
return false;
144+
145+
// Verify element bit width compatibility
146+
unsigned ElementBitWidth =
147+
getLoopBodyElementBitWidth(const_cast<Loop *>(L), DL);
148+
if (ElementBitWidth == 0) {
149+
LLVM_DEBUG(dbgs() << "Loop has no valid element bit width\n");
150+
return false;
151+
}
152+
153+
// Check if SIMD width is compatible with element width
154+
if (ESP32P4_SIMD_BIT_WIDTH % ElementBitWidth != 0) {
155+
LLVM_DEBUG(dbgs() << "SIMD width " << ESP32P4_SIMD_BIT_WIDTH
156+
<< " not compatible with element width "
157+
<< ElementBitWidth << "\n");
158+
return false;
159+
}
160+
161+
return true;
162+
}
163+
164+
/// Create vectorization metadata for a loop
165+
static MDNode *createVectorizationMetadata(LLVMContext &Ctx,
166+
unsigned VectorWidth,
167+
unsigned InterleaveCount) {
168+
// Create individual metadata nodes
169+
MDNode *MustProgress =
170+
MDNode::get(Ctx, MDString::get(Ctx, MUSTPROGRESS_METADATA_NAME));
171+
172+
MDNode *NoScalable = MDNode::get(
173+
Ctx,
174+
{MDString::get(Ctx, VECTORIZE_SCALABLE_ENABLE),
175+
ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Ctx), 0))});
176+
177+
MDNode *Interleave =
178+
MDNode::get(Ctx, {MDString::get(Ctx, INTERLEAVE_COUNT),
179+
ConstantAsMetadata::get(ConstantInt::get(
180+
Type::getInt32Ty(Ctx), InterleaveCount))});
181+
182+
MDNode *VecEnable = MDNode::get(
183+
Ctx,
184+
{MDString::get(Ctx, VECTORIZE_ENABLE),
185+
ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Ctx), 1))});
186+
187+
MDNode *VecWidthMD =
188+
MDNode::get(Ctx, {MDString::get(Ctx, VECTORIZE_WIDTH),
189+
ConstantAsMetadata::get(ConstantInt::get(
190+
Type::getInt32Ty(Ctx), VectorWidth))});
191+
192+
// Assemble the complete metadata
193+
SmallVector<Metadata *, 6> MDs;
194+
MDs.push_back(nullptr); // Self-reference placeholder
195+
MDs.push_back(MustProgress);
196+
MDs.push_back(VecWidthMD);
197+
MDs.push_back(NoScalable);
198+
MDs.push_back(Interleave);
199+
MDs.push_back(VecEnable);
200+
201+
MDNode *NewLoopID = MDNode::get(Ctx, MDs);
202+
NewLoopID->replaceOperandWith(0, NewLoopID); // Set self-reference
203+
return NewLoopID;
204+
}
205+
206+
/// Attach forced-vectorization loop metadata to the first candidate loop of a
/// function.
///
/// The vectorization factor is ESP32P4_SIMD_BIT_WIDTH divided by the loop's
/// minimum element width.
///
/// NOTE(review): only the top-level loops yielded by iterating LoopInfo are
/// examined, and only the first candidate is annotated before returning.
/// Confirm that skipping nested loops and any later candidates is intended.
/// NOTE(review): setLoopID installs a brand-new loop ID; any other entries on
/// the previous loop ID are not carried over. Confirm this is intended.
///
/// \param F               function whose loops are inspected.
/// \param AM              analysis manager providing LoopAnalysis for \p F.
/// \param InterleaveCount interleave count written into the metadata.
/// \returns true if a loop was annotated, false if no candidate was found.
bool RISCVESP32P4LoopVectorizeExtractorPass::prepareLoopForVectorization(
    Function &F, FunctionAnalysisManager &AM, unsigned InterleaveCount) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  LoopInfo &Loops = AM.getResult<LoopAnalysis>(F);

  auto FirstCandidate = llvm::find_if(Loops, [&DL](const Loop *Candidate) {
    return isLoopVectorizationCandidate(Candidate, DL);
  });
  if (FirstCandidate == Loops.end())
    return false;

  Loop *Target = *FirstCandidate;

  // The candidate check already rejected a zero width, so the division below
  // is safe; the assert documents that dependency.
  const unsigned ElementBitWidth = getLoopBodyElementBitWidth(Target, DL);
  assert(ElementBitWidth != 0 && "Element bit width should not be zero");
  const unsigned VectorWidth = ESP32P4_SIMD_BIT_WIDTH / ElementBitWidth;

  LLVM_DEBUG(dbgs() << "Vectorizing loop in " << F.getName()
                    << " with element type width " << ElementBitWidth
                    << " and calculated vector factor " << VectorWidth
                    << "\n");

  Target->setLoopID(createVectorizationMetadata(F.getContext(), VectorWidth,
                                                InterleaveCount));
  ++NumLoopsVectorized;
  return true;
}
236+
237+
/// Report whether a function contains at least one loop this pass would
/// annotate.
///
/// \param F  function to inspect.
/// \param AM analysis manager providing LoopAnalysis for \p F.
/// \returns false if \p F has no "target-features" attribute; otherwise true
///          exactly when one of the loops yielded by iterating LoopInfo is a
///          vectorization candidate.
bool RISCVESP32P4LoopVectorizeExtractorPass::hasProcessableLoops(
    Function &F, FunctionAnalysisManager &AM) {
  // Functions without the attribute are never processed, so skip the loop
  // analysis entirely.
  if (!F.getFnAttribute(TARGET_FEATURES_ATTR_NAME).isValid())
    return false;

  LoopInfo &Loops = AM.getResult<LoopAnalysis>(F);
  const DataLayout &DL = F.getParent()->getDataLayout();

  for (const Loop *Candidate : Loops) {
    if (isLoopVectorizationCandidate(Candidate, DL))
      return true;
  }
  return false;
}
251+
252+
/// Report whether any defined function in a module has a loop that the
/// vectorize-extractor pass would process.
///
/// \param M  module to scan.
/// \param AM module analysis manager; used to reach the function analysis
///           manager through FunctionAnalysisManagerModuleProxy.
/// \returns true as soon as one function with processable loops is found.
bool RISCVESP32P4LoopExtractorConditionalWrapper::hasLoopsNeedingExtraction(
    Module &M, ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  for (Function &F : M) {
    // Declarations have no body and therefore no loops.
    if (F.isDeclaration())
      continue;

    if (!RISCVESP32P4LoopVectorizeExtractorPass::hasProcessableLoops(F, FAM))
      continue;

    LLVM_DEBUG(dbgs() << "Found function " << F.getName()
                      << " with processable loops needing extraction\n");
    return true;
  }

  return false;
}
271+
272+
/// Annotate a candidate loop in a function and then run a private
/// LoopVectorize -> SLPVectorizer -> LoopStrengthReduce pipeline on it.
///
/// \param F               function to transform.
/// \param AM              caller's analysis manager; used only while
///                        annotating the loop, not for the nested pipeline.
/// \param InterleaveCount interleave count written into the loop metadata.
/// \returns false if \p F lacks the "target-features" attribute or no loop was
///          annotated; true otherwise (the IR was modified at least by the
///          metadata update).
bool RISCVESP32P4LoopVectorizeExtractorPass::runVectorizationPass(
    Function &F, FunctionAnalysisManager &AM, unsigned InterleaveCount) {
  const bool HasTargetFeatures =
      F.getFnAttribute(TARGET_FEATURES_ATTR_NAME).isValid();
  if (!HasTargetFeatures) {
    LLVM_DEBUG(
        dbgs()
        << "Function " << F.getName()
        << " lacks target-features attribute. Skipping vectorization.\n");
    return false;
  }

  // Nothing to vectorize unless a loop was actually annotated.
  if (!prepareLoopForVectorization(F, AM, InterleaveCount))
    return false;

  // The nested pipeline gets its own set of analysis managers so that it does
  // not read or write the caller's cached analyses.
  // NOTE(review): the PassBuilder is default-constructed, so the analyses
  // registered below are presumably not tied to a TargetMachine -- confirm
  // that target-specific cost information is not required here.
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;

  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Do not restrict vectorization or interleaving to explicitly forced loops.
  LoopVectorizeOptions Opts;
  Opts.InterleaveOnlyWhenForced = false;
  Opts.VectorizeOnlyWhenForced = false;

  FunctionPassManager FPM;
  FPM.addPass(LoopVectorizePass(Opts));
  FPM.addPass(SLPVectorizerPass());
  FPM.addPass(createFunctionToLoopPassAdaptor(LoopStrengthReducePass()));

  // The pipeline's PreservedAnalyses result is intentionally unused: the
  // metadata update above already counts as a change.
  FPM.run(F, FAM);

  return true;
}
318+
319+
/// New-pass-manager entry point for the function pass.
///
/// Does nothing unless the EnableRISCVESP32P4LoopVectorizeExtractor option is
/// set. Otherwise it counts the function and runs the vectorization step with
/// DEFAULT_INTERLEAVE_COUNT.
///
/// \returns PreservedAnalyses::none() if the function was changed, otherwise
///          PreservedAnalyses::all().
PreservedAnalyses
RISCVESP32P4LoopVectorizeExtractorPass::run(Function &F,
                                            FunctionAnalysisManager &FAM) {
  if (EnableRISCVESP32P4LoopVectorizeExtractor) {
    ++NumFunctionsProcessed;

    if (runVectorizationPass(F, FAM, DEFAULT_INTERLEAVE_COUNT))
      return PreservedAnalyses::none();
  }

  return PreservedAnalyses::all();
}
330+
331+
//===----------------------------------------------------------------------===//
332+
// RISCVESP32P4LoopExtractorConditionalWrapper Implementation
333+
//===----------------------------------------------------------------------===//
334+
335+
/// Construct the wrapper by moving the given module pipeline into the PM
/// member; run() executes that pipeline only when the module needs it.
RISCVESP32P4LoopExtractorConditionalWrapper::
    RISCVESP32P4LoopExtractorConditionalWrapper(ModulePassManager &&Pipeline)
    : PM(std::move(Pipeline)) {}
338+
339+
/// Run the wrapped module pipeline only if the module contains loops that
/// need extraction.
///
/// \returns the wrapped pipeline's PreservedAnalyses when it runs, otherwise
///          PreservedAnalyses::all().
PreservedAnalyses
RISCVESP32P4LoopExtractorConditionalWrapper::run(Module &M,
                                                 ModuleAnalysisManager &AM) {
  if (hasLoopsNeedingExtraction(M, AM)) {
    ++NumModulesWithExtraction;

    LLVM_DEBUG(dbgs() << "Running LoopExtractor passes for module "
                      << M.getName() << "\n");

    return PM.run(M, AM);
  }

  LLVM_DEBUG(dbgs() << "No loops needing extraction found in module "
                    << M.getName() << ", skipping LoopExtractor passes\n");
  return PreservedAnalyses::all();
}

0 commit comments

Comments
 (0)