//===-- RISCVESP32P4LoopVectorizeExtractor.cpp - Loop Vectorizer ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass that prepares loops for ESP32-P4 specific
// vectorization by setting appropriate loop metadata and running vectorization
// passes optimized for ESP32-P4 SIMD capabilities.
//
//===----------------------------------------------------------------------===//
14+
#include "RISCVESP32P4LoopVectorizeExtractor.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include <optional>
39+
40+ using namespace llvm ;
41+
// Name used by LLVM_DEBUG / -debug-only and by the STATISTIC counters below.
// It must not contain leading whitespace or it cannot be selected.
#define DEBUG_TYPE "riscv-esp32p4-loop-vectorize-extractor"

// Constants for ESP32-P4 specific vectorization.

// Width in bits of one SIMD vector; the vectorization factor requested for a
// loop is this value divided by the loop's narrowest element width.
static constexpr unsigned ESP32P4_SIMD_BIT_WIDTH = 128;
// Interleave count requested for every loop this pass annotates.
static constexpr unsigned DEFAULT_INTERLEAVE_COUNT = 1;
// Loop attribute a loop must already carry to be considered by this pass.
static constexpr char MUSTPROGRESS_METADATA_NAME[] = "llvm.loop.mustprogress";
// Function attribute that must be present for a function to be processed.
static constexpr char TARGET_FEATURES_ATTR_NAME[] = "target-features";

// Vectorization metadata names. These are matched verbatim by LLVM's loop
// passes, so they have to be spelled exactly as in the LLVM Language Reference
// (no surrounding whitespace).
static constexpr char VECTORIZE_SCALABLE_ENABLE[] =
    "llvm.loop.vectorize.scalable.enable";
static constexpr char INTERLEAVE_COUNT[] = "llvm.loop.interleave.count";
static constexpr char VECTORIZE_ENABLE[] = "llvm.loop.vectorize.enable";
static constexpr char VECTORIZE_WIDTH[] = "llvm.loop.vectorize.width";
57+ // Command line option to enable/disable RISCVESP32P4LoopVectorizeExtractor
58+ cl::opt<bool > llvm::EnableRISCVESP32P4LoopVectorizeExtractor (
59+ " riscv-esp32p4-loop-vectorize-extractor" , cl::init(false ),
60+ cl::desc(" Enable RISC-V ESP32-P4 loop vectorization extractor for specific "
61+ " loops" ));
62+
63+ STATISTIC (NumLoopsVectorized, " Number of loops prepared for vectorization" );
64+ STATISTIC (NumFunctionsProcessed, " Number of functions processed" );
65+ STATISTIC (NumModulesWithExtraction,
66+ " Number of modules requiring loop extraction" );
67+
68+ // / Extract the element type from memory access instructions
69+ static Type *getElementTypeFromInstruction (const Instruction &I) {
70+ if (const auto *LI = dyn_cast<LoadInst>(&I)) {
71+ return LI->getType ();
72+ }
73+ if (const auto *SI = dyn_cast<StoreInst>(&I)) {
74+ return SI->getValueOperand ()->getType ();
75+ }
76+ return nullptr ;
77+ }
78+
79+ // / Get the minimum element bit width from loop body memory accesses
80+ static unsigned getLoopBodyElementBitWidth (Loop *L, const DataLayout &DL) {
81+ if (!L || L->getBlocks ().empty ())
82+ return 0 ;
83+
84+ TypeSize MinBitWidth = TypeSize::getFixed (UINT_MAX);
85+
86+ for (BasicBlock *BB : L->getBlocks ()) {
87+ for (const Instruction &I : *BB) {
88+ Type *ElTy = getElementTypeFromInstruction (I);
89+ if (!ElTy)
90+ continue ;
91+
92+ // Handle vector types by extracting element type
93+ if (ElTy->isVectorTy ())
94+ ElTy = cast<VectorType>(ElTy)->getElementType ();
95+
96+ // Only consider integer and floating-point types
97+ if (ElTy->isIntegerTy () || ElTy->isFloatingPointTy ()) {
98+ MinBitWidth = std::min (MinBitWidth, DL.getTypeSizeInBits (ElTy));
99+ }
100+ }
101+ }
102+
103+ return (!MinBitWidth.isScalable () &&
104+ MinBitWidth.getKnownMinValue () != UINT_MAX)
105+ ? MinBitWidth.getKnownMinValue ()
106+ : 0 ;
107+ }
108+
109+ // / Check if a loop has the required mustprogress metadata
110+ static bool hasLoopMustProgressMetadata (const Loop *L) {
111+ if (!L)
112+ return false ;
113+
114+ const MDNode *LoopID = L->getLoopID ();
115+ if (!LoopID)
116+ return false ;
117+
118+ for (unsigned I = 1 ; I < LoopID->getNumOperands (); ++I) {
119+ if (const auto *MD = dyn_cast<MDNode>(LoopID->getOperand (I))) {
120+ if (MD->getNumOperands () >= 1 ) {
121+ if (const auto *S = dyn_cast<MDString>(MD->getOperand (0 ))) {
122+ if (S->getString () == MUSTPROGRESS_METADATA_NAME) {
123+ return true ;
124+ }
125+ }
126+ }
127+ }
128+ }
129+ return false ;
130+ }
131+
132+ // / Determine if a loop is a candidate for vectorization
133+ static bool isLoopVectorizationCandidate (const Loop *L, const DataLayout &DL) {
134+ if (!L)
135+ return false ;
136+
137+ // Only process innermost loops
138+ if (!L->isInnermost ())
139+ return false ;
140+
141+ // Check for required metadata
142+ if (!hasLoopMustProgressMetadata (L))
143+ return false ;
144+
145+ // Verify element bit width compatibility
146+ unsigned ElementBitWidth =
147+ getLoopBodyElementBitWidth (const_cast <Loop *>(L), DL);
148+ if (ElementBitWidth == 0 ) {
149+ LLVM_DEBUG (dbgs () << " Loop has no valid element bit width\n " );
150+ return false ;
151+ }
152+
153+ // Check if SIMD width is compatible with element width
154+ if (ESP32P4_SIMD_BIT_WIDTH % ElementBitWidth != 0 ) {
155+ LLVM_DEBUG (dbgs () << " SIMD width " << ESP32P4_SIMD_BIT_WIDTH
156+ << " not compatible with element width "
157+ << ElementBitWidth << " \n " );
158+ return false ;
159+ }
160+
161+ return true ;
162+ }
163+
164+ // / Create vectorization metadata for a loop
165+ static MDNode *createVectorizationMetadata (LLVMContext &Ctx,
166+ unsigned VectorWidth,
167+ unsigned InterleaveCount) {
168+ // Create individual metadata nodes
169+ MDNode *MustProgress =
170+ MDNode::get (Ctx, MDString::get (Ctx, MUSTPROGRESS_METADATA_NAME));
171+
172+ MDNode *NoScalable = MDNode::get (
173+ Ctx,
174+ {MDString::get (Ctx, VECTORIZE_SCALABLE_ENABLE),
175+ ConstantAsMetadata::get (ConstantInt::get (Type::getInt1Ty (Ctx), 0 ))});
176+
177+ MDNode *Interleave =
178+ MDNode::get (Ctx, {MDString::get (Ctx, INTERLEAVE_COUNT),
179+ ConstantAsMetadata::get (ConstantInt::get (
180+ Type::getInt32Ty (Ctx), InterleaveCount))});
181+
182+ MDNode *VecEnable = MDNode::get (
183+ Ctx,
184+ {MDString::get (Ctx, VECTORIZE_ENABLE),
185+ ConstantAsMetadata::get (ConstantInt::get (Type::getInt1Ty (Ctx), 1 ))});
186+
187+ MDNode *VecWidthMD =
188+ MDNode::get (Ctx, {MDString::get (Ctx, VECTORIZE_WIDTH),
189+ ConstantAsMetadata::get (ConstantInt::get (
190+ Type::getInt32Ty (Ctx), VectorWidth))});
191+
192+ // Assemble the complete metadata
193+ SmallVector<Metadata *, 6 > MDs;
194+ MDs.push_back (nullptr ); // Self-reference placeholder
195+ MDs.push_back (MustProgress);
196+ MDs.push_back (VecWidthMD);
197+ MDs.push_back (NoScalable);
198+ MDs.push_back (Interleave);
199+ MDs.push_back (VecEnable);
200+
201+ MDNode *NewLoopID = MDNode::get (Ctx, MDs);
202+ NewLoopID->replaceOperandWith (0 , NewLoopID); // Set self-reference
203+ return NewLoopID;
204+ }
205+
206+ bool RISCVESP32P4LoopVectorizeExtractorPass::prepareLoopForVectorization (
207+ Function &F, FunctionAnalysisManager &AM, unsigned InterleaveCount) {
208+
209+ LoopInfo &LI = AM.getResult <LoopAnalysis>(F);
210+ const DataLayout &DL = F.getParent ()->getDataLayout ();
211+
212+ for (Loop *L : LI) {
213+ if (!isLoopVectorizationCandidate (L, DL))
214+ continue ;
215+
216+ unsigned ElementBitWidth = getLoopBodyElementBitWidth (L, DL);
217+ // This should not happen as we already checked in
218+ // isLoopVectorizationCandidate
219+ assert (ElementBitWidth != 0 && " Element bit width should not be zero" );
220+
221+ unsigned VectorWidth = ESP32P4_SIMD_BIT_WIDTH / ElementBitWidth;
222+
223+ LLVM_DEBUG (dbgs () << " Vectorizing loop in " << F.getName ()
224+ << " with element type width " << ElementBitWidth
225+ << " and calculated vector factor " << VectorWidth
226+ << " \n " );
227+
228+ MDNode *NewLoopID = createVectorizationMetadata (F.getContext (), VectorWidth,
229+ InterleaveCount);
230+ L->setLoopID (NewLoopID);
231+ ++NumLoopsVectorized;
232+ return true ;
233+ }
234+ return false ;
235+ }
236+
237+ bool RISCVESP32P4LoopVectorizeExtractorPass::hasProcessableLoops (
238+ Function &F, FunctionAnalysisManager &AM) {
239+
240+ // Early exit if function lacks target-features attribute
241+ if (!F.getFnAttribute (TARGET_FEATURES_ATTR_NAME).isValid ())
242+ return false ;
243+
244+ LoopInfo &LI = AM.getResult <LoopAnalysis>(F);
245+ const DataLayout &DL = F.getParent ()->getDataLayout ();
246+
247+ // Check if any loop is a vectorization candidate
248+ return llvm::any_of (
249+ LI, [&DL](const Loop *L) { return isLoopVectorizationCandidate (L, DL); });
250+ }
251+
252+ bool RISCVESP32P4LoopExtractorConditionalWrapper::hasLoopsNeedingExtraction (
253+ Module &M, ModuleAnalysisManager &AM) {
254+
255+ FunctionAnalysisManager &FAM =
256+ AM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
257+
258+ // Check if any non-declaration function has processable loops
259+ for (const Function &F : M) {
260+ if (!F.isDeclaration () &&
261+ RISCVESP32P4LoopVectorizeExtractorPass::hasProcessableLoops (
262+ const_cast <Function &>(F), FAM)) {
263+ LLVM_DEBUG (dbgs () << " Found function " << F.getName ()
264+ << " with processable loops needing extraction\n " );
265+ return true ;
266+ }
267+ }
268+
269+ return false ;
270+ }
271+
272+ bool RISCVESP32P4LoopVectorizeExtractorPass::runVectorizationPass (
273+ Function &F, FunctionAnalysisManager &AM, unsigned InterleaveCount) {
274+
275+ if (!F.getFnAttribute (TARGET_FEATURES_ATTR_NAME).isValid ()) {
276+ LLVM_DEBUG (
277+ dbgs ()
278+ << " Function " << F.getName ()
279+ << " lacks target-features attribute. Skipping vectorization.\n " );
280+ return false ;
281+ }
282+
283+ bool Changed = prepareLoopForVectorization (F, AM, InterleaveCount);
284+ if (!Changed)
285+ return false ;
286+
287+ // Create fresh analysis managers for vectorization passes
288+ // This ensures we don't interfere with the calling pass's analysis state
289+ LoopAnalysisManager LAM;
290+ FunctionAnalysisManager FAM;
291+ CGSCCAnalysisManager CGAM;
292+ ModuleAnalysisManager MAM;
293+ PassBuilder PB;
294+
295+ // Register all required analyses
296+ PB.registerModuleAnalyses (MAM);
297+ PB.registerCGSCCAnalyses (CGAM);
298+ PB.registerFunctionAnalyses (FAM);
299+ PB.registerLoopAnalyses (LAM);
300+ PB.crossRegisterProxies (LAM, FAM, CGAM, MAM);
301+
302+ // Configure vectorization options
303+ LoopVectorizeOptions Opts;
304+ Opts.VectorizeOnlyWhenForced = false ;
305+ Opts.InterleaveOnlyWhenForced = false ;
306+
307+ // Build and run the vectorization pipeline
308+ FunctionPassManager FPM;
309+ FPM.addPass (LoopVectorizePass (Opts));
310+ FPM.addPass (SLPVectorizerPass ());
311+ FPM.addPass (createFunctionToLoopPassAdaptor (LoopStrengthReducePass ()));
312+
313+ // Run the pipeline with the fresh analysis manager
314+ FPM.run (F, FAM);
315+
316+ return true ;
317+ }
318+
319+ PreservedAnalyses
320+ RISCVESP32P4LoopVectorizeExtractorPass::run (Function &F,
321+ FunctionAnalysisManager &FAM) {
322+ if (!EnableRISCVESP32P4LoopVectorizeExtractor)
323+ return PreservedAnalyses::all ();
324+
325+ ++NumFunctionsProcessed;
326+
327+ bool Changed = runVectorizationPass (F, FAM, DEFAULT_INTERLEAVE_COUNT);
328+ return Changed ? PreservedAnalyses::none () : PreservedAnalyses::all ();
329+ }
330+
//===----------------------------------------------------------------------===//
// RISCVESP32P4LoopExtractorConditionalWrapper Implementation
//===----------------------------------------------------------------------===//
334+
335+ RISCVESP32P4LoopExtractorConditionalWrapper::
336+ RISCVESP32P4LoopExtractorConditionalWrapper (ModulePassManager &&PM)
337+ : PM(std::move(PM)) {}
338+
339+ PreservedAnalyses
340+ RISCVESP32P4LoopExtractorConditionalWrapper::run (Module &M,
341+ ModuleAnalysisManager &AM) {
342+
343+ if (!hasLoopsNeedingExtraction (M, AM)) {
344+ LLVM_DEBUG (dbgs () << " No loops needing extraction found in module "
345+ << M.getName () << " , skipping LoopExtractor passes\n " );
346+ return PreservedAnalyses::all ();
347+ }
348+
349+ ++NumModulesWithExtraction;
350+
351+ LLVM_DEBUG (dbgs () << " Running LoopExtractor passes for module " << M.getName ()
352+ << " \n " );
353+
354+ return PM.run (M, AM);
355+ }
0 commit comments