diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt
index 6b6f2b1ca2276..9d9012012e338 100644
--- a/clang-tools-extra/CMakeLists.txt
+++ b/clang-tools-extra/CMakeLists.txt
@@ -29,6 +29,9 @@ add_subdirectory(include-cleaner)
 add_subdirectory(pp-trace)
 add_subdirectory(tool-template)
 
+# Add regular-mem-access tool
+add_subdirectory(regular-mem-access)
+
 option(CLANG_TOOLS_EXTRA_INCLUDE_DOCS "Generate build targets for the Clang Extra Tools docs."
   ${LLVM_INCLUDE_DOCS})
 if( CLANG_TOOLS_EXTRA_INCLUDE_DOCS )
diff --git a/clang-tools-extra/regular-mem-access/CMakeLists.txt b/clang-tools-extra/regular-mem-access/CMakeLists.txt
new file mode 100644
index 0000000000000..880f4eb5252f5
--- /dev/null
+++ b/clang-tools-extra/regular-mem-access/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Build rules for the regular-mem-access tool.
+
+# LLVM libraries are requested as components instead of naming LLVMCore
+# directly, so the tool also links when LLVM is built as one shared library.
+set(LLVM_LINK_COMPONENTS
+  Core
+  Support
+  )
+
+# Do NOT add test.c to the build target! It is a test input, not a source file for the tool.
+add_clang_executable(regular-mem-access
+  RegularMemAccess.cpp
+  )
+
+# Clang libraries needed for AST matching, tooling and IR generation.
+# clang_target_link_libraries keeps this working with the clang-cpp dylib.
+clang_target_link_libraries(regular-mem-access
+  PRIVATE
+  clangAST
+  clangASTMatchers
+  clangBasic
+  clangCodeGen
+  clangFrontend
+  clangTooling
+  )
+
+install(TARGETS regular-mem-access RUNTIME DESTINATION bin)
diff --git a/clang-tools-extra/regular-mem-access/RegularMemAccess.cpp b/clang-tools-extra/regular-mem-access/RegularMemAccess.cpp
new file mode 100644
index 0000000000000..2ec3288b54810
--- /dev/null
+++ b/clang-tools-extra/regular-mem-access/RegularMemAccess.cpp
@@ -0,0 +1,171 @@
+//===--- RegularMemAccess.cpp - Regular Memory Access Analyzer -----------===//
+//
+// Detects functions with regular (sequential) memory access patterns.
+//
+// Usage:
+//   regular-mem-access -analyze-regular-memory-access <source-files>
+//   regular-mem-access -analyze-regular-memory-access-llvm-ir <source-files>
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/IR/Module.h"
+#include <memory>
+
+using namespace clang;
+using namespace clang::ast_matchers;
+using namespace clang::tooling;
+using namespace llvm;
+
+static cl::OptionCategory RegularMemAccessCategory("regular-mem-access options");
+
+// Selects the AST-based analysis (see RegularMemAccessCallback).
+static cl::opt<bool> AnalyzeRegularMemoryAccess(
+    "analyze-regular-memory-access",
+    cl::desc("Enable detection of regular memory access patterns."),
+    cl::cat(RegularMemAccessCategory));
+
+// Selects the LLVM-IR-based analysis.
+static cl::opt<bool> AnalyzeRegularMemoryAccessLLVMIR(
+    "analyze-regular-memory-access-llvm-ir",
+    cl::desc("Enable detection of regular memory access patterns in LLVM IR."),
+    cl::cat(RegularMemAccessCategory));
+
+namespace {
+/// Match callback that reports array subscripts whose index is a linear
+/// function of the variable declared by the enclosing for-loop.
+///
+/// It expects the bindings "arrayAccess" (ArraySubscriptExpr), "forLoop"
+/// (ForStmt) and "func" (FunctionDecl); a match missing any of them is
+/// ignored. Recognised index shapes, with integer literals k and c:
+///   i        i * k        k * i
+///   <one of the above> + c,   <one of the above> - c,
+///   c + <one of the above>,   c - <one of the above>
+/// Anything else produces no output.
+class RegularMemAccessCallback : public MatchFinder::MatchCallback {
+public:
+  void run(const MatchFinder::MatchResult &Result) override {
+    const auto *Access =
+        Result.Nodes.getNodeAs<ArraySubscriptExpr>("arrayAccess");
+    const auto *Loop = Result.Nodes.getNodeAs<ForStmt>("forLoop");
+    const auto *Func = Result.Nodes.getNodeAs<FunctionDecl>("func");
+    if (!Access || !Loop || !Func)
+      return;
+
+    // Without a loop variable there is nothing to relate the index to.
+    const VarDecl *LoopVar = getLoopVariable(*Loop);
+    if (!LoopVar || !isLinearIn(Access->getIdx(), LoopVar))
+      return;
+
+    FullSourceLoc FullLoc(Access->getBeginLoc(), *Result.SourceManager);
+    llvm::outs() << "Function '" << Func->getNameAsString()
+                 << "' has regular memory access patterns:\n";
+    llvm::outs() << "- Sequential/linear access detected at line "
+                 << FullLoc.getSpellingLineNumber() << "\n\n";
+  }
+
+private:
+  /// Returns the variable declared in the init-statement of \p Loop, or null
+  /// if the init-statement is not a declaration of exactly one variable.
+  static const VarDecl *getLoopVariable(const ForStmt &Loop) {
+    const Stmt *Init = Loop.getInit();
+    const auto *DS = Init ? dyn_cast<DeclStmt>(Init) : nullptr;
+    // getSingleDecl() asserts on `for (int i = 0, j = 0; ...)`, so the
+    // single-declaration check has to come first.
+    if (!DS || !DS->isSingleDecl())
+      return nullptr;
+    return dyn_cast<VarDecl>(DS->getSingleDecl());
+  }
+
+  /// True if \p E, ignoring parentheses and implicit casts, names \p Var.
+  static bool refersTo(const Expr *E, const VarDecl *Var) {
+    const auto *Ref = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts());
+    return Ref && Ref->getDecl() == Var;
+  }
+
+  /// True if \p E, ignoring parentheses and implicit casts, is an integer
+  /// literal.
+  static bool isIntLiteral(const Expr *E) {
+    return isa<IntegerLiteral>(E->IgnoreParenImpCasts());
+  }
+
+  /// True for `i`, `i * k` and `k * i`.
+  static bool isScaledLoopVar(const Expr *E, const VarDecl *Var) {
+    E = E->IgnoreParenImpCasts();
+    if (refersTo(E, Var))
+      return true;
+    // Qualified: llvm::BinaryOperator is also visible in this file.
+    const auto *Mul = dyn_cast<clang::BinaryOperator>(E);
+    if (!Mul || Mul->getOpcode() != BO_Mul)
+      return false;
+    return (refersTo(Mul->getLHS(), Var) && isIntLiteral(Mul->getRHS())) ||
+           (isIntLiteral(Mul->getLHS()) && refersTo(Mul->getRHS(), Var));
+  }
+
+  /// True if \p Idx has one of the shapes listed in the class comment.
+  ///
+  /// The scaled and the offset form are handled by one check; as two
+  /// consecutive `else if` branches on the same dyn_cast, the second branch
+  /// could never be reached.
+  static bool isLinearIn(const Expr *Idx, const VarDecl *Var) {
+    Idx = Idx->IgnoreParenImpCasts();
+    if (isScaledLoopVar(Idx, Var))
+      return true;
+    const auto *Sum = dyn_cast<clang::BinaryOperator>(Idx);
+    if (!Sum || (Sum->getOpcode() != BO_Add && Sum->getOpcode() != BO_Sub))
+      return false;
+    return (isScaledLoopVar(Sum->getLHS(), Var) &&
+            isIntLiteral(Sum->getRHS())) ||
+           (isIntLiteral(Sum->getLHS()) &&
+            isScaledLoopVar(Sum->getRHS(), Var));
+  }
+};
+
+// LLVM IR analysis
+
+/// Frontend action that generates LLVM IR for a translation unit and prints a
+/// message to stderr for every single-index getelementptr whose index operand
+/// passes the check below.
+class RegularMemAccessIRAction : public clang::EmitLLVMOnlyAction {
+public:
+  void EndSourceFileAction() override {
+    // The base implementation is what hands the generated module from the
+    // backend consumer to this action. Without it takeModule() returns null
+    // and no function is ever inspected.
+    clang::EmitLLVMOnlyAction::EndSourceFileAction();
+
+    std::unique_ptr<llvm::Module> ModulePtr = takeModule();
+    if (!ModulePtr)
+      return;
+
+    for (const llvm::Function &F : *ModulePtr) {
+      if (F.isDeclaration())
+        continue;
+      for (const llvm::Instruction &I : llvm::instructions(F)) {
+        const auto *GEP = llvm::dyn_cast<llvm::GetElementPtrInst>(&I);
+        if (!GEP || GEP->getNumIndices() != 1)
+          continue;
+        // With exactly one index, the last operand is that index.
+        const llvm::Value *Idx = GEP->getOperand(GEP->getNumOperands() - 1);
+        // NOTE(review): the two isa<> type arguments were lost in the text
+        // this patch was recovered from. PHINode / BinaryOperator is an
+        // assumption -- confirm against the intended check.
+        if (llvm::isa<llvm::PHINode>(Idx) ||
+            llvm::isa<llvm::BinaryOperator>(Idx)) {
+          llvm::errs() << "[LLVM IR] Function '" << F.getName()
+                       << "' has regular memory access patterns (sequential/linear GEP)\n";
+        }
+      }
+    }
+  }
+};
+} // namespace
+
+/// Tool entry point.
+///
+/// -analyze-regular-memory-access-llvm-ir runs the IR analysis and takes
+/// precedence when both flags are given. -analyze-regular-memory-access runs
+/// the AST analysis. With neither flag the sources are only syntax-checked.
+/// Returns 1 if the command line cannot be parsed, otherwise the result of
+/// ClangTool::run.
+int main(int argc, const char **argv) {
+  auto ExpectedParser =
+      CommonOptionsParser::create(argc, argv, RegularMemAccessCategory);
+  if (!ExpectedParser) {
+    llvm::errs() << ExpectedParser.takeError();
+    return 1;
+  }
+  CommonOptionsParser &OptionsParser = ExpectedParser.get();
+  ClangTool Tool(OptionsParser.getCompilations(),
+                 OptionsParser.getSourcePathList());
+
+  if (AnalyzeRegularMemoryAccessLLVMIR)
+    return Tool.run(
+        newFrontendActionFactory<RegularMemAccessIRAction>().get());
+  if (!AnalyzeRegularMemoryAccess)
+    return Tool.run(newFrontendActionFactory<SyntaxOnlyAction>().get());
+
+  RegularMemAccessCallback Callback;
+  MatchFinder Finder;
+  // Bind every array subscript inside every for-loop of every function
+  // definition. forEachDescendant is used because hasDescendant stops at the
+  // first match, which would report at most one loop and one access per
+  // function. The index is not constrained here: deciding whether it is
+  // linear in the loop variable is the callback's job, and restricting it to
+  // a plain variable reference would hide indices such as `2*i+1`.
+  Finder.addMatcher(
+      functionDecl(
+          isDefinition(),
+          forEachDescendant(
+              forStmt(hasBody(forEachDescendant(
+                          arraySubscriptExpr().bind("arrayAccess"))))
+                  .bind("forLoop")))
+          .bind("func"),
+      &Callback);
+
+  return Tool.run(newFrontendActionFactory(&Finder).get());
+}
diff --git a/clang-tools-extra/regular-mem-access/test.c b/clang-tools-extra/regular-mem-access/test.c
new file mode 100644
index 0000000000000..404664b7b9692
--- /dev/null
+++ b/clang-tools-extra/regular-mem-access/test.c
@@ -0,0 +1,29 @@
+#include <math.h>
+
+// Regular: every access is data[i], with i stepping by one per iteration.
+void compute(float* data, int N) {
+  for (int i = 0; i < N; ++i) {
+    data[i] = sin(data[i]) + cos(data[i]);
+  }
+}
+
+// Irregular store: output is indexed by indices[i]; data and indices use i.
+void scatter(float* data, float* output, int* indices, int N) {
+  for (int i = 0; i < N; ++i) {
+    output[indices[i]] = data[i];
+  }
+}
+
+// Regular, strided: reads data[2*i] and writes data[2*i+1].
+void stride_access(float* data, int N) {
+  for (int i = 0; i < N; ++i) {
+    data[2*i+1] = data[2*i] + 1.0f;
+  }
+}
+
+// Irregular load: data is indexed by indices[i]; output and indices use i.
+void gather(float* data, float* output, int* indices, int N) {
+  for (int i = 0; i < N; ++i) {
+    output[i] = data[indices[i]];
+  }
+}