1 change: 1 addition & 0 deletions .gitignore
@@ -73,3 +73,4 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
/install
68 changes: 68 additions & 0 deletions TTL_MLIR_Integration.md
@@ -0,0 +1,68 @@
# TTL MLIR Integration

## Project Overview
This project integrates TTL (Template Tiling Library) with MLIR to build a pipeline that transforms plain C code into TTL-optimized C code. The pipeline applies affine loop tiling and dialect conversions, with an initial focus on optimizing operations such as sigmoid.

## Current Pipeline
```
C code with TTL DSL → MLIR → Optimized MLIR → EmitC → C code
```
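
As a rough sketch of how these stages might be driven from the command line (the `cgeist` flags and the pass ordering here are assumptions and may differ from the actual setup):

```
# C with TTL DSL → MLIR (Polygeist, LLVM 18)
cgeist sigmoid.c -S -o sigmoid.mlir

# MLIR → tiled, TTL-ready MLIR → EmitC (LLVM 20 tools plus the TTL passes)
mlir-opt sigmoid.mlir --affine-loop-tile --ttl-pipeline --ttl-to-emitc -o sigmoid_opt.mlir

# EmitC → C
mlir-translate --mlir-to-cpp sigmoid_opt.mlir -o sigmoid_ttl.c
```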

## Technical Implementation

### Version Compatibility
- The MLIR pipeline is built on LLVM 20
- Polygeist (C → MLIR) is still on LLVM 18
- Workaround: incompatible constructs are removed from Polygeist's output by hand
- This is a manageable limitation for now

### Type System Integration
- Minor issue: the conversions leave stray unrealized conversion casts behind
- These can be cleaned up with a simple pass if needed; see the sketch below
- Not a critical blocker
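
If the stray casts ever need to be removed, MLIR's existing `reconcile-unrealized-casts` pass is the natural candidate; appending it to the pipeline would look roughly like this (whether it is sufficient here is still an assumption):

```
mlir-opt sigmoid.mlir --ttl-pipeline --reconcile-unrealized-casts -o sigmoid_clean.mlir
```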

### TTL Integration Strategy
Two possible approaches:
1. Generate direct function calls to TTL's existing functions
2. Create a dedicated TTL dialect (if needed)

Currently leaning towards function calls for simplicity (see the sketch below); the final decision depends on how requirements evolve.
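
For the call-based approach, the backend would emit plain calls into the existing TTL API around each tile. A minimal, hypothetical sketch of such output (the `ttl_*` helper names, `sigmoid_tile`, and the tile sizes below are placeholders, not the library's actual API):

```
/* Hypothetical call-based output: copy a tile into local memory with a TTL
 * helper, compute on it, and copy it back. All helper names are placeholders. */
void sigmoid_ttl(int H, int W, const float *in, float *out,
                 float *local_in, float *local_out, int TILE_H, int TILE_W) {
  for (int ti = 0; ti < H; ti += TILE_H) {
    for (int tj = 0; tj < W; tj += TILE_W) {
      ttl_import_tile(local_in, in, ti, tj, TILE_H, TILE_W);   /* placeholder */
      sigmoid_tile(local_out, local_in, TILE_H, TILE_W);       /* placeholder */
      ttl_export_tile(out, local_out, ti, tj, TILE_H, TILE_W); /* placeholder */
    }
  }
}
```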

## Next Steps

### 1. Frontend Definition
- Adopt Polygeist as the frontend (C → MLIR)
- Its output feeds into the TTL optimizer passes (e.g., tiling)
- Currently only minimal 2D loops and array accesses are supported; see the example after this list
- TTL DSL features in the frontend will be expanded over time
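
As a concrete example of the minimal pattern currently accepted, a sigmoid kernel over a 2D array with constant bounds (the names and shapes are illustrative):

```
#include <math.h>

#define H 128
#define W 128

/* Minimal frontend input: a perfectly nested 2D loop with constant bounds and
 * plain 2D array accesses. */
void sigmoid_2d(const float in[H][W], float out[H][W]) {
  for (int i = 0; i < H; i++)
    for (int j = 0; j < W; j++)
      out[i][j] = 1.0f / (1.0f + expf(-in[i][j]));
}
```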

### 2. Backend Generation
- Develop pipeline to generate TTL-specific code
- Focus on efficient memory operations and tiling

### 3. TTL DSL Development
- Currently minimal: 2D loops and array access
- Will expand based on requirements
- Starting with sigmoid as a test case

### 4. Immediate Focus
- Optimizing sigmoid function
- Using it as a test case for the complete pipeline
- Will use learnings to expand to other operations

## Technical Decisions
- Keeping things simple: function calls rather than a new dialect
- Managing version compatibility manually for now
- Type conversion issues are minor and can be addressed if needed

## Current Limitations
1. Version mismatch between Polygeist and MLIR pipeline
2. Minimal TTL DSL features in frontend
3. Focus on sigmoid optimization only

## Future Work
1. Expand TTL DSL features
2. Add more optimization passes
3. Support more complex operations
4. Evaluate need for TTL dialect
5. Consider automating version compatibility fixes
12 changes: 12 additions & 0 deletions mlir/include/mlir/Transforms/Passes.h
@@ -46,6 +46,9 @@ class GreedyRewriteConfig;
#define GEN_PASS_DECL_SYMBOLPRIVATIZE
#define GEN_PASS_DECL_TOPOLOGICALSORT
#define GEN_PASS_DECL_COMPOSITEFIXEDPOINTPASS
#define GEN_PASS_DECL_TTLOPS
#define GEN_PASS_DECL_TTLPIPELINE
#define GEN_PASS_DECL_TTLTOEMITC
#include "mlir/Transforms/Passes.h.inc"

/// Creates an instance of the Canonicalizer pass, configured with default
@@ -65,6 +68,15 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
ArrayRef<std::string> disabledPatterns = std::nullopt,
ArrayRef<std::string> enabledPatterns = std::nullopt);

/// Creates a TTL ops pass.
std::unique_ptr<Pass> createTTLOpsPass();

/// Creates a TTL pipeline pass that runs multiple passes.
std::unique_ptr<Pass> createTTLPipelinePass();

/// Creates a TTL-to-EmitC conversion pass.
std::unique_ptr<Pass> createTTLToEmitC();

/// Creates a pass to perform control-flow sinking.
std::unique_ptr<Pass> createControlFlowSinkPass();

26 changes: 26 additions & 0 deletions mlir/include/mlir/Transforms/Passes.td
@@ -54,6 +54,24 @@ def Canonicalizer : Pass<"canonicalize"> {
] # RewritePassUtils.options;
}

def TTLOps : Pass<"ttl-ops", "ModuleOp"> {
let summary = "Convert TTL operations to MLIR";
let description = [{
This pass converts TTL operations to their MLIR equivalents.
}];
let constructor = "mlir::createTTLOpsPass()";
let dependentDialects = ["func::FuncDialect"];
}

def TTLPipeline : Pass<"ttl-pipeline", "ModuleOp"> {
let summary = "Run a pipeline of TTL passes";
let description = [{
This pass runs a sequence of TTL-related passes in a specific order.
}];
let constructor = "mlir::createTTLPipelinePass()";
let dependentDialects = ["func::FuncDialect"];
}
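
The TTLPipeline implementation itself is not shown in this diff; as a rough, assumption-laden sketch, a pipeline pass like this is typically implemented by nesting an OpPassManager and running the other passes in a fixed order (the actual TTLPipeline.cpp may be structured differently):

```
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;

namespace {
// Sketch only: nest a pass manager and run the TTL passes in a fixed order.
struct TTLPipelineSketch
    : public PassWrapper<TTLPipelineSketch, OperationPass<ModuleOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLPipelineSketch)

  void runOnOperation() override {
    OpPassManager pm(ModuleOp::getOperationName());
    pm.addPass(createTTLOpsPass());
    pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
    if (failed(runPipeline(pm, getOperation())))
      signalPassFailure();
  }
};
} // namespace
```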

def ControlFlowSink : Pass<"control-flow-sink"> {
let summary = "Sink operations into conditional blocks";
let description = [{
@@ -586,4 +604,12 @@ def CompositeFixedPointPass : Pass<"composite-fixed-point-pass"> {
];
}

def TTLToEmitC : Pass<"ttl-to-emitc", "func::FuncOp"> {
let summary = "Convert TTL operations to EmitC dialect";
let description = [{
This pass converts TTL operations to EmitC dialect for C code generation.
}];
let dependentDialects = ["mlir::emitc::EmitCDialect"];
}

#endif // MLIR_TRANSFORMS_PASSES
3 changes: 3 additions & 0 deletions mlir/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
add_subdirectory(Utils)

add_mlir_library(MLIRTransforms
TTLOps.cpp
TTLPipeline.cpp
TTLToEmitC.cpp
Canonicalizer.cpp
CompositePass.cpp
ControlFlowSink.cpp
210 changes: 210 additions & 0 deletions mlir/lib/Transforms/TTLOps.cpp
@@ -0,0 +1,210 @@
#include "mlir/Pass/Pass.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace mlir::affine;

namespace {

// Core data structures for analyzing loops and memory accesses
struct LoopInfo {
// Loop bounds and step
int64_t lowerBound;
int64_t upperBound;
int64_t step;

// Memory accesses in this loop
enum class AccessType {
Load,
Store
};

struct MemoryAccess {
Value memref; // The memref being accessed
AffineMap accessMap; // The affine map for the access
AccessType type; // Whether it's a load or store
};
SmallVector<MemoryAccess> accesses;
};

// Helper class to validate loop structures and memory accesses
class LoopValidator {
public:
// Check if a memory access is 2D
static bool is2DAccess(Operation *op) {
AffineMap map;
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
map = loadOp.getAffineMap();
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
map = storeOp.getAffineMap();
} else {
assert(false && "Expected load or store operation");
}
return map.getNumResults() == 2;
}


// Validate loop band and collect information if valid
static std::optional<SmallVector<LoopInfo>> validateAndCollectInfo(ArrayRef<AffineForOp> loops) {
// Check if it's a 2D perfectly nested loop
if (loops.size() != 2 || !affine::isPerfectlyNested(loops)) {
return std::nullopt;
}

SmallVector<LoopInfo> loopInfos;

// Analyze each loop
for (const auto &loop : loops) {
LoopInfo info;

// Get loop bounds and check if they're compile-time constants
auto lowerMap = const_cast<AffineForOp &>(loop).getLowerBoundMap();
auto upperMap = const_cast<AffineForOp &>(loop).getUpperBoundMap();

if (!lowerMap.isConstant() || !upperMap.isConstant()) {
return std::nullopt;
}

info.lowerBound = lowerMap.getSingleConstantResult();
info.upperBound = upperMap.getSingleConstantResult();
info.step = const_cast<AffineForOp &>(loop).getStep().getSExtValue();

// Only collect memory accesses in the innermost loop
if (loop == loops.back()) {
bool all2D = true;
loop->walk([&](Operation *op) {
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
if (!is2DAccess(op)) {
all2D = false;
return;
}
info.accesses.push_back({loadOp.getMemRef(), loadOp.getAffineMap(), LoopInfo::AccessType::Load});
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
if (!is2DAccess(op)) {
all2D = false;
return;
}
info.accesses.push_back({storeOp.getMemRef(), storeOp.getAffineMap(), LoopInfo::AccessType::Store});
}
});

// If not all accesses are 2D, return nullopt
if (!all2D) {
return std::nullopt;
}
}

loopInfos.push_back(info);
}

return loopInfos;
}
};

// Helper function to print loop information
static void printLoopInfo(const SmallVector<LoopInfo> &loopInfos, func::FuncOp funcOp) {
llvm::errs() << "\n=== Band Information ===\n";

// Print loop structure
llvm::errs() << "Loop Structure:\n";
for (size_t i = 0; i < loopInfos.size(); i++) {
const auto &info = loopInfos[i];
llvm::errs() << " Loop " << i << ": [" << info.lowerBound << ", "
<< info.upperBound << ") step " << info.step << "\n";
}

// Print only innermost loop's memory accesses
llvm::errs() << "\nMemory Accesses in Innermost Loop:\n";
const auto &innerLoop = loopInfos.back();
for (const auto &access : innerLoop.accesses) {
llvm::errs() << " " << (access.type == LoopInfo::AccessType::Load ? "Load" : "Store") << " from ";

// Print block argument information
if (auto blockArg = dyn_cast<BlockArgument>(access.memref)) {
llvm::errs() << "<block argument> of type '" << blockArg.getType()
<< "' at index: " << blockArg.getArgNumber()
<< " (arg" << blockArg.getArgNumber() << ")";
} else {
llvm::errs() << access.memref;
}
llvm::errs() << "\n";
llvm::errs() << " Access Map: " << access.accessMap << "\n";
}
llvm::errs() << "================================\n";
}

struct TTLOps : public PassWrapper<TTLOps, OperationPass<ModuleOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLOps)

// Default constructor
TTLOps() = default;

// Copy constructor - needed for pass cloning
TTLOps(const TTLOps &other) : PassWrapper<TTLOps, OperationPass<ModuleOp>>(other) {
// Copy option values
localMemorySize = other.localMemorySize;
loadCost = other.loadCost;
storeCost = other.storeCost;
}

// Pass options
Option<unsigned> localMemorySize{
*this, "local-memory-size",
llvm::cl::desc("Size of local memory in KB (default: 32)"),
llvm::cl::init(32)};
Option<unsigned> loadCost{
*this, "load-cost",
llvm::cl::desc("Cost of a load operation (default: 1)"),
llvm::cl::init(1)};
Option<unsigned> storeCost{
*this, "store-cost",
llvm::cl::desc("Cost of a store operation (default: 1)"),
llvm::cl::init(1)};

StringRef getArgument() const override { return "ttl-ops"; }
StringRef getDescription() const override { return "TTL operations pass"; }

void runOnOperation() override {
ModuleOp module = getOperation();

// Ensure we only have one function in the module
auto funcOps = module.getOps<func::FuncOp>();
assert(std::distance(funcOps.begin(), funcOps.end()) == 1 &&
"Expected exactly one function in the module");

// Find perfect loop nests (bands) in each function
module->walk([&](func::FuncOp funcOp) {
std::vector<SmallVector<AffineForOp, 6>> bands;
mlir::affine::getTileableBands(funcOp, &bands);

// Analyze each band
for (const auto &band : bands) {
// Validate band and collect information
if (auto loopInfos = LoopValidator::validateAndCollectInfo(band)) {
printLoopInfo(*loopInfos, funcOp);
}
}
});
}
};

// Register the pass
void registerTTLOps() {
PassRegistration<TTLOps>();
}
} // end anonymous namespace

namespace mlir {
std::unique_ptr<Pass> createTTLOpsPass() {
return std::make_unique<TTLOps>();
}
} // end namespace mlir
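
To exercise this pass by hand on Polygeist output once it is registered, an invocation along these lines should print the detected 2D bands and their memory accesses to stderr (the file name is a placeholder; the options map to the Option<> declarations above):

```
mlir-opt sigmoid.mlir --ttl-ops="local-memory-size=64 load-cost=1 store-cost=2" -o /dev/null
```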