1 change: 1 addition & 0 deletions .gitignore
@@ -73,3 +73,4 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
/install
68 changes: 68 additions & 0 deletions TTL_MLIR_Integration.md
@@ -0,0 +1,68 @@
# TTL MLIR Integration

## Project Overview
This project integrates TTL (Template Tiling Library) with MLIR to build a pipeline that transforms plain C code into TTL-optimized C code. The pipeline applies affine loop tiling and dialect conversions, with an initial focus on optimizing operations such as sigmoid.

## Current Pipeline
```
C code with TTL DSL → MLIR → Optimized MLIR → EmitC → C code
```
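
As a rough sketch of how these stages might be driven from the command line (the `cgeist` flags and the pass ordering here are assumptions and may differ from the actual setup):

```
# C with TTL DSL → MLIR (Polygeist, LLVM 18)
cgeist sigmoid.c -S -o sigmoid.mlir

# MLIR → tiled, TTL-ready MLIR → EmitC (LLVM 20 tools plus the TTL passes)
mlir-opt sigmoid.mlir --affine-loop-tile --ttl-pipeline --ttl-to-emitc -o sigmoid_opt.mlir

# EmitC → C
mlir-translate --mlir-to-cpp sigmoid_opt.mlir -o sigmoid_ttl.c
```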

## Technical Implementation

### Version Compatibility
- The MLIR pipeline is built on LLVM 20
- Polygeist (C → MLIR) is still on LLVM 18
- Workaround: incompatible constructs are removed from Polygeist's output by hand
- This is a manageable limitation for now

### Type System Integration
- Minor issue: the conversions leave stray unrealized conversion casts behind
- These can be cleaned up with a simple pass if needed; see the sketch below
- Not a critical blocker
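
If the stray casts ever need to be removed, MLIR's existing `reconcile-unrealized-casts` pass is the natural candidate; appending it to the pipeline would look roughly like this (whether it is sufficient here is still an assumption):

```
mlir-opt sigmoid.mlir --ttl-pipeline --reconcile-unrealized-casts -o sigmoid_clean.mlir
```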

### TTL Integration Strategy
Two possible approaches:
1. Generate direct function calls to TTL's existing functions
2. Create a dedicated TTL dialect (if needed)

Currently leaning towards function calls for simplicity (see the sketch below); the final decision depends on how requirements evolve.
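
For the call-based approach, the backend would emit plain calls into the existing TTL API around each tile. A minimal, hypothetical sketch of such output (the `ttl_*` helper names, `sigmoid_tile`, and the tile sizes below are placeholders, not the library's actual API):

```
/* Hypothetical call-based output: copy a tile into local memory with a TTL
 * helper, compute on it, and copy it back. All helper names are placeholders. */
void sigmoid_ttl(int H, int W, const float *in, float *out,
                 float *local_in, float *local_out, int TILE_H, int TILE_W) {
  for (int ti = 0; ti < H; ti += TILE_H) {
    for (int tj = 0; tj < W; tj += TILE_W) {
      ttl_import_tile(local_in, in, ti, tj, TILE_H, TILE_W);   /* placeholder */
      sigmoid_tile(local_out, local_in, TILE_H, TILE_W);       /* placeholder */
      ttl_export_tile(out, local_out, ti, tj, TILE_H, TILE_W); /* placeholder */
    }
  }
}
```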

## Next Steps

### 1. Frontend Definition
- Adopt Polygeist as the frontend (C → MLIR)
- Its output feeds into the TTL optimizer passes (e.g., tiling)
- Currently only minimal 2D loops and array accesses are supported; see the example after this list
- TTL DSL features in the frontend will be expanded over time
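
As a concrete example of the minimal pattern currently accepted, a sigmoid kernel over a 2D array with constant bounds (the names and shapes are illustrative):

```
#include <math.h>

#define H 128
#define W 128

/* Minimal frontend input: a perfectly nested 2D loop with constant bounds and
 * plain 2D array accesses. */
void sigmoid_2d(const float in[H][W], float out[H][W]) {
  for (int i = 0; i < H; i++)
    for (int j = 0; j < W; j++)
      out[i][j] = 1.0f / (1.0f + expf(-in[i][j]));
}
```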

### 2. Backend Generation
- Develop pipeline to generate TTL-specific code
- Focus on efficient memory operations and tiling

### 3. TTL DSL Development
- Currently minimal: 2D loops and array access
- Will expand based on requirements
- Starting with sigmoid as a test case

### 4. Immediate Focus
- Optimizing sigmoid function
- Using it as a test case for the complete pipeline
- Will use learnings to expand to other operations

## Technical Decisions
- Keeping things simple: function calls rather than a new dialect
- Managing version compatibility manually for now
- Type conversion issues are minor and can be addressed if needed

## Current Limitations
1. Version mismatch between Polygeist and MLIR pipeline
2. Minimal TTL DSL features in frontend
3. Focus on sigmoid optimization only

## Future Work
1. Expand TTL DSL features
2. Add more optimization passes
3. Support more complex operations
4. Evaluate need for TTL dialect
5. Consider automating version compatibility fixes
12 changes: 12 additions & 0 deletions mlir/include/mlir/Transforms/Passes.h
@@ -46,6 +46,9 @@ class GreedyRewriteConfig;
#define GEN_PASS_DECL_SYMBOLPRIVATIZE
#define GEN_PASS_DECL_TOPOLOGICALSORT
#define GEN_PASS_DECL_COMPOSITEFIXEDPOINTPASS
#define GEN_PASS_DECL_TTLOPS
#define GEN_PASS_DECL_TTLPIPELINE
#define GEN_PASS_DECL_TTLTOEMITC
#include "mlir/Transforms/Passes.h.inc"

/// Creates an instance of the Canonicalizer pass, configured with default
@@ -65,6 +68,15 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
ArrayRef<std::string> disabledPatterns = std::nullopt,
ArrayRef<std::string> enabledPatterns = std::nullopt);

/// Creates a TTL ops pass.
std::unique_ptr<Pass> createTTLOpsPass();

/// Creates a TTL pipeline pass that runs multiple passes.
std::unique_ptr<Pass> createTTLPipelinePass();

/// Creates a TTL-to-EmitC conversion pass.
std::unique_ptr<Pass> createTTLToEmitC();

/// Creates a pass to perform control-flow sinking.
std::unique_ptr<Pass> createControlFlowSinkPass();

26 changes: 26 additions & 0 deletions mlir/include/mlir/Transforms/Passes.td
@@ -54,6 +54,24 @@ def Canonicalizer : Pass<"canonicalize"> {
] # RewritePassUtils.options;
}

def TTLOps : Pass<"ttl-ops", "ModuleOp"> {
let summary = "Convert TTL operations to MLIR";
let description = [{
This pass converts TTL operations to their MLIR equivalents.
}];
let constructor = "mlir::createTTLOpsPass()";
let dependentDialects = ["func::FuncDialect"];
}

def TTLPipeline : Pass<"ttl-pipeline", "ModuleOp"> {
let summary = "Run a pipeline of TTL passes";
let description = [{
This pass runs a sequence of TTL-related passes in a specific order.
}];
let constructor = "mlir::createTTLPipelinePass()";
let dependentDialects = ["func::FuncDialect"];
}
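
The TTLPipeline implementation itself is not shown in this diff; as a rough, assumption-laden sketch, a pipeline pass like this is typically implemented by nesting an OpPassManager and running the other passes in a fixed order (the actual TTLPipeline.cpp may be structured differently):

```
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

using namespace mlir;

namespace {
// Sketch only: nest a pass manager and run the TTL passes in a fixed order.
struct TTLPipelineSketch
    : public PassWrapper<TTLPipelineSketch, OperationPass<ModuleOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLPipelineSketch)

  void runOnOperation() override {
    OpPassManager pm(ModuleOp::getOperationName());
    pm.addPass(createTTLOpsPass());
    pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
    if (failed(runPipeline(pm, getOperation())))
      signalPassFailure();
  }
};
} // namespace
```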

def ControlFlowSink : Pass<"control-flow-sink"> {
let summary = "Sink operations into conditional blocks";
let description = [{
@@ -586,4 +604,12 @@ def CompositeFixedPointPass : Pass<"composite-fixed-point-pass"> {
];
}

def TTLToEmitC : Pass<"ttl-to-emitc", "func::FuncOp"> {
let summary = "Convert TTL operations to EmitC dialect";
let description = [{
This pass converts TTL operations to EmitC dialect for C code generation.
}];
let dependentDialects = ["mlir::emitc::EmitCDialect"];
}

#endif // MLIR_TRANSFORMS_PASSES
3 changes: 3 additions & 0 deletions mlir/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
add_subdirectory(Utils)

add_mlir_library(MLIRTransforms
TTLOps.cpp
TTLPipeline.cpp
TTLToEmitC.cpp
Canonicalizer.cpp
CompositePass.cpp
ControlFlowSink.cpp
210 changes: 210 additions & 0 deletions mlir/lib/Transforms/TTLOps.cpp
@@ -0,0 +1,210 @@
#include "mlir/Pass/Pass.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/Utils.h"
#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
#include "mlir/Dialect/Affine/LoopUtils.h"
#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace mlir::affine;

namespace {

// Core data structures for analyzing loops and memory accesses
struct LoopInfo {
// Loop bounds and step
int64_t lowerBound;
int64_t upperBound;
int64_t step;

// Memory accesses in this loop
enum class AccessType {
Load,
Store
};

struct MemoryAccess {
Value memref; // The memref being accessed
AffineMap accessMap; // The affine map for the access
AccessType type; // Whether it's a load or store
};
SmallVector<MemoryAccess> accesses;
};

// Helper class to validate loop structures and memory accesses
class LoopValidator {
public:
// Check if a memory access is 2D
static bool is2DAccess(Operation *op) {
AffineMap map;
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
map = loadOp.getAffineMap();
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
map = storeOp.getAffineMap();
} else {
assert(false && "Expected load or store operation");
}
return map.getNumResults() == 2;
}


// Validate loop band and collect information if valid
static std::optional<SmallVector<LoopInfo>> validateAndCollectInfo(ArrayRef<AffineForOp> loops) {
// Check if it's a 2D perfectly nested loop
if (loops.size() != 2 || !affine::isPerfectlyNested(loops)) {
return std::nullopt;
}

SmallVector<LoopInfo> loopInfos;

// Analyze each loop
for (const auto &loop : loops) {
LoopInfo info;

// Get loop bounds and check if they're compile-time constants
auto lowerMap = const_cast<AffineForOp &>(loop).getLowerBoundMap();
auto upperMap = const_cast<AffineForOp &>(loop).getUpperBoundMap();

if (!lowerMap.isConstant() || !upperMap.isConstant()) {
return std::nullopt;
}

info.lowerBound = lowerMap.getSingleConstantResult();
info.upperBound = upperMap.getSingleConstantResult();
info.step = const_cast<AffineForOp &>(loop).getStep().getSExtValue();

// Only collect memory accesses in the innermost loop
if (loop == loops.back()) {
bool all2D = true;
loop->walk([&](Operation *op) {
if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
if (!is2DAccess(op)) {
all2D = false;
return;
}
info.accesses.push_back({loadOp.getMemRef(), loadOp.getAffineMap(), LoopInfo::AccessType::Load});
} else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
if (!is2DAccess(op)) {
all2D = false;
return;
}
info.accesses.push_back({storeOp.getMemRef(), storeOp.getAffineMap(), LoopInfo::AccessType::Store});
}
});

// If not all accesses are 2D, return nullopt
if (!all2D) {
return std::nullopt;
}
}

loopInfos.push_back(info);
}

return loopInfos;
}
};

// Helper function to print loop information
static void printLoopInfo(const SmallVector<LoopInfo> &loopInfos, func::FuncOp funcOp) {
llvm::errs() << "\n=== Band Information ===\n";

// Print loop structure
llvm::errs() << "Loop Structure:\n";
for (size_t i = 0; i < loopInfos.size(); i++) {
const auto &info = loopInfos[i];
llvm::errs() << " Loop " << i << ": [" << info.lowerBound << ", "
<< info.upperBound << ") step " << info.step << "\n";
}

// Print only innermost loop's memory accesses
llvm::errs() << "\nMemory Accesses in Innermost Loop:\n";
const auto &innerLoop = loopInfos.back();
for (const auto &access : innerLoop.accesses) {
llvm::errs() << " " << (access.type == LoopInfo::AccessType::Load ? "Load" : "Store") << " from ";

// Print block argument information
if (auto blockArg = dyn_cast<BlockArgument>(access.memref)) {
llvm::errs() << "<block argument> of type '" << blockArg.getType()
<< "' at index: " << blockArg.getArgNumber()
<< " (arg" << blockArg.getArgNumber() << ")";
} else {
llvm::errs() << access.memref;
}
llvm::errs() << "\n";
llvm::errs() << " Access Map: " << access.accessMap << "\n";
}
llvm::errs() << "================================\n";
}

struct TTLOps : public PassWrapper<TTLOps, OperationPass<ModuleOp>> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TTLOps)

// Default constructor
TTLOps() = default;

// Copy constructor - needed for pass cloning
TTLOps(const TTLOps &other) : PassWrapper<TTLOps, OperationPass<ModuleOp>>(other) {
// Copy option values
localMemorySize = other.localMemorySize;
loadCost = other.loadCost;
storeCost = other.storeCost;
}

// Pass options
Option<unsigned> localMemorySize{
*this, "local-memory-size",
llvm::cl::desc("Size of local memory in KB (default: 32)"),
llvm::cl::init(32)};
Option<unsigned> loadCost{
*this, "load-cost",
llvm::cl::desc("Cost of a load operation (default: 1)"),
llvm::cl::init(1)};
Option<unsigned> storeCost{
*this, "store-cost",
llvm::cl::desc("Cost of a store operation (default: 1)"),
llvm::cl::init(1)};

StringRef getArgument() const override { return "ttl-ops"; }
StringRef getDescription() const override { return "TTL operations pass"; }

void runOnOperation() override {
ModuleOp module = getOperation();

// Ensure we only have one function in the module
auto funcOps = module.getOps<func::FuncOp>();
assert(std::distance(funcOps.begin(), funcOps.end()) == 1 &&
"Expected exactly one function in the module");

// Find perfect loop nests (bands) in each function
module->walk([&](func::FuncOp funcOp) {
std::vector<SmallVector<AffineForOp, 6>> bands;
mlir::affine::getTileableBands(funcOp, &bands);

// Analyze each band
for (const auto &band : bands) {
// Validate band and collect information
if (auto loopInfos = LoopValidator::validateAndCollectInfo(band)) {
printLoopInfo(*loopInfos, funcOp);
}
}
});
}
};

// Register the pass
void registerTTLOps() {
PassRegistration<TTLOps>();
}
} // end anonymous namespace

namespace mlir {
std::unique_ptr<Pass> createTTLOpsPass() {
return std::make_unique<TTLOps>();
}
} // end namespace mlir
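
To exercise this pass by hand on Polygeist output once it is registered, an invocation along these lines should print the detected 2D bands and their memory accesses to stderr (the file name is a placeholder; the options map to the Option<> declarations above):

```
mlir-opt sigmoid.mlir --ttl-ops="local-memory-size=64 load-cost=1 store-cost=2" -o /dev/null
```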