1+ // RUN: mlir-opt %s -split-input-file -test-single-fold | FileCheck %s
2+
3+ // The tests in this file verify that fold() methods can handle complex
4+ // optimization scenarios without requiring multiple folding iterations.
5+ // This is important because:
6+ //
7+ // 1. OpBuilder::createOrFold() only calls fold() once, so operations must
8+ // be fully optimized in that single call
9+ // 2. Multiple rounds of folding would incur higher performance costs,
10+ // so it's more efficient to complete all optimizations in one pass
11+ //
12+ // These tests ensure that folding implementations are robust and complete,
13+ // avoiding situations where operations are left in intermediate states
14+ // that could be further optimized.
15+
// Extracting element 1 right after %arg1 was inserted at position 1 must fold
// directly to %arg1 within one fold() invocation. The extract position is
// passed as an SSA operand (defined by a constant index) rather than as a
// static position, and the fold still has to complete in that single call.
// CHECK-LABEL: fold_extract_in_single_pass
// CHECK-SAME: (%{{.*}}: vector<4xf16>, %[[ARG1:.+]]: f16)
func.func @fold_extract_in_single_pass(%arg0: vector<4xf16>, %arg1: f16) -> f16 {
  // Overwrite lane 1 of the incoming vector with the scalar argument.
  %updated = vector.insert %arg1, %arg0 [1] : f16 into vector<4xf16>
  // The same lane, but expressed as an index-typed SSA value.
  %pos = arith.constant 1 : index
  %elem = vector.extract %updated[%pos] : f16 from vector<4xf16>
  // The returned value must be the original scalar argument.
  // CHECK: return %[[ARG1]] : f16
  return %elem : f16
}
26+
27+ // -----
28+
// Inserting the constant 2.5 at position 1 of an all-zero constant vector
// must fold to the dense constant [0.0, 2.5] within one fold() invocation.
// The insert position is passed as an SSA operand (defined by a constant
// index) rather than as a static position, and the fold still has to complete
// in that single call.
// CHECK-LABEL: fold_insert_in_single_pass
func.func @fold_insert_in_single_pass() -> vector<2xf16> {
  // Destination vector: both lanes are zero.
  %zeros = arith.constant dense<0.000000e+00> : vector<2xf16>
  // Lane to overwrite, expressed as an index-typed SSA value.
  %pos = arith.constant 1 : index
  // Scalar written into that lane.
  %scalar = arith.constant 2.5 : f16
  // CHECK: arith.constant dense<[0.000000e+00, 2.500000e+00]> : vector<2xf16>
  %result = vector.insert %scalar, %zeros [%pos] : f16 into vector<2xf16>
  return %result : vector<2xf16>
}
0 commit comments