Skip to content

Commit 98689ff

Browse files
mshelego and igcbot
authored and committed
Prevent folding of loaded constants
When vector constants are loaded using only wrregions (without genx.constant intrinsics), they are immediately folded back. This prevents some optimizations, such as moving constants outside of loops.
1 parent fd750c4 commit 98689ff

File tree

2 files changed

+30
-15
lines changed

2 files changed

+30
-15
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXConstants.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,9 +1749,11 @@ Instruction *ConstantLoader::loadBig(Instruction *InsertBefore) {
17491749
// Load this subvector constant if necessary, and insert into the overall
17501750
// value with wrregion.
17511751
Constant *SubC = getConstantSubvector(C, Idx, Size);
1752-
Value *SubV = SubC;
17531752
ConstantLoader SubLoader(SubC, Subtarget, DL);
1754-
if (!SubLoader.isSimple())
1753+
Instruction *SubV = nullptr;
1754+
if (SubLoader.isSimple())
1755+
SubV = SubLoader.load(InsertBefore);
1756+
else
17551757
SubV = SubLoader.loadNonSimple(InsertBefore);
17561758
Region R(C, &DL);
17571759
R.getSubregion(Idx, Size);

IGC/VectorCompiler/test/PostLegalization/constant_load_return.ll

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,39 @@ target datalayout = "e-p:64:64-i64:64-n8:16:32"
1515

1616
define <3 x i64> @legalize_return_integer() {
1717
; CHECK-LABEL: @legalize_return_integer(
18-
; CHECK-NEXT: [[SPLIT1:%.+]] = call <3 x i64> @llvm.genx.wrregioni.v3i64.v2i64.i16.i1(<3 x i64> undef, <2 x i64> zeroinitializer, i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
19-
; CHECK-NEXT: [[SPLIT2:%.+]] = call <3 x i64> @llvm.genx.wrregioni.v3i64.v1i64.i16.i1(<3 x i64> [[SPLIT1:%.+]], <1 x i64> zeroinitializer, i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
18+
; CHECK-NEXT: [[CONST1:%.+]] = call <1 x i64> @llvm.genx.constanti.v1i64(<1 x i64> zeroinitializer)
19+
; CHECK-NEXT: [[SPLAT1:%.+]] = call <2 x i64> @llvm.genx.rdregioni.v2i64.v1i64.i16(<1 x i64> [[CONST1]], i32 0, i32 2, i32 0, i16 0, i32 undef)
20+
; CHECK-NEXT: [[SPLIT1:%.+]] = call <3 x i64> @llvm.genx.wrregioni.v3i64.v2i64.i16.i1(<3 x i64> undef, <2 x i64> [[SPLAT1]], i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
21+
; CHECK-NEXT: [[CONST2:%.+]] = call <1 x i64> @llvm.genx.constanti.v1i64(<1 x i64> zeroinitializer)
22+
; CHECK-NEXT: [[SPLIT2:%.+]] = call <3 x i64> @llvm.genx.wrregioni.v3i64.v1i64.i16.i1(<3 x i64> [[SPLIT1:%.+]], <1 x i64> [[CONST2]], i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
2023
; CHECK-NEXT: ret <3 x i64> [[SPLIT2:%.+]]
2124
ret <3 x i64> zeroinitializer
2225
}
2326

2427
define <3 x double> @legalize_return_double() {
2528
; CHECK-LABEL: @legalize_return_double(
26-
; CHECK-NEXT: [[SPLIT1:%.+]] = call <3 x double> @llvm.genx.wrregionf.v3f64.v2f64.i16.i1(<3 x double> undef, <2 x double> zeroinitializer, i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
27-
; CHECK-NEXT: [[SPLIT2:%.+]] = call <3 x double> @llvm.genx.wrregionf.v3f64.v1f64.i16.i1(<3 x double> [[SPLIT1]], <1 x double> zeroinitializer, i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
29+
; CHECK-NEXT: [[CONST1:%.+]] = call <1 x double> @llvm.genx.constantf.v1f64(<1 x double> zeroinitializer)
30+
; CHECK-NEXT: [[SPLAT1:%.+]] = call <2 x double> @llvm.genx.rdregionf.v2f64.v1f64.i16(<1 x double> [[CONST1]], i32 0, i32 2, i32 0, i16 0, i32 undef)
31+
; CHECK-NEXT: [[SPLIT1:%.+]] = call <3 x double> @llvm.genx.wrregionf.v3f64.v2f64.i16.i1(<3 x double> undef, <2 x double> [[SPLAT1]], i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
32+
; CHECK-NEXT: [[CONST2:%.+]] = call <1 x double> @llvm.genx.constantf.v1f64(<1 x double> zeroinitializer)
33+
; CHECK-NEXT: [[SPLIT2:%.+]] = call <3 x double> @llvm.genx.wrregionf.v3f64.v1f64.i16.i1(<3 x double> [[SPLIT1]], <1 x double> [[CONST2]], i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
2834
; CHECK-NEXT: ret <3 x double> [[SPLIT2]]
2935
ret <3 x double> zeroinitializer
3036
}
3137

3238
define <3 x i8*> @legalize_return_nullptr_vec() {
3339
; CHECK-LABEL: @legalize_return_nullptr_vec(
34-
; CHECK-TYPED-PTRS-NEXT: [[SPLIT1:%.+]] = call <3 x i8*> @llvm.genx.wrregioni.v3p0i8.v2p0i8.i16.i1(<3 x i8*> undef, <2 x i8*> zeroinitializer, i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
35-
; CHECK-TYPED-PTRS-NEXT: [[SPLIT2:%.+]] = call <3 x i8*> @llvm.genx.wrregioni.v3p0i8.v1p0i8.i16.i1(<3 x i8*> [[SPLIT1]], <1 x i8*> zeroinitializer, i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
40+
; CHECK-TYPED-PTRS-NEXT: [[CONST1:%.+]] = call <1 x i8*> @llvm.genx.constanti.v1p0i8(<1 x i8*> zeroinitializer)
41+
; CHECK-TYPED-PTRS-NEXT: [[SPLAT1:%.+]] = call <2 x i8*> @llvm.genx.rdregioni.v2p0i8.v1p0i8.i16(<1 x i8*> [[CONST1]], i32 0, i32 2, i32 0, i16 0, i32 undef)
42+
; CHECK-TYPED-PTRS-NEXT: [[SPLIT1:%.+]] = call <3 x i8*> @llvm.genx.wrregioni.v3p0i8.v2p0i8.i16.i1(<3 x i8*> undef, <2 x i8*> [[SPLAT1]], i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
43+
; CHECK-TYPED-PTRS-NEXT: [[CONST2:%.+]] = call <1 x i8*> @llvm.genx.constanti.v1p0i8(<1 x i8*> zeroinitializer)
44+
; CHECK-TYPED-PTRS-NEXT: [[SPLIT2:%.+]] = call <3 x i8*> @llvm.genx.wrregioni.v3p0i8.v1p0i8.i16.i1(<3 x i8*> [[SPLIT1]], <1 x i8*> [[CONST2]], i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
3645
; CHECK-TYPED-PTRS-NEXT: ret <3 x i8*> [[SPLIT2]]
37-
; CHECK-OPAQUE-PTRS-NEXT: [[SPLIT1:%.+]] = call <3 x ptr> @llvm.genx.wrregioni.v3p0.v2p0.i16.i1(<3 x ptr> undef, <2 x ptr> zeroinitializer, i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
38-
; CHECK-OPAQUE-PTRS-NEXT: [[SPLIT2:%.+]] = call <3 x ptr> @llvm.genx.wrregioni.v3p0.v1p0.i16.i1(<3 x ptr> [[SPLIT1]], <1 x ptr> zeroinitializer, i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
46+
; CHECK-OPAQUE-PTRS-NEXT: [[CONST1:%.+]] = call <1 x ptr> @llvm.genx.constanti.v1p0(<1 x ptr> zeroinitializer)
47+
; CHECK-OPAQUE-PTRS-NEXT: [[SPLAT1:%.+]] = call <2 x ptr> @llvm.genx.rdregioni.v2p0.v1p0.i16(<1 x ptr> [[CONST1]], i32 0, i32 2, i32 0, i16 0, i32 undef)
48+
; CHECK-OPAQUE-PTRS-NEXT: [[SPLIT1:%.+]] = call <3 x ptr> @llvm.genx.wrregioni.v3p0.v2p0.i16.i1(<3 x ptr> undef, <2 x ptr> [[SPLAT1]], i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
49+
; CHECK-OPAQUE-PTRS-NEXT: [[CONST2:%.+]] = call <1 x ptr> @llvm.genx.constanti.v1p0(<1 x ptr> zeroinitializer)
50+
; CHECK-OPAQUE-PTRS-NEXT: [[SPLIT2:%.+]] = call <3 x ptr> @llvm.genx.wrregioni.v3p0.v1p0.i16.i1(<3 x ptr> [[SPLIT1]], <1 x ptr> [[CONST2]], i32 1, i32 1, i32 1, i16 16, i32 undef, i1 true)
3951
; CHECK-OPAQUE-PTRS-NEXT: ret <3 x ptr> [[SPLIT2]]
4052
ret <3 x i8*> zeroinitializer
4153
}
@@ -80,12 +92,13 @@ define { <16 x i32> } @const_big_undef_return() {
8092
}
8193

8294
define { <3 x i32> } @legalize_struct_const_return() {
83-
; COM: ConstantLoader properly loads <3 x i32> vector, however PostLegalization
84-
; COM: also runs simplifyRegionInsts, thats folds the sequence of wrregioni O^o
85-
; COM: Though, the goal is archived, since ret does not have a constant as
86-
; COM: it's operand
8795
; CHECK-LABEL: @legalize_struct_const_return
88-
; CHECK-NEXT: [[STRUCT:%.+]] = insertvalue { <3 x i32> } undef, <3 x i32> <i32 1, i32 1, i32 1>, 0
96+
; CHECK-NEXT: [[CONST1:%.+]] = call <1 x i32> @llvm.genx.constanti.v1i32(<1 x i32> <i32 1>)
97+
; CHECK-NEXT: [[SPLAT1:%.+]] = call <2 x i32> @llvm.genx.rdregioni.v2i32.v1i32.i16(<1 x i32> [[CONST1]], i32 0, i32 2, i32 0, i16 0, i32 undef)
98+
; CHECK-NEXT: [[SPLIT1:%.+]] = call <3 x i32> @llvm.genx.wrregioni.v3i32.v2i32.i16.i1(<3 x i32> undef, <2 x i32> [[SPLAT1]], i32 2, i32 2, i32 1, i16 0, i32 undef, i1 true)
99+
; CHECK-NEXT: [[CONST2:%.+]] = call <1 x i32> @llvm.genx.constanti.v1i32(<1 x i32> <i32 1>)
100+
; CHECK-NEXT: [[SPLIT2:%.+]] = call <3 x i32> @llvm.genx.wrregioni.v3i32.v1i32.i16.i1(<3 x i32> [[SPLIT1]], <1 x i32> [[CONST2]], i32 1, i32 1, i32 1, i16 8, i32 undef, i1 true)
101+
; CHECK-NEXT: [[STRUCT:%.+]] = insertvalue { <3 x i32> } undef, <3 x i32> [[SPLIT2]], 0
89102
; CHECK-NEXT: ret { <3 x i32> } [[STRUCT]]
90103
ret { <3 x i32> } { <3 x i32> <i32 1, i32 1, i32 1> }
91104
}

0 commit comments

Comments
 (0)