|
79 | 79 | // ld.global.f32 %f4, [%rl6+132]; // much better |
80 | 80 | // |
81 | 81 | // Another improvement enabled by the LowerGEP flag is to lower a GEP with |
82 | | -// multiple indices to either multiple GEPs with a single index or arithmetic |
83 | | -// operations (depending on whether the target uses alias analysis in codegen). |
| 82 | +// multiple indices to multiple GEPs with a single index. |
84 | 83 | // Such a transformation can have the following benefits: |
85 | 84 | // (1) It can always extract constants in the indices of structure type. |
86 | 85 | // (2) After such lowering, there are more optimization opportunities such as |
87 | 86 | // CSE, LICM and CGP. |
88 | 87 | // |
89 | 88 | // E.g. The following GEPs have multiple indices: |
90 | 89 | // BB1: |
91 | | -// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3 |
| 90 | +// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3 |
92 | 91 | // load %p |
93 | 92 | // ... |
94 | 93 | // BB2: |
95 | | -// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2 |
| 94 | +// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j2, i32 2 |
96 | 95 | // load %p2 |
97 | 96 | // ... |
98 | 97 | // |
99 | 98 | // We cannot do CSE on the common part related to index "i64 %i". Lowering |
100 | 99 | // the GEPs makes this possible. |
101 | | -// If the target does not use alias analysis in codegen, this pass will |
102 | | -// lower a GEP with multiple indices into arithmetic operations: |
103 | | -// BB1: |
104 | | -// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
105 | | -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
106 | | -// %3 = add i64 %1, %2 ; CSE opportunity |
107 | | -// %4 = mul i64 %j1, length_of_struct |
108 | | -// %5 = add i64 %3, %4 |
109 | | -// %6 = add i64 %3, struct_field_3 ; Constant offset |
110 | | -// %p = inttoptr i64 %6 to i32* |
111 | | -// load %p |
112 | | -// ... |
113 | | -// BB2: |
114 | | -// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity |
115 | | -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
116 | | -// %9 = add i64 %7, %8 ; CSE opportunity |
117 | | -// %10 = mul i64 %j2, length_of_struct |
118 | | -// %11 = add i64 %9, %10 |
119 | | -// %12 = add i64 %11, struct_field_2 ; Constant offset |
120 | | -// %p = inttoptr i64 %12 to i32* |
121 | | -// load %p2 |
122 | | -// ... |
123 | 100 | // |
124 | | -// If the target uses alias analysis in codegen, this pass will lower a GEP |
125 | | -// with multiple indices into multiple GEPs with a single index: |
| 101 | +// This pass will lower a GEP with multiple indices into multiple GEPs with a |
| 102 | +// single index: |
126 | 103 | // BB1: |
127 | | -// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
128 | | -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
129 | | -// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity |
| 104 | +// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
| 105 | +// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity |
130 | 106 | // %4 = mul i64 %j1, length_of_struct |
131 | | -// %5 = getelementptr i8* %3, i64 %4 |
132 | | -// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset |
133 | | -// %p = bitcast i8* %6 to i32* |
| 107 | +// %5 = getelementptr i8, ptr %3, i64 %4 |
| 108 | +// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset |
134 | 109 | // load %p |
135 | 110 | // ... |
136 | 111 | // BB2: |
137 | | -// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity |
138 | | -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
139 | | -// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity |
| 112 | +// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity |
| 113 | +// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity |
140 | 114 | // %10 = mul i64 %j2, length_of_struct |
141 | | -// %11 = getelementptr i8* %9, i64 %10 |
142 | | -// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset |
143 | | -// %p2 = bitcast i8* %12 to i32* |
| 115 | +// %11 = getelementptr i8, ptr %9, i64 %10 |
| 116 | +// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset |
144 | 117 | // load %p2 |
145 | 118 | // ... |
146 | 119 | // |
@@ -408,16 +381,6 @@ class SeparateConstOffsetFromGEP { |
408 | 381 | void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic, |
409 | 382 | int64_t AccumulativeByteOffset); |
410 | 383 |
|
411 | | - /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form. |
412 | | - /// Function splitGEP already split the original GEP into a variadic part and |
413 | | - /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the |
414 | | - /// variadic part into a set of arithmetic operations and applies |
415 | | - /// AccumulativeByteOffset to it. |
416 | | - /// \p Variadic The variadic part of the original GEP. |
417 | | - /// \p AccumulativeByteOffset The constant offset. |
418 | | - void lowerToArithmetics(GetElementPtrInst *Variadic, |
419 | | - int64_t AccumulativeByteOffset); |
420 | | - |
421 | 384 | /// Finds the constant offset within each index and accumulates them. If |
422 | 385 | /// LowerGEP is true, it finds in indices of both sequential and structure |
423 | 386 | /// types, otherwise it only finds in sequential indices. The output |
@@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( |
951 | 914 | Variadic->eraseFromParent(); |
952 | 915 | } |
953 | 916 |
|
954 | | -void |
955 | | -SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, |
956 | | - int64_t AccumulativeByteOffset) { |
957 | | - IRBuilder<> Builder(Variadic); |
958 | | - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); |
959 | | - assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && |
960 | | - "Pointer type must match index type for arithmetic-based lowering of " |
961 | | - "split GEPs"); |
962 | | - |
963 | | - Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); |
964 | | - gep_type_iterator GTI = gep_type_begin(*Variadic); |
965 | | - // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We |
966 | | - // don't create arithmetics for structure indices, as they are accumulated |
967 | | - // in the constant offset index. |
968 | | - for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) { |
969 | | - if (GTI.isSequential()) { |
970 | | - Value *Idx = Variadic->getOperand(I); |
971 | | - // Skip zero indices. |
972 | | - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) |
973 | | - if (CI->isZero()) |
974 | | - continue; |
975 | | - |
976 | | - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), |
977 | | - GTI.getSequentialElementStride(*DL)); |
978 | | - // Scale the index by element size. |
979 | | - if (ElementSize != 1) { |
980 | | - if (ElementSize.isPowerOf2()) { |
981 | | - Idx = Builder.CreateShl( |
982 | | - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); |
983 | | - } else { |
984 | | - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); |
985 | | - } |
986 | | - } |
987 | | - // Create an ADD for each index. |
988 | | - ResultPtr = Builder.CreateAdd(ResultPtr, Idx); |
989 | | - } |
990 | | - } |
991 | | - |
992 | | - // Create an ADD for the constant offset index. |
993 | | - if (AccumulativeByteOffset != 0) { |
994 | | - ResultPtr = Builder.CreateAdd( |
995 | | - ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset)); |
996 | | - } |
997 | | - |
998 | | - ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType()); |
999 | | - Variadic->replaceAllUsesWith(ResultPtr); |
1000 | | - Variadic->eraseFromParent(); |
1001 | | -} |
1002 | | - |
1003 | 917 | bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, |
1004 | 918 | TargetTransformInfo &TTI) { |
1005 | 919 | auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); |
@@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { |
1091 | 1005 | // Notice that we don't remove struct field indices here. If LowerGEP is |
1092 | 1006 | // disabled, a structure index is not accumulated and we still use the old |
1093 | 1007 | // one. If LowerGEP is enabled, a structure index is accumulated in the |
1094 | | - // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later |
1095 | | - // handle the constant offset and won't need a new structure index. |
| 1008 | + // constant offset. lowerToSingleIndexGEPs will later handle the constant |
| 1009 | + // offset and won't need a new structure index. |
1096 | 1010 | gep_type_iterator GTI = gep_type_begin(*GEP); |
1097 | 1011 | for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { |
1098 | 1012 | if (GTI.isSequential()) { |
@@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { |
1167 | 1081 |
|
1168 | 1082 | GEP->setNoWrapFlags(NewGEPFlags); |
1169 | 1083 |
|
1170 | | - // Lowers a GEP to either GEPs with a single index or arithmetic operations. |
| 1084 | + // Lowers a GEP to GEPs with a single index. |
1171 | 1085 | if (LowerGEP) { |
1172 | | - // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to |
1173 | | - // arithmetic operations if the target uses alias analysis in codegen. |
1174 | | - // Additionally, pointers that aren't integral (and so can't be safely |
1175 | | - // converted to integers) or those whose offset size is different from their |
1176 | | - // pointer size (which means that doing integer arithmetic on them could |
1177 | | - // affect that data) can't be lowered in this way. |
1178 | | - unsigned AddrSpace = GEP->getPointerAddressSpace(); |
1179 | | - bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != |
1180 | | - DL->getIndexSizeInBits(AddrSpace); |
1181 | | - if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || |
1182 | | - PointerHasExtraData) |
1183 | | - lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); |
1184 | | - else |
1185 | | - lowerToArithmetics(GEP, AccumulativeByteOffset); |
| 1086 | + lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); |
1186 | 1087 | return true; |
1187 | 1088 | } |
1188 | 1089 |
|
|
0 commit comments