@@ -170,15 +170,20 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,
170
170
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0 ;
171
171
MaxLoadsPerMemcmp = 0 ;
172
172
} else {
173
- auto SelectionDAGInfo = STI.getSelectionDAGInfo ();
174
- // inline memcpy() for kernel to see explicit copy
175
- unsigned CommonMaxStores =
176
- SelectionDAGInfo->getCommonMaxStoresPerMemFunc ();
177
-
178
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
179
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
180
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
181
- MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
173
+ // A syscall consumes at least 10 CUs, so we should only invoke it when
174
+ // the inline expansion would otherwise need at least 10 instructions.
175
+
176
+ // Memset translates to stdw or stdxw, so the maximum should be 10.
177
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10 ;
178
+ // Each store in memcpy follows a load, so the maximum is 5.
179
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5 ;
180
+ // Each store in memmove follows a load, so the maximum is 5.
181
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5 ;
182
+ // Memcmp expands to three instructions for each load:
183
+ // 1. One load for each pointer being compared.
184
+ // 2. One jne for each load.
185
+ // The limit here should be three, since 3*3 = 9, which stays below 10.
186
+ MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3 ;
182
187
}
183
188
184
189
// CPU/Feature control
0 commit comments