@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
1053
1053
I915_FENCE_GFP );
1054
1054
}
1055
1055
1056
+ static bool intel_timeline_sync_has_start (struct intel_timeline * tl ,
1057
+ struct dma_fence * fence )
1058
+ {
1059
+ return __intel_timeline_sync_is_later (tl ,
1060
+ fence -> context ,
1061
+ fence -> seqno - 1 );
1062
+ }
1063
+
1064
+ static int intel_timeline_sync_set_start (struct intel_timeline * tl ,
1065
+ const struct dma_fence * fence )
1066
+ {
1067
+ return __intel_timeline_sync_set (tl , fence -> context , fence -> seqno - 1 );
1068
+ }
1069
+
1056
1070
static int
1057
- i915_request_await_request (struct i915_request * to , struct i915_request * from )
1071
+ __i915_request_await_execution (struct i915_request * to ,
1072
+ struct i915_request * from ,
1073
+ void (* hook )(struct i915_request * rq ,
1074
+ struct dma_fence * signal ))
1058
1075
{
1059
- int ret ;
1076
+ int err ;
1060
1077
1061
- GEM_BUG_ON (to == from );
1062
- GEM_BUG_ON (to -> timeline == from -> timeline );
1078
+ GEM_BUG_ON (intel_context_is_barrier (from -> context ));
1063
1079
1064
- if (i915_request_completed (from )) {
1065
- i915_sw_fence_set_error_once (& to -> submit , from -> fence .error );
1080
+ /* Submit both requests at the same time */
1081
+ err = __await_execution (to , from , hook , I915_FENCE_GFP );
1082
+ if (err )
1083
+ return err ;
1084
+
1085
+ /* Squash repeated depenendices to the same timelines */
1086
+ if (intel_timeline_sync_has_start (i915_request_timeline (to ),
1087
+ & from -> fence ))
1066
1088
return 0 ;
1089
+
1090
+ /*
1091
+ * Wait until the start of this request.
1092
+ *
1093
+ * The execution cb fires when we submit the request to HW. But in
1094
+ * many cases this may be long before the request itself is ready to
1095
+ * run (consider that we submit 2 requests for the same context, where
1096
+ * the request of interest is behind an indefinite spinner). So we hook
1097
+ * up to both to reduce our queues and keep the execution lag minimised
1098
+ * in the worst case, though we hope that the await_start is elided.
1099
+ */
1100
+ err = i915_request_await_start (to , from );
1101
+ if (err < 0 )
1102
+ return err ;
1103
+
1104
+ /*
1105
+ * Ensure both start together [after all semaphores in signal]
1106
+ *
1107
+ * Now that we are queued to the HW at roughly the same time (thanks
1108
+ * to the execute cb) and are ready to run at roughly the same time
1109
+ * (thanks to the await start), our signaler may still be indefinitely
1110
+ * delayed by waiting on a semaphore from a remote engine. If our
1111
+ * signaler depends on a semaphore, so indirectly do we, and we do not
1112
+ * want to start our payload until our signaler also starts theirs.
1113
+ * So we wait.
1114
+ *
1115
+ * However, there is also a second condition for which we need to wait
1116
+ * for the precise start of the signaler. Consider that the signaler
1117
+ * was submitted in a chain of requests following another context
1118
+ * (with just an ordinary intra-engine fence dependency between the
1119
+ * two). In this case the signaler is queued to HW, but not for
1120
+ * immediate execution, and so we must wait until it reaches the
1121
+ * active slot.
1122
+ */
1123
+ if (intel_engine_has_semaphores (to -> engine ) &&
1124
+ !i915_request_has_initial_breadcrumb (to )) {
1125
+ err = __emit_semaphore_wait (to , from , from -> fence .seqno - 1 );
1126
+ if (err < 0 )
1127
+ return err ;
1067
1128
}
1068
1129
1130
+ /* Couple the dependency tree for PI on this exposed to->fence */
1069
1131
if (to -> engine -> schedule ) {
1070
- ret = i915_sched_node_add_dependency (& to -> sched ,
1132
+ err = i915_sched_node_add_dependency (& to -> sched ,
1071
1133
& from -> sched ,
1072
- I915_DEPENDENCY_EXTERNAL );
1073
- if (ret < 0 )
1074
- return ret ;
1134
+ I915_DEPENDENCY_WEAK );
1135
+ if (err < 0 )
1136
+ return err ;
1075
1137
}
1076
1138
1077
- if (to -> engine == from -> engine )
1078
- ret = i915_sw_fence_await_sw_fence_gfp (& to -> submit ,
1079
- & from -> submit ,
1080
- I915_FENCE_GFP );
1081
- else
1082
- ret = emit_semaphore_wait (to , from , I915_FENCE_GFP );
1083
- if (ret < 0 )
1084
- return ret ;
1085
-
1086
- return 0 ;
1139
+ return intel_timeline_sync_set_start (i915_request_timeline (to ),
1140
+ & from -> fence );
1087
1141
}
1088
1142
1089
1143
static void mark_external (struct i915_request * rq )
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
1136
1190
}
1137
1191
1138
1192
int
1139
- i915_request_await_dma_fence (struct i915_request * rq , struct dma_fence * fence )
1193
+ i915_request_await_execution (struct i915_request * rq ,
1194
+ struct dma_fence * fence ,
1195
+ void (* hook )(struct i915_request * rq ,
1196
+ struct dma_fence * signal ))
1140
1197
{
1141
1198
struct dma_fence * * child = & fence ;
1142
1199
unsigned int nchild = 1 ;
1143
1200
int ret ;
1144
1201
1145
- /*
1146
- * Note that if the fence-array was created in signal-on-any mode,
1147
- * we should *not* decompose it into its individual fences. However,
1148
- * we don't currently store which mode the fence-array is operating
1149
- * in. Fortunately, the only user of signal-on-any is private to
1150
- * amdgpu and we should not see any incoming fence-array from
1151
- * sync-file being in signal-on-any mode.
1152
- */
1153
1202
if (dma_fence_is_array (fence )) {
1154
1203
struct dma_fence_array * array = to_dma_fence_array (fence );
1155
1204
1205
+ /* XXX Error for signal-on-any fence arrays */
1206
+
1156
1207
child = array -> fences ;
1157
1208
nchild = array -> num_fences ;
1158
1209
GEM_BUG_ON (!nchild );
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
1165
1216
continue ;
1166
1217
}
1167
1218
1168
- /*
1169
- * Requests on the same timeline are explicitly ordered, along
1170
- * with their dependencies, by i915_request_add() which ensures
1171
- * that requests are submitted in-order through each ring.
1172
- */
1173
1219
if (fence -> context == rq -> fence .context )
1174
1220
continue ;
1175
1221
1176
- /* Squash repeated waits to the same timelines */
1177
- if (fence -> context &&
1178
- intel_timeline_sync_is_later (i915_request_timeline (rq ),
1179
- fence ))
1180
- continue ;
1222
+ /*
1223
+ * We don't squash repeated fence dependencies here as we
1224
+ * want to run our callback in all cases.
1225
+ */
1181
1226
1182
1227
if (dma_fence_is_i915 (fence ))
1183
- ret = i915_request_await_request (rq , to_request (fence ));
1228
+ ret = __i915_request_await_execution (rq ,
1229
+ to_request (fence ),
1230
+ hook );
1184
1231
else
1185
1232
ret = i915_request_await_external (rq , fence );
1186
1233
if (ret < 0 )
1187
1234
return ret ;
1188
-
1189
- /* Record the latest fence used against each timeline */
1190
- if (fence -> context )
1191
- intel_timeline_sync_set (i915_request_timeline (rq ),
1192
- fence );
1193
1235
} while (-- nchild );
1194
1236
1195
1237
return 0 ;
1196
1238
}
1197
1239
1198
- static bool intel_timeline_sync_has_start (struct intel_timeline * tl ,
1199
- struct dma_fence * fence )
1200
- {
1201
- return __intel_timeline_sync_is_later (tl ,
1202
- fence -> context ,
1203
- fence -> seqno - 1 );
1204
- }
1205
-
1206
- static int intel_timeline_sync_set_start (struct intel_timeline * tl ,
1207
- const struct dma_fence * fence )
1208
- {
1209
- return __intel_timeline_sync_set (tl , fence -> context , fence -> seqno - 1 );
1210
- }
1211
-
1212
1240
static int
1213
- __i915_request_await_execution (struct i915_request * to ,
1214
- struct i915_request * from ,
1215
- void (* hook )(struct i915_request * rq ,
1216
- struct dma_fence * signal ))
1241
+ i915_request_await_request (struct i915_request * to , struct i915_request * from )
1217
1242
{
1218
- int err ;
1219
-
1220
- GEM_BUG_ON (intel_context_is_barrier (from -> context ));
1243
+ int ret ;
1221
1244
1222
- /* Submit both requests at the same time */
1223
- err = __await_execution (to , from , hook , I915_FENCE_GFP );
1224
- if (err )
1225
- return err ;
1245
+ GEM_BUG_ON (to == from );
1246
+ GEM_BUG_ON (to -> timeline == from -> timeline );
1226
1247
1227
- /* Squash repeated depenendices to the same timelines */
1228
- if (intel_timeline_sync_has_start (i915_request_timeline (to ),
1229
- & from -> fence ))
1248
+ if (i915_request_completed (from )) {
1249
+ i915_sw_fence_set_error_once (& to -> submit , from -> fence .error );
1230
1250
return 0 ;
1231
-
1232
- /*
1233
- * Wait until the start of this request.
1234
- *
1235
- * The execution cb fires when we submit the request to HW. But in
1236
- * many cases this may be long before the request itself is ready to
1237
- * run (consider that we submit 2 requests for the same context, where
1238
- * the request of interest is behind an indefinite spinner). So we hook
1239
- * up to both to reduce our queues and keep the execution lag minimised
1240
- * in the worst case, though we hope that the await_start is elided.
1241
- */
1242
- err = i915_request_await_start (to , from );
1243
- if (err < 0 )
1244
- return err ;
1245
-
1246
- /*
1247
- * Ensure both start together [after all semaphores in signal]
1248
- *
1249
- * Now that we are queued to the HW at roughly the same time (thanks
1250
- * to the execute cb) and are ready to run at roughly the same time
1251
- * (thanks to the await start), our signaler may still be indefinitely
1252
- * delayed by waiting on a semaphore from a remote engine. If our
1253
- * signaler depends on a semaphore, so indirectly do we, and we do not
1254
- * want to start our payload until our signaler also starts theirs.
1255
- * So we wait.
1256
- *
1257
- * However, there is also a second condition for which we need to wait
1258
- * for the precise start of the signaler. Consider that the signaler
1259
- * was submitted in a chain of requests following another context
1260
- * (with just an ordinary intra-engine fence dependency between the
1261
- * two). In this case the signaler is queued to HW, but not for
1262
- * immediate execution, and so we must wait until it reaches the
1263
- * active slot.
1264
- */
1265
- if (intel_engine_has_semaphores (to -> engine ) &&
1266
- !i915_request_has_initial_breadcrumb (to )) {
1267
- err = __emit_semaphore_wait (to , from , from -> fence .seqno - 1 );
1268
- if (err < 0 )
1269
- return err ;
1270
1251
}
1271
1252
1272
- /* Couple the dependency tree for PI on this exposed to->fence */
1273
1253
if (to -> engine -> schedule ) {
1274
- err = i915_sched_node_add_dependency (& to -> sched ,
1254
+ ret = i915_sched_node_add_dependency (& to -> sched ,
1275
1255
& from -> sched ,
1276
- I915_DEPENDENCY_WEAK );
1277
- if (err < 0 )
1278
- return err ;
1256
+ I915_DEPENDENCY_EXTERNAL );
1257
+ if (ret < 0 )
1258
+ return ret ;
1279
1259
}
1280
1260
1281
- return intel_timeline_sync_set_start (i915_request_timeline (to ),
1282
- & from -> fence );
1261
+ if (to -> engine == READ_ONCE (from -> engine ))
1262
+ ret = i915_sw_fence_await_sw_fence_gfp (& to -> submit ,
1263
+ & from -> submit ,
1264
+ I915_FENCE_GFP );
1265
+ else
1266
+ ret = emit_semaphore_wait (to , from , I915_FENCE_GFP );
1267
+ if (ret < 0 )
1268
+ return ret ;
1269
+
1270
+ return 0 ;
1283
1271
}
1284
1272
1285
1273
int
1286
- i915_request_await_execution (struct i915_request * rq ,
1287
- struct dma_fence * fence ,
1288
- void (* hook )(struct i915_request * rq ,
1289
- struct dma_fence * signal ))
1274
+ i915_request_await_dma_fence (struct i915_request * rq , struct dma_fence * fence )
1290
1275
{
1291
1276
struct dma_fence * * child = & fence ;
1292
1277
unsigned int nchild = 1 ;
1293
1278
int ret ;
1294
1279
1280
+ /*
1281
+ * Note that if the fence-array was created in signal-on-any mode,
1282
+ * we should *not* decompose it into its individual fences. However,
1283
+ * we don't currently store which mode the fence-array is operating
1284
+ * in. Fortunately, the only user of signal-on-any is private to
1285
+ * amdgpu and we should not see any incoming fence-array from
1286
+ * sync-file being in signal-on-any mode.
1287
+ */
1295
1288
if (dma_fence_is_array (fence )) {
1296
1289
struct dma_fence_array * array = to_dma_fence_array (fence );
1297
1290
1298
- /* XXX Error for signal-on-any fence arrays */
1299
-
1300
1291
child = array -> fences ;
1301
1292
nchild = array -> num_fences ;
1302
1293
GEM_BUG_ON (!nchild );
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
1309
1300
continue ;
1310
1301
}
1311
1302
1303
+ /*
1304
+ * Requests on the same timeline are explicitly ordered, along
1305
+ * with their dependencies, by i915_request_add() which ensures
1306
+ * that requests are submitted in-order through each ring.
1307
+ */
1312
1308
if (fence -> context == rq -> fence .context )
1313
1309
continue ;
1314
1310
1315
- /*
1316
- * We don't squash repeated fence dependencies here as we
1317
- * want to run our callback in all cases.
1318
- */
1311
+ /* Squash repeated waits to the same timelines */
1312
+ if (fence -> context &&
1313
+ intel_timeline_sync_is_later (i915_request_timeline (rq ),
1314
+ fence ))
1315
+ continue ;
1319
1316
1320
1317
if (dma_fence_is_i915 (fence ))
1321
- ret = __i915_request_await_execution (rq ,
1322
- to_request (fence ),
1323
- hook );
1318
+ ret = i915_request_await_request (rq , to_request (fence ));
1324
1319
else
1325
1320
ret = i915_request_await_external (rq , fence );
1326
1321
if (ret < 0 )
1327
1322
return ret ;
1323
+
1324
+ /* Record the latest fence used against each timeline */
1325
+ if (fence -> context )
1326
+ intel_timeline_sync_set (i915_request_timeline (rq ),
1327
+ fence );
1328
1328
} while (-- nchild );
1329
1329
1330
1330
return 0 ;
0 commit comments