Skip to content

Commit dd873dd

Browse files
ickle authored and jlahtinen-intel committed
drm/i915: Reorder await_execution before await_request
Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. Signed-off-by: Chris Wilson <[email protected]> Reviewed-by: Tvrtko Ursulin <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected] (cherry picked from commit ffb0c60) Signed-off-by: Joonas Lahtinen <[email protected]>
1 parent 757a939 commit dd873dd

File tree

1 file changed

+132
-132
lines changed

1 file changed

+132
-132
lines changed

drivers/gpu/drm/i915/i915_request.c

Lines changed: 132 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,37 +1053,91 @@ emit_semaphore_wait(struct i915_request *to,
10531053
I915_FENCE_GFP);
10541054
}
10551055

1056+
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
1057+
struct dma_fence *fence)
1058+
{
1059+
return __intel_timeline_sync_is_later(tl,
1060+
fence->context,
1061+
fence->seqno - 1);
1062+
}
1063+
1064+
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
1065+
const struct dma_fence *fence)
1066+
{
1067+
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
1068+
}
1069+
10561070
static int
1057-
i915_request_await_request(struct i915_request *to, struct i915_request *from)
1071+
__i915_request_await_execution(struct i915_request *to,
1072+
struct i915_request *from,
1073+
void (*hook)(struct i915_request *rq,
1074+
struct dma_fence *signal))
10581075
{
1059-
int ret;
1076+
int err;
10601077

1061-
GEM_BUG_ON(to == from);
1062-
GEM_BUG_ON(to->timeline == from->timeline);
1078+
GEM_BUG_ON(intel_context_is_barrier(from->context));
10631079

1064-
if (i915_request_completed(from)) {
1065-
i915_sw_fence_set_error_once(&to->submit, from->fence.error);
1080+
/* Submit both requests at the same time */
1081+
err = __await_execution(to, from, hook, I915_FENCE_GFP);
1082+
if (err)
1083+
return err;
1084+
1085+
/* Squash repeated depenendices to the same timelines */
1086+
if (intel_timeline_sync_has_start(i915_request_timeline(to),
1087+
&from->fence))
10661088
return 0;
1089+
1090+
/*
1091+
* Wait until the start of this request.
1092+
*
1093+
* The execution cb fires when we submit the request to HW. But in
1094+
* many cases this may be long before the request itself is ready to
1095+
* run (consider that we submit 2 requests for the same context, where
1096+
* the request of interest is behind an indefinite spinner). So we hook
1097+
* up to both to reduce our queues and keep the execution lag minimised
1098+
* in the worst case, though we hope that the await_start is elided.
1099+
*/
1100+
err = i915_request_await_start(to, from);
1101+
if (err < 0)
1102+
return err;
1103+
1104+
/*
1105+
* Ensure both start together [after all semaphores in signal]
1106+
*
1107+
* Now that we are queued to the HW at roughly the same time (thanks
1108+
* to the execute cb) and are ready to run at roughly the same time
1109+
* (thanks to the await start), our signaler may still be indefinitely
1110+
* delayed by waiting on a semaphore from a remote engine. If our
1111+
* signaler depends on a semaphore, so indirectly do we, and we do not
1112+
* want to start our payload until our signaler also starts theirs.
1113+
* So we wait.
1114+
*
1115+
* However, there is also a second condition for which we need to wait
1116+
* for the precise start of the signaler. Consider that the signaler
1117+
* was submitted in a chain of requests following another context
1118+
* (with just an ordinary intra-engine fence dependency between the
1119+
* two). In this case the signaler is queued to HW, but not for
1120+
* immediate execution, and so we must wait until it reaches the
1121+
* active slot.
1122+
*/
1123+
if (intel_engine_has_semaphores(to->engine) &&
1124+
!i915_request_has_initial_breadcrumb(to)) {
1125+
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
1126+
if (err < 0)
1127+
return err;
10671128
}
10681129

1130+
/* Couple the dependency tree for PI on this exposed to->fence */
10691131
if (to->engine->schedule) {
1070-
ret = i915_sched_node_add_dependency(&to->sched,
1132+
err = i915_sched_node_add_dependency(&to->sched,
10711133
&from->sched,
1072-
I915_DEPENDENCY_EXTERNAL);
1073-
if (ret < 0)
1074-
return ret;
1134+
I915_DEPENDENCY_WEAK);
1135+
if (err < 0)
1136+
return err;
10751137
}
10761138

1077-
if (to->engine == from->engine)
1078-
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
1079-
&from->submit,
1080-
I915_FENCE_GFP);
1081-
else
1082-
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
1083-
if (ret < 0)
1084-
return ret;
1085-
1086-
return 0;
1139+
return intel_timeline_sync_set_start(i915_request_timeline(to),
1140+
&from->fence);
10871141
}
10881142

10891143
static void mark_external(struct i915_request *rq)
@@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
11361190
}
11371191

11381192
int
1139-
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
1193+
i915_request_await_execution(struct i915_request *rq,
1194+
struct dma_fence *fence,
1195+
void (*hook)(struct i915_request *rq,
1196+
struct dma_fence *signal))
11401197
{
11411198
struct dma_fence **child = &fence;
11421199
unsigned int nchild = 1;
11431200
int ret;
11441201

1145-
/*
1146-
* Note that if the fence-array was created in signal-on-any mode,
1147-
* we should *not* decompose it into its individual fences. However,
1148-
* we don't currently store which mode the fence-array is operating
1149-
* in. Fortunately, the only user of signal-on-any is private to
1150-
* amdgpu and we should not see any incoming fence-array from
1151-
* sync-file being in signal-on-any mode.
1152-
*/
11531202
if (dma_fence_is_array(fence)) {
11541203
struct dma_fence_array *array = to_dma_fence_array(fence);
11551204

1205+
/* XXX Error for signal-on-any fence arrays */
1206+
11561207
child = array->fences;
11571208
nchild = array->num_fences;
11581209
GEM_BUG_ON(!nchild);
@@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
11651216
continue;
11661217
}
11671218

1168-
/*
1169-
* Requests on the same timeline are explicitly ordered, along
1170-
* with their dependencies, by i915_request_add() which ensures
1171-
* that requests are submitted in-order through each ring.
1172-
*/
11731219
if (fence->context == rq->fence.context)
11741220
continue;
11751221

1176-
/* Squash repeated waits to the same timelines */
1177-
if (fence->context &&
1178-
intel_timeline_sync_is_later(i915_request_timeline(rq),
1179-
fence))
1180-
continue;
1222+
/*
1223+
* We don't squash repeated fence dependencies here as we
1224+
* want to run our callback in all cases.
1225+
*/
11811226

11821227
if (dma_fence_is_i915(fence))
1183-
ret = i915_request_await_request(rq, to_request(fence));
1228+
ret = __i915_request_await_execution(rq,
1229+
to_request(fence),
1230+
hook);
11841231
else
11851232
ret = i915_request_await_external(rq, fence);
11861233
if (ret < 0)
11871234
return ret;
1188-
1189-
/* Record the latest fence used against each timeline */
1190-
if (fence->context)
1191-
intel_timeline_sync_set(i915_request_timeline(rq),
1192-
fence);
11931235
} while (--nchild);
11941236

11951237
return 0;
11961238
}
11971239

1198-
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
1199-
struct dma_fence *fence)
1200-
{
1201-
return __intel_timeline_sync_is_later(tl,
1202-
fence->context,
1203-
fence->seqno - 1);
1204-
}
1205-
1206-
static int intel_timeline_sync_set_start(struct intel_timeline *tl,
1207-
const struct dma_fence *fence)
1208-
{
1209-
return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
1210-
}
1211-
12121240
static int
1213-
__i915_request_await_execution(struct i915_request *to,
1214-
struct i915_request *from,
1215-
void (*hook)(struct i915_request *rq,
1216-
struct dma_fence *signal))
1241+
i915_request_await_request(struct i915_request *to, struct i915_request *from)
12171242
{
1218-
int err;
1219-
1220-
GEM_BUG_ON(intel_context_is_barrier(from->context));
1243+
int ret;
12211244

1222-
/* Submit both requests at the same time */
1223-
err = __await_execution(to, from, hook, I915_FENCE_GFP);
1224-
if (err)
1225-
return err;
1245+
GEM_BUG_ON(to == from);
1246+
GEM_BUG_ON(to->timeline == from->timeline);
12261247

1227-
/* Squash repeated depenendices to the same timelines */
1228-
if (intel_timeline_sync_has_start(i915_request_timeline(to),
1229-
&from->fence))
1248+
if (i915_request_completed(from)) {
1249+
i915_sw_fence_set_error_once(&to->submit, from->fence.error);
12301250
return 0;
1231-
1232-
/*
1233-
* Wait until the start of this request.
1234-
*
1235-
* The execution cb fires when we submit the request to HW. But in
1236-
* many cases this may be long before the request itself is ready to
1237-
* run (consider that we submit 2 requests for the same context, where
1238-
* the request of interest is behind an indefinite spinner). So we hook
1239-
* up to both to reduce our queues and keep the execution lag minimised
1240-
* in the worst case, though we hope that the await_start is elided.
1241-
*/
1242-
err = i915_request_await_start(to, from);
1243-
if (err < 0)
1244-
return err;
1245-
1246-
/*
1247-
* Ensure both start together [after all semaphores in signal]
1248-
*
1249-
* Now that we are queued to the HW at roughly the same time (thanks
1250-
* to the execute cb) and are ready to run at roughly the same time
1251-
* (thanks to the await start), our signaler may still be indefinitely
1252-
* delayed by waiting on a semaphore from a remote engine. If our
1253-
* signaler depends on a semaphore, so indirectly do we, and we do not
1254-
* want to start our payload until our signaler also starts theirs.
1255-
* So we wait.
1256-
*
1257-
* However, there is also a second condition for which we need to wait
1258-
* for the precise start of the signaler. Consider that the signaler
1259-
* was submitted in a chain of requests following another context
1260-
* (with just an ordinary intra-engine fence dependency between the
1261-
* two). In this case the signaler is queued to HW, but not for
1262-
* immediate execution, and so we must wait until it reaches the
1263-
* active slot.
1264-
*/
1265-
if (intel_engine_has_semaphores(to->engine) &&
1266-
!i915_request_has_initial_breadcrumb(to)) {
1267-
err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
1268-
if (err < 0)
1269-
return err;
12701251
}
12711252

1272-
/* Couple the dependency tree for PI on this exposed to->fence */
12731253
if (to->engine->schedule) {
1274-
err = i915_sched_node_add_dependency(&to->sched,
1254+
ret = i915_sched_node_add_dependency(&to->sched,
12751255
&from->sched,
1276-
I915_DEPENDENCY_WEAK);
1277-
if (err < 0)
1278-
return err;
1256+
I915_DEPENDENCY_EXTERNAL);
1257+
if (ret < 0)
1258+
return ret;
12791259
}
12801260

1281-
return intel_timeline_sync_set_start(i915_request_timeline(to),
1282-
&from->fence);
1261+
if (to->engine == READ_ONCE(from->engine))
1262+
ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
1263+
&from->submit,
1264+
I915_FENCE_GFP);
1265+
else
1266+
ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
1267+
if (ret < 0)
1268+
return ret;
1269+
1270+
return 0;
12831271
}
12841272

12851273
int
1286-
i915_request_await_execution(struct i915_request *rq,
1287-
struct dma_fence *fence,
1288-
void (*hook)(struct i915_request *rq,
1289-
struct dma_fence *signal))
1274+
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
12901275
{
12911276
struct dma_fence **child = &fence;
12921277
unsigned int nchild = 1;
12931278
int ret;
12941279

1280+
/*
1281+
* Note that if the fence-array was created in signal-on-any mode,
1282+
* we should *not* decompose it into its individual fences. However,
1283+
* we don't currently store which mode the fence-array is operating
1284+
* in. Fortunately, the only user of signal-on-any is private to
1285+
* amdgpu and we should not see any incoming fence-array from
1286+
* sync-file being in signal-on-any mode.
1287+
*/
12951288
if (dma_fence_is_array(fence)) {
12961289
struct dma_fence_array *array = to_dma_fence_array(fence);
12971290

1298-
/* XXX Error for signal-on-any fence arrays */
1299-
13001291
child = array->fences;
13011292
nchild = array->num_fences;
13021293
GEM_BUG_ON(!nchild);
@@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq,
13091300
continue;
13101301
}
13111302

1303+
/*
1304+
* Requests on the same timeline are explicitly ordered, along
1305+
* with their dependencies, by i915_request_add() which ensures
1306+
* that requests are submitted in-order through each ring.
1307+
*/
13121308
if (fence->context == rq->fence.context)
13131309
continue;
13141310

1315-
/*
1316-
* We don't squash repeated fence dependencies here as we
1317-
* want to run our callback in all cases.
1318-
*/
1311+
/* Squash repeated waits to the same timelines */
1312+
if (fence->context &&
1313+
intel_timeline_sync_is_later(i915_request_timeline(rq),
1314+
fence))
1315+
continue;
13191316

13201317
if (dma_fence_is_i915(fence))
1321-
ret = __i915_request_await_execution(rq,
1322-
to_request(fence),
1323-
hook);
1318+
ret = i915_request_await_request(rq, to_request(fence));
13241319
else
13251320
ret = i915_request_await_external(rq, fence);
13261321
if (ret < 0)
13271322
return ret;
1323+
1324+
/* Record the latest fence used against each timeline */
1325+
if (fence->context)
1326+
intel_timeline_sync_set(i915_request_timeline(rq),
1327+
fence);
13281328
} while (--nchild);
13291329

13301330
return 0;

0 commit comments

Comments (0)