Skip to content

Commit 717a398

Browse files
[SYCL] Optimize handler::StoreLambda implementation
The current implementation creates descriptions of all kernel params and then processes them in turn. It's possible to process each param right away.
1 parent 1687b78 commit 717a398

File tree

3 files changed

+58
-17
lines changed

3 files changed

+58
-17
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -375,16 +375,6 @@ template <int Dims> bool range_size_fits_in_size_t(const range<Dims> &r) {
375375
return true;
376376
}
377377

378-
template <typename KernelNameType>
379-
std::vector<kernel_param_desc_t> getKernelParamDescs() {
380-
std::vector<kernel_param_desc_t> Result;
381-
int NumParams = getKernelNumParams<KernelNameType>();
382-
Result.reserve(NumParams);
383-
for (int I = 0; I < NumParams; ++I) {
384-
Result.push_back(getKernelParamDesc<KernelNameType>(I));
385-
}
386-
return Result;
387-
}
388378
} // namespace detail
389379

390380
/// Command group handler class.
@@ -476,16 +466,23 @@ class __SYCL_EXPORT handler {
476466
"a single kernel or explicit memory operation.");
477467
}
478468

479-
/// Extracts and prepares kernel arguments from the lambda using information
480-
/// from the built-ins or integration header.
469+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
470+
// TODO: These functions are no longer used; remove them in the next
471+
// ABI-breaking window.
481472
void extractArgsAndReqsFromLambda(
482473
char *LambdaPtr,
483474
const std::vector<detail::kernel_param_desc_t> &ParamDescs, bool IsESIMD);
484-
// TODO Unused, remove during ABI breaking window
485475
void
486476
extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum,
487477
const detail::kernel_param_desc_t *KernelArgs,
488478
bool IsESIMD);
479+
#endif
480+
/// Extracts and prepares kernel arguments from the lambda using information
481+
/// from the built-ins or integration header.
482+
void
483+
extractArgsAndReqsFromLambda(char *LambdaPtr,
484+
const detail::kernel_param_desc_t &ParamDesc,
485+
bool IsESIMD, size_t Index, size_t &IndexShift);
489486

490487
/// Extracts and prepares kernel arguments set via set_arg(s).
491488
void extractArgsAndReqs();
@@ -752,10 +749,18 @@ class __SYCL_EXPORT handler {
752749
// header, so don't perform things that require it.
753750
if (KernelHasName) {
754751
// TODO support ESIMD in no-integration-header case too.
752+
753+
size_t NumParams = detail::getKernelNumParams<KernelName>();
755754
clearArgs();
756-
extractArgsAndReqsFromLambda(MHostKernel->getPtr(),
757-
detail::getKernelParamDescs<KernelName>(),
758-
detail::isKernelESIMD<KernelName>());
755+
reseveArgs(NumParams);
756+
757+
char *LambdaPtr = MHostKernel->getPtr();
758+
759+
for (size_t I = 0, IndexShift = 0; I < NumParams; ++I) {
760+
const detail::kernel_param_desc_t param = detail::getKernelParamDesc<KernelName>(I);
761+
extractArgsAndReqsFromLambda(LambdaPtr, param, detail::isKernelESIMD<KernelName>(), I, IndexShift);
762+
}
763+
759764
MKernelName = detail::getKernelName<KernelName>();
760765
} else {
761766
// In case w/o the integration header it is necessary to process
@@ -3816,6 +3821,7 @@ class __SYCL_EXPORT handler {
38163821
void addArg(detail::kernel_param_kind_t ArgKind, void *Req, int AccessTarget,
38173822
int ArgIndex);
38183823
void clearArgs();
3824+
void reseveArgs(int NumParams);
38193825
void setArgsToAssociatedAccessors();
38203826

38213827
bool HasAssociatedAccessor(detail::AccessorImplHost *Req,

sycl/source/handler.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,37 @@ void handler::extractArgsAndReqs() {
10651065
}
10661066
}
10671067

1068+
void handler::extractArgsAndReqsFromLambda(
1069+
char *LambdaPtr, const detail::kernel_param_desc_t &ParamDesc,
1070+
bool IsESIMD, size_t Index, size_t &IndexShift) {
1071+
void *Ptr = LambdaPtr + ParamDesc.offset;
1072+
const detail::kernel_param_kind_t &Kind = ParamDesc.kind;
1073+
const int &Size = ParamDesc.info;
1074+
if (Kind == detail::kernel_param_kind_t::kind_accessor) {
1075+
// For args kind of accessor Size is information about accessor.
1076+
// The first 11 bits of Size encode the accessor target.
1077+
const access::target AccTarget =
1078+
static_cast<access::target>(Size & AccessTargetMask);
1079+
if ((AccTarget == access::target::device ||
1080+
AccTarget == access::target::constant_buffer) ||
1081+
(AccTarget == access::target::image ||
1082+
AccTarget == access::target::image_array)) {
1083+
detail::AccessorBaseHost *AccBase =
1084+
static_cast<detail::AccessorBaseHost *>(Ptr);
1085+
Ptr = detail::getSyclObjImpl(*AccBase).get();
1086+
} else if (AccTarget == access::target::local) {
1087+
detail::LocalAccessorBaseHost *LocalAccBase =
1088+
static_cast<detail::LocalAccessorBaseHost *>(Ptr);
1089+
Ptr = detail::getSyclObjImpl(*LocalAccBase).get();
1090+
}
1091+
}
1092+
processArg(Ptr, Kind, Size, Index, IndexShift, /*IsKernelCreatedFromSource=*/false,
1093+
IsESIMD);
1094+
}
1095+
1096+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1097+
// TODO: These functions are no longer used; remove them in the next
1098+
// ABI-breaking window.
10681099
void handler::extractArgsAndReqsFromLambda(
10691100
char *LambdaPtr, const std::vector<detail::kernel_param_desc_t> &ParamDescs,
10701101
bool IsESIMD) {
@@ -1099,14 +1130,14 @@ void handler::extractArgsAndReqsFromLambda(
10991130
}
11001131
}
11011132

1102-
// TODO Unused, remove during ABI breaking window
11031133
void handler::extractArgsAndReqsFromLambda(
11041134
char *LambdaPtr, size_t KernelArgsNum,
11051135
const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) {
11061136
std::vector<detail::kernel_param_desc_t> ParamDescs(
11071137
KernelArgs, KernelArgs + KernelArgsNum);
11081138
extractArgsAndReqsFromLambda(LambdaPtr, ParamDescs, IsESIMD);
11091139
}
1140+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
11101141

11111142
// Calling methods of kernel_impl requires knowledge of class layout.
11121143
// As this is impossible in header, there's a function that calls necessary
@@ -2103,6 +2134,8 @@ void handler::addArg(detail::kernel_param_kind_t ArgKind, void *Req,
21032134

21042135
void handler::clearArgs() { impl->MArgs.clear(); }
21052136

2137+
void handler::reseveArgs(int NumParams) { impl->MArgs.reserve(MaxNumAdditionalArgs * NumParams); }
2138+
21062139
void handler::setArgsToAssociatedAccessors() {
21072140
impl->MArgs = impl->MAssociatedAccesors;
21082141
}

sycl/test/abi/sycl_symbols_linux.dump

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3490,6 +3490,7 @@ _ZN4sycl3_V17handler10depends_onERKSt6vectorINS0_5eventESaIS3_EE
34903490
_ZN4sycl3_V17handler10depends_onERKSt6vectorISt10shared_ptrINS0_6detail10event_implEESaIS6_EE
34913491
_ZN4sycl3_V17handler10mem_adviseEPKvmi
34923492
_ZN4sycl3_V17handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb
3493+
_ZN4sycl3_V17handler10reseveArgsEi
34933494
_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvNS0_14interop_handleEEE
34943495
_ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvvEE
34953496
_ZN4sycl3_V17handler11copyCodeLocERKS1_
@@ -3552,6 +3553,7 @@ _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEES3_NS0_2idILi3
35523553
_ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEEbi
35533554
_ZN4sycl3_V17handler27addLifetimeSharedPtrStorageESt10shared_ptrIKvE
35543555
_ZN4sycl3_V17handler27computeFallbackKernelBoundsEmm
3556+
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcRKNS0_6detail19kernel_param_desc_tEbmRm
35553557
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcRKSt6vectorINS0_6detail19kernel_param_desc_tESaIS5_EEb
35563558
_ZN4sycl3_V17handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb
35573559
_ZN4sycl3_V17handler28memcpyToHostOnlyDeviceGlobalEPKvS3_mbmm

0 commit comments

Comments
 (0)