@@ -233,8 +233,6 @@ Expected<std::unique_ptr<llvm::Module>> jit_compiler::compileDeviceCode(
233233 DerivedArgList DAL{UserArgList};
234234 const auto &OptTable = getDriverOptTable ();
235235 DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_fsycl_device_only));
236- DAL.AddFlagArg (nullptr ,
237- OptTable.getOption (OPT_fno_sycl_dead_args_optimization));
238236 DAL.AddJoinedArg (
239237 nullptr , OptTable.getOption (OPT_resource_dir_EQ),
240238 (DPCPPRoot + " /lib/clang/" + Twine (CLANG_VERSION_MAJOR)).str ());
@@ -436,15 +434,35 @@ template <class PassClass> static bool runModulePass(llvm::Module &M) {
436434 return !Res.areAllPreserved ();
437435}
438436
439- llvm::Expected<PostLinkResult> jit_compiler::performPostLink (
440- std::unique_ptr<llvm::Module> Module,
441- [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) {
437+ static IRSplitMode getDeviceCodeSplitMode (const InputArgList &UserArgList) {
438+ // Mirrors the (combined) logic of
439+ // `get[NonTriple|Triple]BasedSYCLPostLinkOpts` in
440+ // `clang/lib/Driver/ToolChains/Clang.cpp`: the default is auto mode, and the
441+ // last `-fsycl-device-code-split=` option given by the user overrides it. The
442+ // no-argument variant `-fsycl-device-code-split` is ignored.
443+ if (auto *Arg = UserArgList.getLastArg (OPT_fsycl_device_code_split_EQ)) {
444+ StringRef ArgVal{Arg->getValue ()};
445+ if (ArgVal == " per_kernel" ) {
446+ return SPLIT_PER_KERNEL;
447+ }
448+ if (ArgVal == " per_source" ) {
449+ return SPLIT_PER_TU;
450+ }
451+ if (ArgVal == " off" ) {
452+ return SPLIT_NONE;
453+ }
454+ }
455+ return SPLIT_AUTO; // Option absent, or its value matched none of the above.
456+ }
457+
458+ Expected<PostLinkResult>
459+ jit_compiler::performPostLink (std::unique_ptr<llvm::Module> Module,
460+ const InputArgList &UserArgList) {
442461 // This is a simplified version of `processInputModule` in
443462 // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality
444463 // left out of the algorithm for now.
445464
446- // TODO: SplitMode can be controlled by the user.
447- const auto SplitMode = SPLIT_NONE;
465+ const auto SplitMode = getDeviceCodeSplitMode (UserArgList);
448466
449467 // TODO: EmitOnlyKernelsAsEntryPoints is controlled by
450468 // `shouldEmitOnlyKernelsAsEntryPoints` in
@@ -480,77 +498,87 @@ llvm::Expected<PostLinkResult> jit_compiler::performPostLink(
480498 return createStringError (" `invoke_simd` calls detected" );
481499 }
482500
483- // TODO: Implement actual device code splitting. We're just using the splitter
484- // to obtain additional information about the module for now.
485-
486501 std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter (
487502 ModuleDesc{std::move (Module)}, SplitMode,
488503 /* IROutputOnly=*/ false , EmitOnlyKernelsAsEntryPoints);
489504 assert (Splitter->hasMoreSplits ());
490- if (Splitter->remainingSplits () > 1 ) {
491- return createStringError (" Device code requires splitting" );
492- }
493505
494506 // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
495507 // be processed.
496508
497- ModuleDesc MDesc = Splitter->nextSplit ();
509+ // TODO: This allocation assumes that there are no further splits required,
510+ // i.e. there are no mixed SYCL/ESIMD modules.
511+ RTCBundleInfo BundleInfo{Splitter->remainingSplits ()};
512+ SmallVector<std::unique_ptr<llvm::Module>> Modules;
498513
499- // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when
500- // `invoke_simd` is supported.
514+ auto *DevImgInfoIt = BundleInfo.begin ();
515+ while (Splitter->hasMoreSplits ()) {
516+ assert (DevImgInfoIt != BundleInfo.end ());
501517
502- SmallVector<ModuleDesc, 2 > ESIMDSplits =
503- splitByESIMD (std::move (MDesc), EmitOnlyKernelsAsEntryPoints);
504- assert (!ESIMDSplits.empty ());
505- if (ESIMDSplits.size () > 1 ) {
506- return createStringError (" Mixing SYCL and ESIMD code is unsupported" );
507- }
508- MDesc = std::move (ESIMDSplits.front ());
518+ ModuleDesc MDesc = Splitter->nextSplit ();
519+ RTCDevImgInfo &DevImgInfo = *DevImgInfoIt++;
509520
510- if (MDesc.isESIMD ()) {
511- // `sycl-post-link` has a `-lower-esimd` option, but there's no clang driver
512- // option to influence it. Rather, the driver sets it unconditionally in the
513- // multi-file output mode, which we are mimicking here.
514- lowerEsimdConstructs (MDesc, PerformOpts);
515- }
521+ // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when
522+ // `invoke_simd` is supported.
516523
517- MDesc.saveSplitInformationAsMetadata ();
518-
519- RTCBundleInfo BundleInfo;
520- BundleInfo.SymbolTable = FrozenSymbolTable{MDesc.entries ().size ()};
521- transform (MDesc.entries (), BundleInfo.SymbolTable .begin (),
522- [](Function *F) { return F->getName (); });
523-
524- // TODO: Determine what is requested.
525- GlobalBinImageProps PropReq{
526- /* EmitKernelParamInfo=*/ true , /* EmitProgramMetadata=*/ true ,
527- /* EmitExportedSymbols=*/ true , /* EmitImportedSymbols=*/ true ,
528- /* DeviceGlobals=*/ false };
529- PropertySetRegistry Properties =
530- computeModuleProperties (MDesc.getModule (), MDesc.entries (), PropReq);
531- // TODO: Manually add `compile_target` property as in
532- // `saveModuleProperties`?
533- const auto &PropertySets = Properties.getPropSets ();
534-
535- BundleInfo.Properties = FrozenPropertyRegistry{PropertySets.size ()};
536- for (auto &&[KV, FrozenPropSet] : zip (PropertySets, BundleInfo.Properties )) {
537- const auto &PropertySetName = KV.first ;
538- const auto &PropertySet = KV.second ;
539- FrozenPropSet =
540- FrozenPropertySet{PropertySetName.str (), PropertySet.size ()};
541- for (auto &&[KV2, FrozenProp] : zip (PropertySet, FrozenPropSet.Values )) {
542- const auto &PropertyName = KV2.first ;
543- const auto &PropertyValue = KV2.second ;
544- FrozenProp = PropertyValue.getType () == PropertyValue::Type::UINT32
545- ? FrozenPropertyValue{PropertyName.str (),
546- PropertyValue.asUint32 ()}
547- : FrozenPropertyValue{
548- PropertyName.str (), PropertyValue.asRawByteArray (),
549- PropertyValue.getRawByteArraySize ()};
524+ SmallVector<ModuleDesc, 2 > ESIMDSplits =
525+ splitByESIMD (std::move (MDesc), EmitOnlyKernelsAsEntryPoints);
526+ assert (!ESIMDSplits.empty ());
527+ if (ESIMDSplits.size () > 1 ) {
528+ return createStringError (" Mixing SYCL and ESIMD code is unsupported" );
550529 }
551- };
530+ MDesc = std::move (ESIMDSplits.front ());
531+
532+ if (MDesc.isESIMD ()) {
533+ // `sycl-post-link` has a `-lower-esimd` option, but there's no clang
534+ // driver option to influence it. Rather, the driver sets it
535+ // unconditionally in the multi-file output mode, which we are mimicking
536+ // here.
537+ lowerEsimdConstructs (MDesc, PerformOpts);
538+ }
539+
540+ MDesc.saveSplitInformationAsMetadata ();
541+
542+ DevImgInfo.SymbolTable = FrozenSymbolTable{MDesc.entries ().size ()};
543+ transform (MDesc.entries (), DevImgInfo.SymbolTable .begin (),
544+ [](Function *F) { return F->getName (); });
545+
546+ // TODO: Determine what is requested.
547+ GlobalBinImageProps PropReq{
548+ /* EmitKernelParamInfo=*/ true , /* EmitProgramMetadata=*/ true ,
549+ /* EmitExportedSymbols=*/ true , /* EmitImportedSymbols=*/ true ,
550+ /* DeviceGlobals=*/ false };
551+ PropertySetRegistry Properties =
552+ computeModuleProperties (MDesc.getModule (), MDesc.entries (), PropReq);
553+ // TODO: Manually add `compile_target` property as in
554+ // `saveModuleProperties`?
555+ const auto &PropertySets = Properties.getPropSets ();
556+
557+ DevImgInfo.Properties = FrozenPropertyRegistry{PropertySets.size ()};
558+ for (auto [KV, FrozenPropSet] :
559+ zip_equal (PropertySets, DevImgInfo.Properties )) {
560+ const auto &PropertySetName = KV.first ;
561+ const auto &PropertySet = KV.second ;
562+ FrozenPropSet =
563+ FrozenPropertySet{PropertySetName.str (), PropertySet.size ()};
564+ for (auto [KV2, FrozenProp] :
565+ zip_equal (PropertySet, FrozenPropSet.Values )) {
566+ const auto &PropertyName = KV2.first ;
567+ const auto &PropertyValue = KV2.second ;
568+ FrozenProp =
569+ PropertyValue.getType () == PropertyValue::Type::UINT32
570+ ? FrozenPropertyValue{PropertyName.str (),
571+ PropertyValue.asUint32 ()}
572+ : FrozenPropertyValue{PropertyName.str (),
573+ PropertyValue.asRawByteArray (),
574+ PropertyValue.getRawByteArraySize ()};
575+ }
576+ };
577+
578+ Modules.push_back (MDesc.releaseModulePtr ());
579+ }
552580
553- return PostLinkResult{std::move (BundleInfo), MDesc. releaseModulePtr ( )};
581+ return PostLinkResult{std::move (BundleInfo), std::move (Modules )};
554582}
555583
556584Expected<InputArgList>
@@ -607,21 +635,10 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) {
607635 }
608636 }
609637
610- if (auto DCSMode = AL.getLastArgValue (OPT_fsycl_device_code_split_EQ, " none" );
611- DCSMode != " none" && DCSMode != " auto" ) {
612- return createStringError (" Device code splitting is not yet supported" );
613- }
614-
615638 if (!AL.hasFlag (OPT_fsycl_device_code_split_esimd,
616639 OPT_fno_sycl_device_code_split_esimd, true )) {
617640 return createStringError (" ESIMD device code split cannot be deactivated" );
618641 }
619642
620- if (AL.hasFlag (OPT_fsycl_dead_args_optimization,
621- OPT_fno_sycl_dead_args_optimization, false )) {
622- return createStringError (
623- " Dead argument optimization must be disabled for runtime compilation" );
624- }
625-
626643 return std::move (AL);
627644}
0 commit comments