@@ -293,6 +293,13 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
293293 return *Path;
294294}
295295
296+ // / We will defer LTO to the target's linker if we are not doing JIT and it is
297+ // / supported by the toolchain.
298+ bool linkerSupportsLTO (const ArgList &Args) {
299+ llvm::Triple Triple (Args.getLastArgValue (OPT_triple_EQ));
300+ return Triple.isNVPTX () || Triple.isAMDGPU ();
301+ }
302+
296303// / Returns the hashed value for a constant string.
297304std::string getHash (StringRef Str) {
298305 llvm::MD5 Hasher;
@@ -555,18 +562,23 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
555562 llvm::copy (LinkerArgs, std::back_inserter (CmdArgs));
556563 }
557564
558- // Pass on -mllvm options to the clang invocation.
559- for (const opt::Arg *Arg : Args.filtered (OPT_mllvm)) {
560- CmdArgs.push_back (" -mllvm" );
561- CmdArgs.push_back (Arg->getValue ());
562- }
565+ // Pass on -mllvm options to the linker invocation.
566+ for (const opt::Arg *Arg : Args.filtered (OPT_mllvm))
567+ CmdArgs.push_back (
568+ Args.MakeArgString (" -Wl,-mllvm=" + StringRef (Arg->getValue ())));
563569
564570 if (Args.hasArg (OPT_debug))
565571 CmdArgs.push_back (" -g" );
566572
567573 if (SaveTemps)
568574 CmdArgs.push_back (" -save-temps" );
569575
576+ if (SaveTemps && linkerSupportsLTO (Args))
577+ CmdArgs.push_back (" -Wl,--save-temps" );
578+
579+ if (Args.hasArg (OPT_embed_bitcode))
580+ CmdArgs.push_back (" -Wl,--lto-emit-llvm" );
581+
570582 if (Verbose)
571583 CmdArgs.push_back (" -v" );
572584
@@ -587,8 +599,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
587599 Args.MakeArgString (Arg.split (' =' ).second )});
588600 }
589601
590- // The OpenMPOpt pass can introduce new calls and is expensive, we do not want
591- // this when running CodeGen through clang.
602+ // The OpenMPOpt pass can introduce new calls and is expensive, we do
603+ // not want this when running CodeGen through clang.
592604 if (Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ))
593605 CmdArgs.append ({" -mllvm" , " -openmp-opt-disable" });
594606
@@ -772,8 +784,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
772784 BumpPtrAllocator Alloc;
773785 StringSaver Saver (Alloc);
774786
775- // Search for bitcode files in the input and create an LTO input file. If it
776- // is not a bitcode file, scan its symbol table for symbols we need to save.
787+ // Search for bitcode files in the input and create an LTO input file. If
788+ // it is not a bitcode file, scan its symbol table for symbols we need to
789+ // save.
777790 for (OffloadFile &File : InputFiles) {
778791 MemoryBufferRef Buffer = MemoryBufferRef (File.getBinary ()->getImage (), " " );
779792
@@ -807,7 +820,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
807820 if (!Name)
808821 return Name.takeError ();
809822
810- // Record if we've seen these symbols in any object or shared libraries.
823+ // Record if we've seen these symbols in any object or shared
824+ // libraries.
811825 if ((*ObjFile)->isRelocatableObject ())
812826 UsedInRegularObj.insert (Saver.save (*Name));
813827 else
@@ -844,17 +858,18 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
844858 return false ;
845859 };
846860
847- // We assume visibility of the whole program if every input file was bitcode.
861+ // We assume visibility of the whole program if every input file was
862+ // bitcode.
848863 auto Features = getTargetFeatures (BitcodeInputFiles);
849864 auto LTOBackend = Args.hasArg (OPT_embed_bitcode) ||
850865 Args.hasArg (OPT_builtin_bitcode_EQ) ||
851866 Args.hasArg (OPT_clang_backend)
852867 ? createLTO (Args, Features, OutputBitcode)
853868 : createLTO (Args, Features);
854869
855- // We need to resolve the symbols so the LTO backend knows which symbols need
856- // to be kept or can be internalized. This is a simplified symbol resolution
857- // scheme to approximate the full resolution a linker would do.
870+ // We need to resolve the symbols so the LTO backend knows which symbols
871+ // need to be kept or can be internalized. This is a simplified symbol
872+ // resolution scheme to approximate the full resolution a linker would do.
858873 uint64_t Idx = 0 ;
859874 DenseSet<StringRef> PrevailingSymbols;
860875 for (auto &BitcodeInput : BitcodeInputFiles) {
@@ -886,7 +901,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
886901 // We need LTO to preseve the following global symbols:
887902 // 1) Symbols used in regular objects.
888903 // 2) Sections that will be given a __start/__stop symbol.
889- // 3) Prevailing symbols that are needed visible to external libraries.
904+ // 3) Prevailing symbols that are needed visible to external
905+ // libraries.
890906 Res.VisibleToRegularObj =
891907 UsedInRegularObj.contains (Sym.getName ()) ||
892908 isValidCIdentifier (Sym.getSectionName ()) ||
@@ -901,9 +917,9 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
901917 (UsedInSharedLib.contains (Sym.getName ()) ||
902918 !Sym.canBeOmittedFromSymbolTable ());
903919
904- // The final definition will reside in this linkage unit if the symbol is
905- // defined and local to the module. This only checks for bitcode files,
906- // full assertion will require complete symbol resolution.
920+ // The final definition will reside in this linkage unit if the symbol
921+ // is defined and local to the module. This only checks for bitcode
922+ // files, full assertion will require complete symbol resolution.
907923 Res.FinalDefinitionInLinkageUnit =
908924 Sym.getVisibility () != GlobalValue::DefaultVisibility &&
909925 (!Sym.isUndefined () && !Sym.isCommon ());
@@ -956,8 +972,8 @@ Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
956972 return Error::success ();
957973 }
958974
959- // Append the new inputs to the device linker input. If the user requested an
960- // internalizing link we need to pass the bitcode to clang.
975+ // Append the new inputs to the device linker input. If the user requested
976+ // an internalizing link we need to pass the bitcode to clang.
961977 for (StringRef File :
962978 Args.hasArg (OPT_clang_backend) || Args.hasArg (OPT_builtin_bitcode_EQ)
963979 ? BitcodeOutput
@@ -972,10 +988,9 @@ Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
972988
973989 StringRef Prefix =
974990 sys::path::stem (Binary.getMemoryBufferRef ().getBufferIdentifier ());
975- StringRef Suffix = getImageKindName (Binary.getImageKind ());
976991
977992 auto TempFileOrErr = createOutputFile (
978- Prefix + " -" + Binary.getTriple () + " -" + Binary.getArch (), Suffix );
993+ Prefix + " -" + Binary.getTriple () + " -" + Binary.getArch (), " o " );
979994 if (!TempFileOrErr)
980995 return TempFileOrErr.takeError ();
981996
@@ -1188,8 +1203,8 @@ DerivedArgList getLinkerArgs(ArrayRef<OffloadFile> Input,
11881203 DAL.AddJoinedArg (nullptr , Tbl.getOption (OPT_triple_EQ),
11891204 Args.MakeArgString (Input.front ().getBinary ()->getTriple ()));
11901205
1191- // If every input file is bitcode we have whole program visibility as we do
1192- // only support static linking with bitcode.
1206+ // If every input file is bitcode we have whole program visibility as we
1207+ // do only support static linking with bitcode.
11931208 auto ContainsBitcode = [](const OffloadFile &F) {
11941209 return identify_magic (F.getBinary ()->getImage ()) == file_magic::bitcode;
11951210 };
@@ -1277,12 +1292,15 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
12771292 if (File.getBinary ()->getOffloadKind () != OFK_None)
12781293 ActiveOffloadKinds.insert (File.getBinary ()->getOffloadKind ());
12791294
1280- // First link and remove all the input files containing bitcode.
1295+ // First link and remove all the input files containing bitcode if
1296+ // the target linker does not support it natively.
12811297 SmallVector<StringRef> InputFiles;
1282- if (Error Err = linkBitcodeFiles (Input, InputFiles, LinkerArgs))
1283- return Err;
1298+ if (!linkerSupportsLTO (LinkerArgs))
1299+ if (Error Err = linkBitcodeFiles (Input, InputFiles, LinkerArgs))
1300+ return Err;
12841301
1285- // Write any remaining device inputs to an output file for the linker.
1302+ // Write any remaining device inputs to an output file for the
1303+ // linker.
12861304 for (const OffloadFile &File : Input) {
12871305 auto FileNameOrErr = writeOffloadFile (File);
12881306 if (!FileNameOrErr)
@@ -1291,9 +1309,10 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
12911309 }
12921310
12931311 // Link the remaining device files using the device linker.
1294- auto OutputOrErr = !Args.hasArg (OPT_embed_bitcode)
1295- ? linkDevice (InputFiles, LinkerArgs)
1296- : InputFiles.front ();
1312+ auto OutputOrErr =
1313+ !Args.hasArg (OPT_embed_bitcode) || linkerSupportsLTO (LinkerArgs)
1314+ ? linkDevice (InputFiles, LinkerArgs)
1315+ : InputFiles.front ();
12971316 if (!OutputOrErr)
12981317 return OutputOrErr.takeError ();
12991318
@@ -1420,12 +1439,14 @@ Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, OffloadKind Kind,
14201439 bool NewSymbol = Syms.count (Sym.getName ()) == 0 ;
14211440 auto OldSym = NewSymbol ? Sym_None : Syms[Sym.getName ()];
14221441
1423- // We will extract if it defines a currenlty undefined non-weak symbol.
1442+ // We will extract if it defines a currenlty undefined non-weak
1443+ // symbol.
14241444 bool ResolvesStrongReference =
14251445 ((OldSym & Sym_Undefined && !(OldSym & Sym_Weak)) &&
14261446 !Sym.isUndefined ());
1427- // We will extract if it defines a new global symbol visible to the host.
1428- // This is only necessary for code targeting an offloading language.
1447+ // We will extract if it defines a new global symbol visible to the
1448+ // host. This is only necessary for code targeting an offloading
1449+ // language.
14291450 bool NewGlobalSymbol =
14301451 ((NewSymbol || (OldSym & Sym_Undefined)) && !Sym.isUndefined () &&
14311452 !Sym.canBeOmittedFromSymbolTable () && Kind != object::OFK_None &&
@@ -1480,8 +1501,9 @@ Expected<bool> getSymbolsFromObject(const ObjectFile &Obj, OffloadKind Kind,
14801501 !(OldSym & Sym_Weak) &&
14811502 !(*FlagsOrErr & SymbolRef::SF_Undefined);
14821503
1483- // We will extract if it defines a new global symbol visible to the host.
1484- // This is only necessary for code targeting an offloading language.
1504+ // We will extract if it defines a new global symbol visible to the
1505+ // host. This is only necessary for code targeting an offloading
1506+ // language.
14851507 bool NewGlobalSymbol =
14861508 ((NewSymbol || (OldSym & Sym_Undefined)) &&
14871509 !(*FlagsOrErr & SymbolRef::SF_Undefined) && Kind != object::OFK_None &&
@@ -1648,8 +1670,8 @@ getDeviceInput(const ArgList &Args) {
16481670
16491671 Expected<bool > ExtractOrErr =
16501672 getSymbols (Binary.getBinary ()->getImage (),
1651- Binary.getBinary ()->getOffloadKind (), /* IsArchive= */ true ,
1652- Saver, Syms[ID]);
1673+ Binary.getBinary ()->getOffloadKind (),
1674+ /* IsArchive= */ true , Saver, Syms[ID]);
16531675 if (!ExtractOrErr)
16541676 return ExtractOrErr.takeError ();
16551677
0 commit comments