@@ -309,6 +309,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
309
309
params.external_linkage = _external_linkage;
310
310
size_t compile_for[] = { jl_typeinf_world, _world };
311
311
for (int worlds = 0 ; worlds < 2 ; worlds++) {
312
+ JL_TIMING (NATIVE_AOT, NATIVE_Codegen);
312
313
params.world = compile_for[worlds];
313
314
if (!params.world )
314
315
continue ;
@@ -390,37 +391,40 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
390
391
391
392
// clones the contents of the module `m` to the shadow_output collector
392
393
// while examining and recording what kind of function pointer we have
393
- Linker L (*clone.getModuleUnlocked ());
394
- for (auto &def : emitted) {
395
- jl_merge_module (clone, std::move (std::get<0 >(def.second )));
396
- jl_code_instance_t *this_code = def.first ;
397
- jl_llvm_functions_t decls = std::get<1 >(def.second );
398
- StringRef func = decls.functionObject ;
399
- StringRef cfunc = decls.specFunctionObject ;
400
- uint32_t func_id = 0 ;
401
- uint32_t cfunc_id = 0 ;
402
- if (func == " jl_fptr_args" ) {
403
- func_id = -1 ;
404
- }
405
- else if (func == " jl_fptr_sparam" ) {
406
- func_id = -2 ;
407
- }
408
- else {
409
- // Safe b/c context is locked by params
410
- data->jl_sysimg_fvars .push_back (cast<Function>(clone.getModuleUnlocked ()->getNamedValue (func)));
411
- func_id = data->jl_sysimg_fvars .size ();
394
+ {
395
+ JL_TIMING (NATIVE_AOT, NATIVE_Merge);
396
+ Linker L (*clone.getModuleUnlocked ());
397
+ for (auto &def : emitted) {
398
+ jl_merge_module (clone, std::move (std::get<0 >(def.second )));
399
+ jl_code_instance_t *this_code = def.first ;
400
+ jl_llvm_functions_t decls = std::get<1 >(def.second );
401
+ StringRef func = decls.functionObject ;
402
+ StringRef cfunc = decls.specFunctionObject ;
403
+ uint32_t func_id = 0 ;
404
+ uint32_t cfunc_id = 0 ;
405
+ if (func == " jl_fptr_args" ) {
406
+ func_id = -1 ;
407
+ }
408
+ else if (func == " jl_fptr_sparam" ) {
409
+ func_id = -2 ;
410
+ }
411
+ else {
412
+ // Safe b/c context is locked by params
413
+ data->jl_sysimg_fvars .push_back (cast<Function>(clone.getModuleUnlocked ()->getNamedValue (func)));
414
+ func_id = data->jl_sysimg_fvars .size ();
415
+ }
416
+ if (!cfunc.empty ()) {
417
+ // Safe b/c context is locked by params
418
+ data->jl_sysimg_fvars .push_back (cast<Function>(clone.getModuleUnlocked ()->getNamedValue (cfunc)));
419
+ cfunc_id = data->jl_sysimg_fvars .size ();
420
+ }
421
+ data->jl_fvar_map [this_code] = std::make_tuple (func_id, cfunc_id);
412
422
}
413
- if (!cfunc. empty () ) {
414
- // Safe b/c context is locked by params
415
- data-> jl_sysimg_fvars . push_back (cast<Function>(clone. getModuleUnlocked ()-> getNamedValue (cfunc)) );
416
- cfunc_id = data-> jl_sysimg_fvars . size () ;
423
+ if (params. _shared_module ) {
424
+ bool error = L. linkInModule ( std::move ( params. _shared_module ));
425
+ assert (!error && " Error linking in shared module " );
426
+ ( void )error ;
417
427
}
418
- data->jl_fvar_map [this_code] = std::make_tuple (func_id, cfunc_id);
419
- }
420
- if (params._shared_module ) {
421
- bool error = L.linkInModule (std::move (params._shared_module ));
422
- assert (!error && " Error linking in shared module" );
423
- (void )error;
424
428
}
425
429
426
430
// now get references to the globals in the merged module
@@ -986,58 +990,60 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
986
990
}
987
991
assert (!verifyLLVMIR (M));
988
992
989
- timers.optimize .startTimer ();
993
+ {
994
+ timers.optimize .startTimer ();
990
995
991
996
#ifndef JL_USE_NEW_PM
992
- legacy::PassManager optimizer;
993
- addTargetPasses (&optimizer, TM->getTargetTriple (), TM->getTargetIRAnalysis ());
994
- addOptimizationPasses (&optimizer, jl_options.opt_level , true , true );
995
- addMachinePasses (&optimizer, jl_options.opt_level );
997
+ legacy::PassManager optimizer;
998
+ addTargetPasses (&optimizer, TM->getTargetTriple (), TM->getTargetIRAnalysis ());
999
+ addOptimizationPasses (&optimizer, jl_options.opt_level , true , true );
1000
+ addMachinePasses (&optimizer, jl_options.opt_level );
996
1001
#else
997
1002
998
- auto PMTM = std::unique_ptr<TargetMachine>(
999
- SourceTM.getTarget ().createTargetMachine (
1000
- SourceTM.getTargetTriple ().str (),
1001
- SourceTM.getTargetCPU (),
1002
- SourceTM.getTargetFeatureString (),
1003
- SourceTM.Options ,
1004
- SourceTM.getRelocationModel (),
1005
- SourceTM.getCodeModel (),
1006
- SourceTM.getOptLevel ()));
1007
- NewPM optimizer{std::move (PMTM), getOptLevel (jl_options.opt_level ), OptimizationOptions::defaults (true , true )};
1003
+ auto PMTM = std::unique_ptr<TargetMachine>(
1004
+ SourceTM.getTarget ().createTargetMachine (
1005
+ SourceTM.getTargetTriple ().str (),
1006
+ SourceTM.getTargetCPU (),
1007
+ SourceTM.getTargetFeatureString (),
1008
+ SourceTM.Options ,
1009
+ SourceTM.getRelocationModel (),
1010
+ SourceTM.getCodeModel (),
1011
+ SourceTM.getOptLevel ()));
1012
+ NewPM optimizer{std::move (PMTM), getOptLevel (jl_options.opt_level ), OptimizationOptions::defaults (true , true )};
1008
1013
#endif
1009
- optimizer.run (M);
1010
- assert (!verifyLLVMIR (M));
1011
- bool inject_aliases = false ;
1012
- for (auto &F : M.functions ()) {
1013
- if (!F.isDeclaration () && F.getName () != " _DllMainCRTStartup" ) {
1014
- inject_aliases = true ;
1015
- break ;
1014
+ optimizer.run (M);
1015
+ assert (!verifyLLVMIR (M));
1016
+ bool inject_aliases = false ;
1017
+ for (auto &F : M.functions ()) {
1018
+ if (!F.isDeclaration () && F.getName () != " _DllMainCRTStartup" ) {
1019
+ inject_aliases = true ;
1020
+ break ;
1021
+ }
1016
1022
}
1017
- }
1018
- // no need to inject aliases if we have no functions
1023
+ // no need to inject aliases if we have no functions
1019
1024
1020
- if (inject_aliases) {
1025
+ if (inject_aliases) {
1021
1026
#if JULIA_FLOAT16_ABI == 1
1022
- // We would like to emit an alias or an weakref alias to redirect these symbols
1023
- // but LLVM doesn't let us emit a GlobalAlias to a declaration...
1024
- // So for now we inject a definition of these functions that calls our runtime
1025
- // functions. We do so after optimization to avoid cloning these functions.
1026
- injectCRTAlias (M, " __gnu_h2f_ieee" , " julia__gnu_h2f_ieee" ,
1027
- FunctionType::get (Type::getFloatTy (M.getContext ()), { Type::getHalfTy (M.getContext ()) }, false ));
1028
- injectCRTAlias (M, " __extendhfsf2" , " julia__gnu_h2f_ieee" ,
1029
- FunctionType::get (Type::getFloatTy (M.getContext ()), { Type::getHalfTy (M.getContext ()) }, false ));
1030
- injectCRTAlias (M, " __gnu_f2h_ieee" , " julia__gnu_f2h_ieee" ,
1031
- FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getFloatTy (M.getContext ()) }, false ));
1032
- injectCRTAlias (M, " __truncsfhf2" , " julia__gnu_f2h_ieee" ,
1033
- FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getFloatTy (M.getContext ()) }, false ));
1034
- injectCRTAlias (M, " __truncdfhf2" , " julia__truncdfhf2" ,
1035
- FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getDoubleTy (M.getContext ()) }, false ));
1027
+ // We would like to emit an alias or an weakref alias to redirect these symbols
1028
+ // but LLVM doesn't let us emit a GlobalAlias to a declaration...
1029
+ // So for now we inject a definition of these functions that calls our runtime
1030
+ // functions. We do so after optimization to avoid cloning these functions.
1031
+ injectCRTAlias (M, " __gnu_h2f_ieee" , " julia__gnu_h2f_ieee" ,
1032
+ FunctionType::get (Type::getFloatTy (M.getContext ()), { Type::getHalfTy (M.getContext ()) }, false ));
1033
+ injectCRTAlias (M, " __extendhfsf2" , " julia__gnu_h2f_ieee" ,
1034
+ FunctionType::get (Type::getFloatTy (M.getContext ()), { Type::getHalfTy (M.getContext ()) }, false ));
1035
+ injectCRTAlias (M, " __gnu_f2h_ieee" , " julia__gnu_f2h_ieee" ,
1036
+ FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getFloatTy (M.getContext ()) }, false ));
1037
+ injectCRTAlias (M, " __truncsfhf2" , " julia__gnu_f2h_ieee" ,
1038
+ FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getFloatTy (M.getContext ()) }, false ));
1039
+ injectCRTAlias (M, " __truncdfhf2" , " julia__truncdfhf2" ,
1040
+ FunctionType::get (Type::getHalfTy (M.getContext ()), { Type::getDoubleTy (M.getContext ()) }, false ));
1036
1041
#else
1037
- emitFloat16Wrappers (M, false );
1042
+ emitFloat16Wrappers (M, false );
1038
1043
#endif
1044
+ }
1045
+ timers.optimize .stopTimer ();
1039
1046
}
1040
- timers.optimize .stopTimer ();
1041
1047
1042
1048
if (opt) {
1043
1049
timers.opt .startTimer ();
@@ -1276,7 +1282,10 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
1276
1282
// Single-threaded case
1277
1283
if (threads == 1 ) {
1278
1284
output_timer.startTimer ();
1279
- outputs[0 ] = add_output_impl (M, TM, timers[0 ], unopt_out, opt_out, obj_out, asm_out);
1285
+ {
1286
+ JL_TIMING (NATIVE_AOT, NATIVE_Opt);
1287
+ outputs[0 ] = add_output_impl (M, TM, timers[0 ], unopt_out, opt_out, obj_out, asm_out);
1288
+ }
1280
1289
output_timer.stopTimer ();
1281
1290
// Don't need M anymore
1282
1291
module_released (M);
@@ -1314,40 +1323,43 @@ static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, Stri
1314
1323
output_timer.startTimer ();
1315
1324
1316
1325
// Start all of the worker threads
1317
- std::vector<std::thread> workers (threads);
1318
- for (unsigned i = 0 ; i < threads; i++) {
1319
- workers[i] = std::thread ([&, i]() {
1320
- LLVMContext ctx;
1321
- // Lazily deserialize the entire module
1322
- timers[i].deserialize .startTimer ();
1323
- auto M = cantFail (getLazyBitcodeModule (MemoryBufferRef (StringRef (serialized.data (), serialized.size ()), " Optimized" ), ctx), " Error loading module" );
1324
- timers[i].deserialize .stopTimer ();
1325
-
1326
- timers[i].materialize .startTimer ();
1327
- materializePreserved (*M, partitions[i]);
1328
- timers[i].materialize .stopTimer ();
1329
-
1330
- timers[i].construct .startTimer ();
1331
- construct_vars (*M, partitions[i]);
1332
- M->setModuleFlag (Module::Error, " julia.mv.suffix" , MDString::get (M->getContext (), " _" + std::to_string (i)));
1333
- // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
1334
- // or it may skip emitting debug info for that file. Here set it to ./julia#N
1335
- DIFile *topfile = DIFile::get (M->getContext (), " julia#" + std::to_string (i), " ." );
1336
- for (DICompileUnit *CU : M->debug_compile_units ())
1337
- CU->replaceOperandWith (0 , topfile);
1338
- timers[i].construct .stopTimer ();
1339
-
1340
- timers[i].deletion .startTimer ();
1341
- dropUnusedGlobals (*M);
1342
- timers[i].deletion .stopTimer ();
1343
-
1344
- outputs[i] = add_output_impl (*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
1345
- });
1346
- }
1326
+ {
1327
+ JL_TIMING (NATIVE_AOT, NATIVE_Opt);
1328
+ std::vector<std::thread> workers (threads);
1329
+ for (unsigned i = 0 ; i < threads; i++) {
1330
+ workers[i] = std::thread ([&, i]() {
1331
+ LLVMContext ctx;
1332
+ // Lazily deserialize the entire module
1333
+ timers[i].deserialize .startTimer ();
1334
+ auto M = cantFail (getLazyBitcodeModule (MemoryBufferRef (StringRef (serialized.data (), serialized.size ()), " Optimized" ), ctx), " Error loading module" );
1335
+ timers[i].deserialize .stopTimer ();
1336
+
1337
+ timers[i].materialize .startTimer ();
1338
+ materializePreserved (*M, partitions[i]);
1339
+ timers[i].materialize .stopTimer ();
1340
+
1341
+ timers[i].construct .startTimer ();
1342
+ construct_vars (*M, partitions[i]);
1343
+ M->setModuleFlag (Module::Error, " julia.mv.suffix" , MDString::get (M->getContext (), " _" + std::to_string (i)));
1344
+ // The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
1345
+ // or it may skip emitting debug info for that file. Here set it to ./julia#N
1346
+ DIFile *topfile = DIFile::get (M->getContext (), " julia#" + std::to_string (i), " ." );
1347
+ for (DICompileUnit *CU : M->debug_compile_units ())
1348
+ CU->replaceOperandWith (0 , topfile);
1349
+ timers[i].construct .stopTimer ();
1350
+
1351
+ timers[i].deletion .startTimer ();
1352
+ dropUnusedGlobals (*M);
1353
+ timers[i].deletion .stopTimer ();
1354
+
1355
+ outputs[i] = add_output_impl (*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
1356
+ });
1357
+ }
1347
1358
1348
- // Wait for all of the worker threads to finish
1349
- for (auto &w : workers)
1350
- w.join ();
1359
+ // Wait for all of the worker threads to finish
1360
+ for (auto &w : workers)
1361
+ w.join ();
1362
+ }
1351
1363
1352
1364
output_timer.stopTimer ();
1353
1365
@@ -1488,6 +1500,7 @@ void jl_dump_native_impl(void *native_code,
1488
1500
SmallVector<AOTOutputs, 16 > data_outputs;
1489
1501
SmallVector<AOTOutputs, 16 > metadata_outputs;
1490
1502
if (z) {
1503
+ JL_TIMING (NATIVE_AOT, NATIVE_Sysimg);
1491
1504
LLVMContext Context;
1492
1505
Module sysimgM (" sysimg" , Context);
1493
1506
sysimgM.setTargetTriple (TheTriple.str ());
@@ -1526,6 +1539,7 @@ void jl_dump_native_impl(void *native_code,
1526
1539
bool has_veccall = false ;
1527
1540
1528
1541
data->M .withModuleDo ([&](Module &dataM) {
1542
+ JL_TIMING (NATIVE_AOT, NATIVE_Setup);
1529
1543
dataM.setTargetTriple (TheTriple.str ());
1530
1544
dataM.setDataLayout (DL);
1531
1545
auto &Context = dataM.getContext ();
@@ -1616,6 +1630,7 @@ void jl_dump_native_impl(void *native_code,
1616
1630
}
1617
1631
1618
1632
{
1633
+ JL_TIMING (NATIVE_AOT, NATIVE_Metadata);
1619
1634
LLVMContext Context;
1620
1635
Module metadataM (" metadata" , Context);
1621
1636
metadataM.setTargetTriple (TheTriple.str ());
@@ -1690,32 +1705,37 @@ void jl_dump_native_impl(void *native_code,
1690
1705
metadata_outputs = compile (metadataM, " data" , 1 , [](Module &) {});
1691
1706
}
1692
1707
1693
- object::Archive::Kind Kind = getDefaultForHost (TheTriple);
1708
+ {
1709
+ JL_TIMING (NATIVE_AOT, NATIVE_Write);
1710
+
1711
+ object::Archive::Kind Kind = getDefaultForHost (TheTriple);
1694
1712
#define WRITE_ARCHIVE (fname, field, prefix, suffix ) \
1695
- if (fname) {\
1696
- std::vector<NewArchiveMember> archive; \
1697
- SmallVector<std::string, 16 > filenames; \
1698
- SmallVector<StringRef, 16 > buffers; \
1699
- for (size_t i = 0 ; i < threads; i++) { \
1700
- filenames.push_back ((StringRef (" text" ) + prefix + " #" + Twine (i) + suffix).str ()); \
1701
- buffers.push_back (StringRef (data_outputs[i].field .data (), data_outputs[i].field .size ())); \
1702
- } \
1703
- filenames.push_back (" metadata" prefix suffix); \
1704
- buffers.push_back (StringRef (metadata_outputs[0 ].field .data (), metadata_outputs[0 ].field .size ())); \
1705
- if (z) { \
1706
- filenames.push_back (" sysimg" prefix suffix); \
1707
- buffers.push_back (StringRef (sysimg_outputs[0 ].field .data (), sysimg_outputs[0 ].field .size ())); \
1708
- } \
1709
- for (size_t i = 0 ; i < filenames.size (); i++) { \
1710
- archive.push_back (NewArchiveMember (MemoryBufferRef (buffers[i], filenames[i]))); \
1711
- } \
1712
- handleAllErrors (writeArchive (fname, archive, true , Kind, true , false ), reportWriterError); \
1713
- }
1714
-
1715
- WRITE_ARCHIVE (unopt_bc_fname, unopt, " _unopt" , " .bc" );
1716
- WRITE_ARCHIVE (bc_fname, opt, " _opt" , " .bc" );
1717
- WRITE_ARCHIVE (obj_fname, obj, " " , " .o" );
1718
- WRITE_ARCHIVE (asm_fname, asm_, " " , " .s" );
1713
+ if (fname) {\
1714
+ std::vector<NewArchiveMember> archive; \
1715
+ SmallVector<std::string, 16 > filenames; \
1716
+ SmallVector<StringRef, 16 > buffers; \
1717
+ for (size_t i = 0 ; i < threads; i++) { \
1718
+ filenames.push_back ((StringRef (" text" ) + prefix + " #" + Twine (i) + suffix).str ()); \
1719
+ buffers.push_back (StringRef (data_outputs[i].field .data (), data_outputs[i].field .size ())); \
1720
+ } \
1721
+ filenames.push_back (" metadata" prefix suffix); \
1722
+ buffers.push_back (StringRef (metadata_outputs[0 ].field .data (), metadata_outputs[0 ].field .size ())); \
1723
+ if (z) { \
1724
+ filenames.push_back (" sysimg" prefix suffix); \
1725
+ buffers.push_back (StringRef (sysimg_outputs[0 ].field .data (), sysimg_outputs[0 ].field .size ())); \
1726
+ } \
1727
+ for (size_t i = 0 ; i < filenames.size (); i++) { \
1728
+ archive.push_back (NewArchiveMember (MemoryBufferRef (buffers[i], filenames[i]))); \
1729
+ } \
1730
+ handleAllErrors (writeArchive (fname, archive, true , Kind, true , false ), reportWriterError); \
1731
+ }
1732
+
1733
+ WRITE_ARCHIVE (unopt_bc_fname, unopt, " _unopt" , " .bc" );
1734
+ WRITE_ARCHIVE (bc_fname, opt, " _opt" , " .bc" );
1735
+ WRITE_ARCHIVE (obj_fname, obj, " " , " .o" );
1736
+ WRITE_ARCHIVE (asm_fname, asm_, " " , " .s" );
1737
+ #undef WRITE_ARCHIVE
1738
+ }
1719
1739
}
1720
1740
1721
1741
void addTargetPasses (legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
0 commit comments