@@ -78,8 +78,13 @@ bool PluginManager::initializePlugin(GenericPluginTy &Plugin) {
7878
7979bool PluginManager::initializeDevice (GenericPluginTy &Plugin,
8080 int32_t DeviceId) {
81- if (Plugin.is_device_initialized (DeviceId))
81+ if (Plugin.is_device_initialized (DeviceId)) {
82+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor ();
83+ (*ExclusiveDevicesAccessor)[PM->DeviceIds [std::make_pair (&Plugin,
84+ DeviceId)]]
85+ ->setHasPendingImages (true );
8286 return true ;
87+ }
8388
8489 // Initialize the device information for the RTL we are about to use.
8590 auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor ();
@@ -286,13 +291,194 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
286291 DP (" Done unregistering library!\n " );
287292}
288293
294+ // / Map global data and execute pending ctors
295+ static int loadImagesOntoDevice (DeviceTy &Device) {
296+ /*
297+ * Map global data
298+ */
299+ int32_t DeviceId = Device.DeviceID ;
300+ int Rc = OFFLOAD_SUCCESS;
301+ {
302+ std::lock_guard<decltype (PM->TrlTblMtx )> LG (PM->TrlTblMtx );
303+ for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder ) {
304+ TranslationTable *TransTable =
305+ &PM->HostEntriesBeginToTransTable [HostEntriesBegin];
306+ DP (" Trans table %p : %p\n " , TransTable->HostTable .EntriesBegin ,
307+ TransTable->HostTable .EntriesEnd );
308+ if (TransTable->HostTable .EntriesBegin ==
309+ TransTable->HostTable .EntriesEnd ) {
310+ // No host entry so no need to proceed
311+ continue ;
312+ }
313+
314+ if (TransTable->TargetsTable [DeviceId] != 0 ) {
315+ // Library entries have already been processed
316+ continue ;
317+ }
318+
319+ // 1) get image.
320+ assert (TransTable->TargetsImages .size () > (size_t )DeviceId &&
321+ " Not expecting a device ID outside the table's bounds!" );
322+ __tgt_device_image *Img = TransTable->TargetsImages [DeviceId];
323+ if (!Img) {
324+ REPORT (" No image loaded for device id %d.\n " , DeviceId);
325+ Rc = OFFLOAD_FAIL;
326+ break ;
327+ }
328+
329+ // 2) Load the image onto the given device.
330+ auto BinaryOrErr = Device.loadBinary (Img);
331+ if (llvm::Error Err = BinaryOrErr.takeError ()) {
332+ REPORT (" Failed to load image %s\n " ,
333+ llvm::toString (std::move (Err)).c_str ());
334+ Rc = OFFLOAD_FAIL;
335+ break ;
336+ }
337+
338+ // 3) Create the translation table.
339+ llvm::SmallVector<__tgt_offload_entry> &DeviceEntries =
340+ TransTable->TargetsEntries [DeviceId];
341+ for (__tgt_offload_entry &Entry :
342+ llvm::make_range (Img->EntriesBegin , Img->EntriesEnd )) {
343+ __tgt_device_binary &Binary = *BinaryOrErr;
344+
345+ __tgt_offload_entry DeviceEntry = Entry;
346+ if (Entry.size ) {
347+ if (Device.RTL ->get_global (Binary, Entry.size , Entry.name ,
348+ &DeviceEntry.addr ) != OFFLOAD_SUCCESS)
349+ REPORT (" Failed to load symbol %s\n " , Entry.name );
350+
351+ // If unified memory is active, the corresponding global is a device
352+ // reference to the host global. We need to initialize the pointer on
353+ // the device to point to the memory on the host.
354+ if ((PM->getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
355+ (PM->getRequirements () & OMPX_REQ_AUTO_ZERO_COPY)) {
356+ if (Device.RTL ->data_submit (DeviceId, DeviceEntry.addr , Entry.addr ,
357+ Entry.size ) != OFFLOAD_SUCCESS)
358+ REPORT (" Failed to write symbol for USM %s\n " , Entry.name );
359+ }
360+ } else if (Entry.addr ) {
361+ if (Device.RTL ->get_function (Binary, Entry.name , &DeviceEntry.addr ) !=
362+ OFFLOAD_SUCCESS)
363+ REPORT (" Failed to load kernel %s\n " , Entry.name );
364+ }
365+ DP (" Entry point " DPxMOD " maps to%s %s (" DPxMOD " )\n " ,
366+ DPxPTR (Entry.addr ), (Entry.size ) ? " global" : " " , Entry.name ,
367+ DPxPTR (DeviceEntry.addr ));
368+
369+ DeviceEntries.emplace_back (DeviceEntry);
370+ }
371+
372+ // Set the storage for the table and get a pointer to it.
373+ __tgt_target_table DeviceTable{&DeviceEntries[0 ],
374+ &DeviceEntries[0 ] + DeviceEntries.size ()};
375+ TransTable->DeviceTables [DeviceId] = DeviceTable;
376+ __tgt_target_table *TargetTable = TransTable->TargetsTable [DeviceId] =
377+ &TransTable->DeviceTables [DeviceId];
378+
379+ // 4) Verify whether the two table sizes match.
380+ size_t Hsize =
381+ TransTable->HostTable .EntriesEnd - TransTable->HostTable .EntriesBegin ;
382+ size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin ;
383+
384+ // Invalid image for these host entries!
385+ if (Hsize != Tsize) {
386+ REPORT (
387+ " Host and Target tables mismatch for device id %d [%zx != %zx].\n " ,
388+ DeviceId, Hsize, Tsize);
389+ TransTable->TargetsImages [DeviceId] = 0 ;
390+ TransTable->TargetsTable [DeviceId] = 0 ;
391+ Rc = OFFLOAD_FAIL;
392+ break ;
393+ }
394+
395+ MappingInfoTy::HDTTMapAccessorTy HDTTMap =
396+ Device.getMappingInfo ().HostDataToTargetMap .getExclusiveAccessor ();
397+
398+ __tgt_target_table *HostTable = &TransTable->HostTable ;
399+ for (__tgt_offload_entry *CurrDeviceEntry = TargetTable->EntriesBegin ,
400+ *CurrHostEntry = HostTable->EntriesBegin ,
401+ *EntryDeviceEnd = TargetTable->EntriesEnd ;
402+ CurrDeviceEntry != EntryDeviceEnd;
403+ CurrDeviceEntry++, CurrHostEntry++) {
404+ if (CurrDeviceEntry->size == 0 )
405+ continue ;
406+
407+ assert (CurrDeviceEntry->size == CurrHostEntry->size &&
408+ " data size mismatch" );
409+
410+ // Fortran may use multiple weak declarations for the same symbol,
411+ // therefore we must allow for multiple weak symbols to be loaded from
412+ // the fat binary. Treat these mappings as any other "regular"
413+ // mapping. Add entry to map.
414+ if (Device.getMappingInfo ().getTgtPtrBegin (HDTTMap, CurrHostEntry->addr ,
415+ CurrHostEntry->size ))
416+ continue ;
417+
418+ void *CurrDeviceEntryAddr = CurrDeviceEntry->addr ;
419+
420+ // For indirect mapping, follow the indirection and map the actual
421+ // target.
422+ if (CurrDeviceEntry->flags & OMP_DECLARE_TARGET_INDIRECT) {
423+ AsyncInfoTy AsyncInfo (Device);
424+ void *DevPtr;
425+ Device.retrieveData (&DevPtr, CurrDeviceEntryAddr, sizeof (void *),
426+ AsyncInfo, /* Entry=*/ nullptr , &HDTTMap);
427+ if (AsyncInfo.synchronize () != OFFLOAD_SUCCESS)
428+ return OFFLOAD_FAIL;
429+ CurrDeviceEntryAddr = DevPtr;
430+ }
431+
432+ DP (" Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
433+ " , name \" %s\"\n " ,
434+ DPxPTR (CurrHostEntry->addr ), DPxPTR (CurrDeviceEntry->addr ),
435+ CurrDeviceEntry->size , CurrDeviceEntry->name );
436+ HDTTMap->emplace (new HostDataToTargetTy (
437+ (uintptr_t )CurrHostEntry->addr /* HstPtrBase*/ ,
438+ (uintptr_t )CurrHostEntry->addr /* HstPtrBegin*/ ,
439+ (uintptr_t )CurrHostEntry->addr + CurrHostEntry->size /* HstPtrEnd*/ ,
440+ (uintptr_t )CurrDeviceEntryAddr /* TgtAllocBegin*/ ,
441+ (uintptr_t )CurrDeviceEntryAddr /* TgtPtrBegin*/ ,
442+ false /* UseHoldRefCount*/ , CurrHostEntry->name ,
443+ true /* IsRefCountINF*/ ));
444+
445+ // Notify about the new mapping.
446+ if (Device.notifyDataMapped (CurrHostEntry->addr , CurrHostEntry->size ))
447+ return OFFLOAD_FAIL;
448+ }
449+ }
450+ Device.setHasPendingImages (false );
451+ }
452+
453+ if (Rc != OFFLOAD_SUCCESS)
454+ return Rc;
455+
456+ static Int32Envar DumpOffloadEntries =
457+ Int32Envar (" OMPTARGET_DUMP_OFFLOAD_ENTRIES" , -1 );
458+ if (DumpOffloadEntries.get () == DeviceId)
459+ Device.dumpOffloadEntries ();
460+
461+ return OFFLOAD_SUCCESS;
462+ }
463+
289464Expected<DeviceTy &> PluginManager::getDevice (uint32_t DeviceNo) {
290- auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor ();
291- if (DeviceNo >= ExclusiveDevicesAccessor->size ())
292- return createStringError (
293- inconvertibleErrorCode (),
294- " Device number '%i' out of range, only %i devices available" , DeviceNo,
295- ExclusiveDevicesAccessor->size ());
465+ DeviceTy *DevicePtr;
466+ {
467+ auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor ();
468+ if (DeviceNo >= ExclusiveDevicesAccessor->size ())
469+ return createStringError (
470+ inconvertibleErrorCode (),
471+ " Device number '%i' out of range, only %i devices available" ,
472+ DeviceNo, ExclusiveDevicesAccessor->size ());
473+
474+ DevicePtr = &*(*ExclusiveDevicesAccessor)[DeviceNo];
475+ }
296476
297- return *(*ExclusiveDevicesAccessor)[DeviceNo];
477+ // Check whether global data has been mapped for this device
478+ if (DevicePtr->hasPendingImages ())
479+ if (loadImagesOntoDevice (*DevicePtr) != OFFLOAD_SUCCESS)
480+ return createStringError (inconvertibleErrorCode (),
481+ " Failed to load images on device '%i'" ,
482+ DeviceNo);
483+ return *DevicePtr;
298484}
0 commit comments