@@ -3705,7 +3705,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3705
3705
3706
3706
// For large transfers use synchronous behavior.
3707
3707
// If OMPT is enabled or synchronous behavior is explicitly requested:
3708
+ // FIXME: Currently hsa async copy fails to see completion signal for
3709
+ // non-x86 dataSubmit/Retrieve. Other non-x86 calls to asyncMemCopy
3710
+ // work. So for now, skip async copy for non-x86 for dataSubmit
3711
+ // and dataRetrieve only.
3712
+ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
3708
3713
if (OMPX_ForceSyncRegions || Size >= OMPX_MaxAsyncCopyBytes) {
3714
+ #else
3715
+ if (false ) {
3716
+ #endif
3709
3717
if (AsyncInfoWrapper.hasQueue ())
3710
3718
if (auto Err = synchronize (AsyncInfoWrapper))
3711
3719
return Err;
@@ -3792,7 +3800,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
3792
3800
3793
3801
// For large transfers use synchronous behavior.
3794
3802
// If OMPT is enabled or synchronous behavior is explicitly requested:
3803
+ // FIXME: Currently hsa async copy fails to see completion signal for
3804
+ // non-x86 dataSubmit/Retrieve. Other non-x86 calls to asyncMemCopy
3805
+ // work. So for now, skip async copy for non-x86 for dataSubmit
3806
+ // and dataRetrieve only.
3807
+ #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
3795
3808
if (OMPX_ForceSyncRegions || Size >= OMPX_MaxAsyncCopyBytes) {
3809
+ #else
3810
+ if (false ) {
3811
+ #endif
3796
3812
if (AsyncInfoWrapper.hasQueue ())
3797
3813
if (auto Err = synchronize (AsyncInfoWrapper))
3798
3814
return Err;
0 commit comments