Skip to content

Commit b5929cd

Browse files
authored
Merge pull request #45801 from makortel/alpakaCopyPostCopy
Add possibility for a `CopyToHost::postCopy()` operation
2 parents bc5cfae + 0db641f commit b5929cd

File tree

16 files changed

+282
-2
lines changed

16 files changed

+282
-2
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#ifndef DataFormats_PortableTest_interface_TestProductWithPtr_h
2+
#define DataFormats_PortableTest_interface_TestProductWithPtr_h
3+
4+
#include "DataFormats/Portable/interface/PortableCollection.h"
5+
#include "DataFormats/SoATemplate/interface/SoACommon.h"
6+
#include "DataFormats/SoATemplate/interface/SoALayout.h"
7+
#include "DataFormats/SoATemplate/interface/SoAView.h"
8+
#include "HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h"
9+
#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h"
10+
11+
#include <alpaka/alpaka.hpp>
12+
13+
/**
14+
* This data product is part of a test for CopyToHost::postCopy()
15+
* (i.e. updating a data product after the device-to-host copy). For
16+
* any practical purposes the indirection to 'buffer' array via the
17+
* 'ptr' pointer scalar is completely unnecessary. Do not take this
18+
* case as an example for good design of a data product.
19+
*/
20+
namespace portabletest {
21+
GENERATE_SOA_LAYOUT(TestSoALayoutWithPtr, SOA_COLUMN(int, buffer), SOA_SCALAR(int*, ptr));
22+
using TestSoAWithPtr = TestSoALayoutWithPtr<>;
23+
24+
template <typename TDev>
25+
using TestProductWithPtr = PortableCollection<TestSoAWithPtr, TDev>;
26+
27+
// Sets the 'ptr' scalar of the given view to the address of element 0 of the
// same view's 'buffer' column, so that 'ptr' refers to memory owned by the
// collection the view belongs to.
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void setPtrInTestProductWithPtr(TestSoAWithPtr::View view) {
28+
view.ptr() = &view.buffer(0);
29+
}
30+
} // namespace portabletest
31+
32+
namespace cms::alpakatools {
33+
// CopyToHost specialization for the device-side TestProductWithPtr. It
// provides both the copy itself (copyAsync) and the optional postCopy() hook
// that fixes up the 'ptr' scalar on the host-side copy.
template <typename TDev>
34+
struct CopyToHost<PortableDeviceCollection<portabletest::TestSoAWithPtr, TDev>> {
35+
// Creates a host collection with the same number of elements as 'src' and
// issues an alpaka::memcpy of the whole buffer of 'src' into it on 'queue'.
// The buffer is copied verbatim, so the 'ptr' scalar in the returned
// collection still holds the value it had on the device.
template <typename TQueue>
36+
static auto copyAsync(TQueue& queue, PortableDeviceCollection<portabletest::TestSoAWithPtr, TDev> const& src) {
37+
PortableHostCollection<portabletest::TestSoAWithPtr> dst(src->metadata().size(), queue);
38+
alpaka::memcpy(queue, dst.buffer(), src.buffer());
39+
return dst;
40+
}
41+
42+
// Re-points the 'ptr' scalar of the host collection to element 0 of its own
// 'buffer' column, replacing the value copied over from the device.
static void postCopy(PortableHostCollection<portabletest::TestSoAWithPtr>& dst) {
43+
portabletest::setPtrInTestProductWithPtr(dst.view());
44+
}
45+
};
46+
} // namespace cms::alpakatools
47+
48+
#endif

DataFormats/PortableTestObjects/src/alpaka/classes_cuda.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
// these first to make sure they get included before any SoA header
2+
#include <Eigen/Core>
3+
#include <Eigen/Dense>
4+
15
#include "DataFormats/Common/interface/DeviceProduct.h"
26
#include "DataFormats/Common/interface/Wrapper.h"
7+
#include "DataFormats/PortableTestObjects/interface/TestProductWithPtr.h"
38
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
49
#include "DataFormats/PortableTestObjects/interface/TestStruct.h"
510
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"

DataFormats/PortableTestObjects/src/alpaka/classes_cuda_def.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,8 @@
1414
<class name="alpaka_cuda_async::portabletest::TestDeviceMultiCollection3" persistent="false"/>
1515
<class name="edm::DeviceProduct<alpaka_cuda_async::portabletest::TestDeviceMultiCollection3>" persistent="false"/>
1616
<class name="edm::Wrapper<edm::DeviceProduct<alpaka_cuda_async::portabletest::TestDeviceMultiCollection3>>" persistent="false"/>
17+
18+
<class name="portabletest::TestProductWithPtr<alpaka_cuda_async::Device>"/>
19+
<class name="edm::DeviceProduct<portabletest::TestProductWithPtr<alpaka_cuda_async::Device>>"/>
20+
<class name="edm::Wrapper<edm::DeviceProduct<portabletest::TestProductWithPtr<alpaka_cuda_async::Device>>>" persistent="false"/>
1721
</lcgdict>

DataFormats/PortableTestObjects/src/alpaka/classes_rocm.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
// these first to make sure they get included before any SoA header
2+
#include <Eigen/Core>
3+
#include <Eigen/Dense>
4+
15
#include "DataFormats/Common/interface/DeviceProduct.h"
26
#include "DataFormats/Common/interface/Wrapper.h"
7+
#include "DataFormats/PortableTestObjects/interface/TestProductWithPtr.h"
38
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
49
#include "DataFormats/PortableTestObjects/interface/TestStruct.h"
510
#include "DataFormats/PortableTestObjects/interface/alpaka/TestDeviceCollection.h"

DataFormats/PortableTestObjects/src/alpaka/classes_rocm_def.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,8 @@
1414
<class name="alpaka_rocm_async::portabletest::TestDeviceMultiCollection3" persistent="false"/>
1515
<class name="edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceMultiCollection3>" persistent="false"/>
1616
<class name="edm::Wrapper<edm::DeviceProduct<alpaka_rocm_async::portabletest::TestDeviceMultiCollection3>>" persistent="false"/>
17+
18+
<class name="portabletest::TestProductWithPtr<alpaka_rocm_async::Device>"/>
19+
<class name="edm::DeviceProduct<portabletest::TestProductWithPtr<alpaka_rocm_async::Device>>"/>
20+
<class name="edm::Wrapper<edm::DeviceProduct<portabletest::TestProductWithPtr<alpaka_rocm_async::Device>>>" persistent="false"/>
1721
</lcgdict>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1+
// these first to make sure they get included before any SoA header
2+
#include <Eigen/Core>
3+
#include <Eigen/Dense>
4+
15
#include "DataFormats/Common/interface/Wrapper.h"
26
#include "DataFormats/PortableTestObjects/interface/TestHostCollection.h"
37
#include "DataFormats/PortableTestObjects/interface/TestHostObject.h"
8+
#include "DataFormats/PortableTestObjects/interface/TestProductWithPtr.h"
49
#include "DataFormats/PortableTestObjects/interface/TestSoA.h"
510
#include "DataFormats/PortableTestObjects/interface/TestStruct.h"

DataFormats/PortableTestObjects/src/classes_def.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,7 @@
3939
<class name="portabletest::TestHostMultiCollection3"/>
4040

4141
<class name="edm::Wrapper<portabletest::TestHostMultiCollection3>" splitLevel="0"/>
42+
43+
<class name="portabletest::TestProductWithPtr<alpaka_common::DevHost>"/>
44+
<class name="edm::Wrapper<portabletest::TestProductWithPtr<alpaka_common::DevHost>>" persistent="false"/>
4245
</lcgdict>

HeterogeneousCore/AlpakaCore/README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,27 @@ Note that the destination (device-side) type `TDst` can be different from or the
102102

103103
The `CopyToDevice` class template is partially specialized for all `PortableCollection` instantiations.
104104

105+
#### Data products with `memcpy()`ed pointers
106+
107+
If the data product in question contains pointers to memory elsewhere within the data product, after the `alpaka::memcpy()` calls in the `copyAsync()` those pointers still point to device memory, and need to be updated. **Such data products are generally discouraged.** Nevertheless, such pointers can be updated without any additional synchronization by implementing a `postCopy()` function in the `CopyToHost` specialization (extending the `CopyToHost` example [above](#edproducer)):
108+
```cpp
109+
namespace cms::alpakatools {
110+
template <>
111+
struct CopyToHost<TSrc> {
112+
// copyAsync() definition from above
113+
114+
static void postCopy(TDst& obj) {
115+
// modify obj
116+
// any modifications must be such that the postCopy() can be
117+
// skipped when the obj originates from the host (i.e. on CPU backends)
118+
}
119+
};
120+
}
121+
```
122+
The `postCopy()` is called after the operations enqueued in the `copyAsync()` have finished. The code in `postCopy()` must be such that the call to `postCopy()` can be omitted on CPU backends.
123+
124+
Note that for `CopyToDevice` such `postCopy()` functionality is **not** provided. It should be possible to issue a kernel call (via an intermediate host-side function) from the `CopyToDevice::copyAsync()` function to achieve the same effect.
125+
105126
### `PortableCollection`
106127

107128
For more information see [`DataFormats/Portable/README.md`](../../DataFormats/Portable/README.md) and [`DataFormats/SoATemplate/README.md`](../../DataFormats/SoATemplate/README.md).

HeterogeneousCore/AlpakaCore/interface/alpaka/ProducerBase.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
9393
return Base::template produces<TToken, Tr>(std::move(instanceName));
9494
} else {
9595
edm::EDPutTokenT<TToken> token = Base::template produces<TToken, Tr>(instanceName);
96+
using CopyT = cms::alpakatools::CopyToHost<TProduct>;
9697
this->registerTransformAsync(
9798
token,
9899
[](TToken const& deviceProduct, edm::WaitingTaskWithArenaHolder holder) {
@@ -103,7 +104,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
103104
TProduct const& productOnDevice =
104105
deviceProduct.template getSynchronized<EDMetadata>(*metadataPtr, tryReuseQueue);
105106

106-
using CopyT = cms::alpakatools::CopyToHost<TProduct>;
107107
auto productOnHost = CopyT::copyAsync(metadataPtr->queue(), productOnDevice);
108108

109109
// Need to keep the EDMetadata object from sentry.finish()
@@ -112,7 +112,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
112112
// Wrap possibly move-only type into a copyable type
113113
return std::make_shared<TplType>(std::move(productOnHost), sentry.finish());
114114
},
115-
[](auto tplPtr) { return std::move(std::get<0>(*tplPtr)); },
115+
[](auto tplPtr) {
116+
auto& productOnHost = std::get<0>(*tplPtr);
117+
if constexpr (requires { CopyT::postCopy(productOnHost); }) {
118+
CopyT::postCopy(productOnHost);
119+
}
120+
return std::move(productOnHost);
121+
},
116122
std::move(instanceName));
117123
return token;
118124
}

HeterogeneousCore/AlpakaInterface/interface/CopyToHost.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,29 @@ namespace cms::alpakatools {
2828
* queue. The ExampleDeviceProduct and ExampleHostProduct can be the
2929
* same type, if they internally are able to handle the memory
3030
* allocation difference between host and device.
31+
*
32+
* Data products that contain pointers to memory elsewhere in the
33+
* data product need those pointers to be updated after the copy
34+
* from device-to-host completes. While such data structures are
35+
* generally discouraged, such an update of the data product can be
36+
* implemented (without any additional synchronization) with an
37+
* optional postCopy() static member function in the CopyToHost
38+
* specialization. The postCopy() is called for the host-side data
39+
* product after the copy operations enqueued in the copyAsync()
40+
* have finished. Following the example above, the expected
41+
* signature is
42+
* \code
43+
* template <>
44+
* struct CopyToHost<ExampleDeviceProduct> {
45+
* // copyAsync() definition from above
46+
*
47+
* static void postCopy(ExampleHostProduct& obj) {
48+
* // modify obj
49+
* // any modifications must be such that the postCopy() can be
50+
* // skipped when the obj originates from the host (i.e. on CPU backends)
51+
* }
52+
* };
53+
* \endcode
3154
*/
3255
template <typename TDeviceData>
3356
struct CopyToHost;

0 commit comments

Comments
 (0)