Skip to content

Commit 77b225a

Browse files
authored
Merge pull request #59 from projectchrono/copilot/update-demsovler-gpu-selection
Add GPU selection constructor overloads and migrate ownership to unique_ptr
2 parents 34d6f9b + d0f3bba commit 77b225a

File tree

10 files changed

+148
-46
lines changed

10 files changed

+148
-46
lines changed

_codeql_detected_source_root

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.

src/DEM/API.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <set>
1111
#include <cfloat>
1212
#include <functional>
13+
#include <memory>
1314

1415
#include "kT.h"
1516
#include "dT.h"
@@ -48,7 +49,11 @@ class DEMTracker;
4849
/// Main DEM-Engine solver.
4950
class DEMSolver {
5051
public:
52+
/// Default constructor: scans available GPUs and uses at most 2.
5153
DEMSolver(unsigned int nGPUs = 2);
54+
/// Construct using explicit GPU device IDs. Errors if any ID is unavailable. Warns and uses the first 2 if more
55+
/// than 2 are given.
56+
DEMSolver(std::vector<int> device_ids);
5257
~DEMSolver();
5358

5459
/// Set output detail level.
@@ -1806,12 +1811,12 @@ class DEMSolver {
18061811
// DEM system's workers, helpers, friends
18071812
////////////////////////////////////////////////////////////////////////////////
18081813

1809-
WorkerReportChannel* kTMain_InteractionManager;
1810-
WorkerReportChannel* dTMain_InteractionManager;
1811-
GpuManager* dTkT_GpuManager;
1812-
ThreadManager* dTkT_InteractionManager;
1813-
DEMKinematicThread* kT;
1814-
DEMDynamicThread* dT;
1814+
std::unique_ptr<GpuManager> dTkT_GpuManager;
1815+
std::unique_ptr<ThreadManager> dTkT_InteractionManager;
1816+
std::unique_ptr<WorkerReportChannel> dTMain_InteractionManager;
1817+
std::unique_ptr<WorkerReportChannel> kTMain_InteractionManager;
1818+
std::unique_ptr<DEMDynamicThread> dT;
1819+
std::unique_ptr<DEMKinematicThread> kT;
18151820

18161821
////////////////////////////////////////////////////////////////////////////////
18171822
// DEM system's private methods

src/DEM/APIPrivate.cpp

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,9 @@ void DEMSolver::decideCDMarginStrat() {
579579

580580
void DEMSolver::reportInitStats() const {
581581
DEME_INFO("\n");
582-
DEME_INFO("Number of total active devices: %d", dTkT_GpuManager->getNumDevices());
582+
583+
DEME_INFO("Number of system devices detected: %d", GpuManager::scanNumDevices());
584+
DEME_INFO("Number of active devices used by DEME: %d", dTkT_GpuManager->getNumDevices());
583585

584586
DEME_INFO("User-specified X-dimension range: [%.7g, %.7g]", m_user_box_min.x, m_user_box_max.x);
585587
DEME_INFO("User-specified Y-dimension range: [%.7g, %.7g]", m_user_box_min.y, m_user_box_max.y);
@@ -1280,8 +1282,8 @@ void DEMSolver::packDataPointers() {
12801282
kT->packDataPointers();
12811283
// Each worker thread needs pointers used for data transfering. Note this step must be done after packDataPointers
12821284
// are called, so each thread has its own pointers packed.
1283-
dT->packTransferPointers(kT);
1284-
kT->packTransferPointers(dT);
1285+
dT->packTransferPointers(kT.get());
1286+
kT->packTransferPointers(dT.get());
12851287
// Finally, the API needs to map all mesh to their owners
12861288
for (const auto& mmesh : m_meshes) {
12871289
m_owner_mesh_map[mmesh->owner] = mmesh->cache_offset;
@@ -1313,21 +1315,21 @@ void DEMSolver::validateUserInputs() {
13131315
// LoadClumpType.");
13141316
// }
13151317

1316-
// If not 2 GPUs detected, output warnings as needed
1317-
int ndevices = dTkT_GpuManager->getNumDevices();
1318-
if (ndevices == 0) {
1319-
DEME_ERROR(
1320-
"No GPU device is detected. Try lspci and see what you get.\nIf you indeed have GPU devices, maybe you "
1321-
"should try rebooting or reinstalling cuda components?");
1322-
// } else if (ndevices == 1) {
1323-
// DEME_WARNING(
1324-
// "One GPU device is detected. On consumer cards, DEME's performance edge is limited with only one"
1325-
// "GPU.\nTry allocating 2 GPU devices if possible.");
1326-
} else if (ndevices > 2) {
1327-
DEME_WARNING(
1328-
"More than two GPU devices are detected.\nCurrently, DEME can make use of at most two devices.\nMore "
1329-
"devices will not improve the performance.");
1330-
}
1318+
// // If not 2 GPUs detected, output warnings as needed
1319+
// int ndevices = dTkT_GpuManager->getNumDevices();
1320+
// if (ndevices == 0) {
1321+
// DEME_ERROR(
1322+
// "No GPU device is detected. Try lspci and see what you get.\nIf you indeed have GPU devices, maybe you "
1323+
// "should try rebooting or reinstalling cuda components?");
1324+
// // } else if (ndevices == 1) {
1325+
// // DEME_WARNING(
1326+
// // "One GPU device is detected. On consumer cards, DEME's performance edge is limited with only one"
1327+
// // "GPU.\nTry allocating 2 GPU devices if possible.");
1328+
// } else if (ndevices > 2) {
1329+
// DEME_WARNING(
1330+
// "More than two GPU devices are detected.\nCurrently, DEME can make use of at most two devices.\nMore "
1331+
// "devices will not improve the performance.");
1332+
// }
13311333

13321334
// Box size OK?
13331335
float3 user_box_size = m_user_box_max - m_user_box_min;

src/DEM/APIPublic.cpp

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,47 +21,120 @@
2121
namespace deme {
2222

2323
DEMSolver::DEMSolver(unsigned int nGPUs) {
24-
dTkT_InteractionManager = new ThreadManager();
25-
kTMain_InteractionManager = new WorkerReportChannel();
26-
dTMain_InteractionManager = new WorkerReportChannel();
24+
if (nGPUs == 0) {
25+
DEME_ERROR("DEMSolver was set to use 0 GPUs and that is currently not supported.");
26+
}
2727

28-
// 2 means 2 threads (nGPUs is currently not used)
29-
dTkT_GpuManager = new GpuManager(2);
28+
dTkT_InteractionManager = std::make_unique<ThreadManager>();
29+
kTMain_InteractionManager = std::make_unique<WorkerReportChannel>();
30+
dTMain_InteractionManager = std::make_unique<WorkerReportChannel>();
3031

3132
// Set default solver params
3233
setDefaultSolverParams();
3334

35+
// Determine which device IDs to use: scan available GPUs, use at most 2
36+
int detected = GpuManager::scanNumDevices();
37+
if (nGPUs > 2) {
38+
DEME_WARNING("DEMSolver was requested to use %u GPUs, but at most 2 are supported. Using 2 GPUs.", nGPUs);
39+
nGPUs = 2;
40+
}
41+
// Build a 2-element device ID list (one per thread); both threads share the same device when using 1 GPU
42+
unsigned int nToUse = std::min((unsigned int)detected, nGPUs);
43+
std::vector<int> device_ids;
44+
if (nToUse >= 2) {
45+
device_ids = {0, 1};
46+
} else {
47+
device_ids = {0, 0};
48+
}
49+
// Always use id list to avoid GpuManager deciding device usage by itself
50+
dTkT_GpuManager = std::make_unique<GpuManager>(device_ids);
51+
3452
// Thread-based worker creation may be needed as the workers allocate DualStructs on construction
3553
std::thread dT_construct([&]() {
3654
// Get a device/stream ID to use from the GPU Manager
3755
const GpuManager::StreamInfo dT_stream_info = dTkT_GpuManager->getAvailableStream();
3856
DEME_GPU_CALL(cudaSetDevice(dT_stream_info.device));
39-
dT = new DEMDynamicThread(dTMain_InteractionManager, dTkT_InteractionManager, dT_stream_info);
57+
dT = std::make_unique<DEMDynamicThread>(dTMain_InteractionManager.get(), dTkT_InteractionManager.get(),
58+
dT_stream_info);
59+
});
60+
61+
std::thread kT_construct([&]() {
62+
const GpuManager::StreamInfo kT_stream_info = dTkT_GpuManager->getAvailableStream();
63+
DEME_GPU_CALL(cudaSetDevice(kT_stream_info.device));
64+
kT = std::make_unique<DEMKinematicThread>(kTMain_InteractionManager.get(), dTkT_InteractionManager.get(),
65+
kT_stream_info);
66+
});
67+
68+
dT_construct.join();
69+
kT_construct.join();
70+
71+
// Make friends
72+
dT->kT = kT.get();
73+
kT->dT = dT.get();
74+
}
75+
76+
DEMSolver::DEMSolver(std::vector<int> device_ids) {
77+
dTkT_InteractionManager = std::make_unique<ThreadManager>();
78+
kTMain_InteractionManager = std::make_unique<WorkerReportChannel>();
79+
dTMain_InteractionManager = std::make_unique<WorkerReportChannel>();
80+
81+
// Set default solver params
82+
setDefaultSolverParams();
83+
84+
// Validate device IDs against the number of physically available GPUs
85+
int detected = GpuManager::scanNumDevices();
86+
if (device_ids.empty()) {
87+
DEME_ERROR("DEMSolver was given an empty device ID list. Please provide at least one device ID.");
88+
}
89+
for (int id : device_ids) {
90+
if (id < 0 || id >= detected) {
91+
DEME_ERROR("DEMSolver was given device ID %d, but only %d GPU device(s) are available.", id, detected);
92+
}
93+
}
94+
if (device_ids.size() > 2) {
95+
DEME_WARNING("DEMSolver was given %zu device IDs, but at most 2 are supported. Using only the first 2.",
96+
device_ids.size());
97+
device_ids.resize(2);
98+
}
99+
// When only one device is specified, both threads run on that device
100+
if (device_ids.size() == 1) {
101+
device_ids = {device_ids[0], device_ids[0]};
102+
}
103+
// Always use id list to avoid GpuManager deciding device usage by itself
104+
dTkT_GpuManager = std::make_unique<GpuManager>(device_ids);
105+
106+
// Thread-based worker creation may be needed as the workers allocate DualStructs on construction
107+
std::thread dT_construct([&]() {
108+
const GpuManager::StreamInfo dT_stream_info = dTkT_GpuManager->getAvailableStream();
109+
DEME_GPU_CALL(cudaSetDevice(dT_stream_info.device));
110+
dT = std::make_unique<DEMDynamicThread>(dTMain_InteractionManager.get(), dTkT_InteractionManager.get(),
111+
dT_stream_info);
40112
});
41113

42114
std::thread kT_construct([&]() {
43115
const GpuManager::StreamInfo kT_stream_info = dTkT_GpuManager->getAvailableStream();
44116
DEME_GPU_CALL(cudaSetDevice(kT_stream_info.device));
45-
kT = new DEMKinematicThread(kTMain_InteractionManager, dTkT_InteractionManager, kT_stream_info);
117+
kT = std::make_unique<DEMKinematicThread>(kTMain_InteractionManager.get(), dTkT_InteractionManager.get(),
118+
kT_stream_info);
46119
});
47120

48121
dT_construct.join();
49122
kT_construct.join();
50123

51124
// Make friends
52-
dT->kT = kT;
53-
kT->dT = dT;
125+
dT->kT = kT.get();
126+
kT->dT = dT.get();
54127
}
55128

56129
DEMSolver::~DEMSolver() {
57130
if (sys_initialized)
58131
DoDynamicsThenSync(0.0);
59-
delete kT;
60-
delete dT;
61-
delete kTMain_InteractionManager;
62-
delete dTMain_InteractionManager;
63-
delete dTkT_InteractionManager;
64-
delete dTkT_GpuManager;
132+
kT.reset();
133+
dT.reset();
134+
kTMain_InteractionManager.reset();
135+
dTMain_InteractionManager.reset();
136+
dTkT_InteractionManager.reset();
137+
dTkT_GpuManager.reset();
65138
}
66139

67140
void DEMSolver::SetVerbosity(const std::string& verbose) {
@@ -1910,13 +1983,13 @@ std::shared_ptr<DEMMeshConnected> DEMSolver::AddWavefrontMeshObject(const std::s
19101983
}
19111984

19121985
std::shared_ptr<DEMInspector> DEMSolver::CreateInspector(const std::string& quantity) {
1913-
DEMInspector insp(this, this->dT, quantity);
1986+
DEMInspector insp(this, this->dT.get(), quantity);
19141987
m_inspectors.push_back(std::make_shared<DEMInspector>(std::move(insp)));
19151988
return m_inspectors.back();
19161989
}
19171990

19181991
std::shared_ptr<DEMInspector> DEMSolver::CreateInspector(const std::string& quantity, const std::string& region) {
1919-
DEMInspector insp(this, this->dT, quantity, region);
1992+
DEMInspector insp(this, this->dT.get(), quantity, region);
19201993
m_inspectors.push_back(std::make_shared<DEMInspector>(std::move(insp)));
19211994
return m_inspectors.back();
19221995
}

src/DEM/dT.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ bodyID_t DEMDynamicThread::getGeoOwnerID(const bodyID_t& geoB, const contact_t&
269269
}
270270

271271
// packTransferPointers
272-
void DEMDynamicThread::packTransferPointers(DEMKinematicThread*& kT) {
272+
void DEMDynamicThread::packTransferPointers(DEMKinematicThread* kT) {
273273
// These are the pointers for sending data to dT
274274
granData->pKTOwnedBuffer_absVel = kT->absVel_buffer.data();
275275
granData->pKTOwnedBuffer_voxelID = kT->voxelID_buffer.data();

src/DEM/dT.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ class DEMDynamicThread {
575575

576576
/// Put sim data array pointers in place
577577
void packDataPointers();
578-
void packTransferPointers(DEMKinematicThread*& kT);
578+
void packTransferPointers(DEMKinematicThread* kT);
579579

580580
// Move array data to or from device
581581
void migrateDataToDevice();

src/DEM/kT.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ void DEMKinematicThread::migrateDeviceModifiableInfoToHost() {
518518
migrateFamilyToHost();
519519
}
520520

521-
void DEMKinematicThread::packTransferPointers(DEMDynamicThread*& dT) {
521+
void DEMKinematicThread::packTransferPointers(DEMDynamicThread* dT) {
522522
// Set the pointers to dT owned buffers
523523
granData->pDTOwnedBuffer_nContactPairs = &(dT->nContactPairs_buffer);
524524
granData->pDTOwnedBuffer_idGeometryA = dT->idGeometryA_buffer.data();

src/DEM/kT.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ class DEMKinematicThread {
342342

343343
// Put sim data array pointers in place
344344
void packDataPointers();
345-
void packTransferPointers(DEMDynamicThread*& dT);
345+
void packTransferPointers(DEMDynamicThread* dT);
346346

347347
// Move array data to or from device
348348
void migrateDataToDevice();

src/core/utils/GpuManager.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include <algorithm>
2+
#include <set>
23
#include <stdexcept>
34
#include <iostream>
45

@@ -32,6 +33,24 @@ GpuManager::GpuManager(unsigned int total_streams) {
3233
}
3334
}
3435

36+
GpuManager::GpuManager(const std::vector<int>& device_ids) {
37+
// ndevices counts distinct physical devices in use
38+
std::set<int> unique_ids(device_ids.begin(), device_ids.end());
39+
ndevices = static_cast<int>(unique_ids.size());
40+
41+
if (!device_ids.empty()) {
42+
int max_id = *std::max_element(device_ids.begin(), device_ids.end());
43+
this->streams.resize(max_id + 1);
44+
}
45+
46+
// Each entry in device_ids creates one stream; duplicate device IDs are intentional and result in multiple
47+
// streams on the same device (used when both kT and dT threads run on the same physical device).
48+
for (int dev_id : device_ids) {
49+
cudaStream_t new_stream = nullptr;
50+
this->streams[dev_id].push_back(StreamInfo{dev_id, new_stream, false});
51+
}
52+
}
53+
3554
// TODO: add CUDA error checking
3655
GpuManager::~GpuManager() {
3756
for (auto outer = this->streams.begin(); outer != this->streams.end(); outer++) {

src/core/utils/GpuManager.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
class GpuManager {
1010
public:
1111
GpuManager(unsigned int total_streams = 1);
12+
// Construct using explicit device IDs (one stream per device ID).
13+
GpuManager(const std::vector<int>& device_ids);
1214
~GpuManager();
1315

1416
struct StreamInfo {
@@ -24,7 +26,7 @@ class GpuManager {
2426
// Returns the HIGHEST number of streams per device.
2527
unsigned int getMaxStreamsPerDevice();
2628

27-
int scanNumDevices();
29+
static int scanNumDevices();
2830

2931
// DO NOT USE UNLESS YOU INTEND TO MANUALLY HANDLE YOUR STREAMS.
3032
const std::vector<StreamInfo>& getStreamsFromDevice(int index);

0 commit comments

Comments
 (0)