Skip to content

Commit 945ccda

Browse files
authored
Merge pull request #857 from UWB-Biocomputing/BenMerge
Moving GPU memory management to OperationManager (clean)
2 parents 6b5a7d0 + 733518c commit 945ccda

27 files changed

+311
-322
lines changed

Simulator/Core/Core.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ int Core::runSimulation(string executableName, string cmdLineArguments)
206206
}
207207

208208
// Helper function for recorder to register spike history variables for all neurons.
209-
simulator.getModel().getLayout().getVertices().registerHistoryVariables();
209+
OperationManager::getInstance().executeOperation(Operations::registerHistoryVariables);
210210

211211
// Run simulation
212212
LOG4CPLUS_TRACE(consoleLogger, "Starting Simulation");
@@ -247,4 +247,4 @@ int Core::runSimulation(string executableName, string cmdLineArguments)
247247
cout << "time elapsed: " << timeElapsed << endl;
248248
cout << "ssps (simulation seconds / real time seconds): " << ssps << endl;
249249
return 0;
250-
}
250+
}

Simulator/Core/GPUModel.cpp

Lines changed: 87 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
#include "AllVertices.h"
1313
#include "Connections.h"
1414
#include "Global.h"
15+
#include "OperationManager.h"
16+
1517
#ifdef VALIDATION_MODE
1618
#include "AllIFNeurons.h"
1719
#include "OperationManager.h"
@@ -27,51 +29,51 @@ GPUModel::GPUModel() :
2729
Model::Model(), edgeIndexMapDevice_(nullptr), randNoise_d(nullptr), allVerticesDevice_(nullptr),
2830
allEdgesDevice_(nullptr)
2931
{
32+
// Register allocNeuronDeviceStruct function as a allocateGPU operation in the OperationManager
33+
function<void()> allocateGPU = bind(&GPUModel::allocDeviceStruct, this);
34+
OperationManager::getInstance().registerOperation(Operations::allocateGPU, allocateGPU);
35+
36+
// Register copyCPUtoGPU function as a copyCPUtoGPU operation in the OperationManager
37+
function<void()> copyCPUtoGPU = bind(&GPUModel::copyCPUtoGPU, this);
38+
OperationManager::getInstance().registerOperation(Operations::copyToGPU, copyCPUtoGPU);
39+
40+
// Note: We do not register a corresponding copyFromGPU operation here because
41+
// we are only copying the synapseIndexMap to the GPU. This map is a read-only lookup table
42+
// that gets recreated from scratch on each update. As a result, we only need to allocate,
43+
// copy to GPU, and deallocate — there is no meaningful data to copy back from the GPU.
44+
45+
// Register deleteSynapseImap function as a deallocateGPUMemory operation in the OperationManager
46+
function<void()> deallocateGPUMemory = bind(&GPUModel::deleteDeviceStruct, this);
47+
OperationManager::getInstance().registerOperation(Operations::deallocateGPUMemory,
48+
deallocateGPUMemory);
3049
}
3150

3251
/// Allocates and initializes memories on CUDA device.
33-
/// @param[out] allVerticesDevice Memory location of the pointer to the vertices list on device memory.
34-
/// @param[out] allEdgesDevice Memory location of the pointer to the edges list on device memory.
35-
void GPUModel::allocDeviceStruct(void **allVerticesDevice, void **allEdgesDevice)
52+
void GPUModel::allocDeviceStruct()
3653
{
37-
// Get vertices and edges
38-
AllVertices &vertices = layout_->getVertices();
39-
AllEdges &edges = connections_->getEdges();
40-
41-
// Allocate vertices and edges structs on GPU device memory
42-
vertices.allocVerticesDeviceStruct(allVerticesDevice);
43-
edges.allocEdgeDeviceStruct(allEdgesDevice);
44-
4554
// Allocate memory for random noise array
4655
int numVertices = Simulator::getInstance().getTotalVertices();
4756
BGSIZE randNoise_d_size = numVertices * sizeof(float); // size of random noise array
4857
HANDLE_ERROR(cudaMalloc((void **)&randNoise_d, randNoise_d_size));
4958

50-
// Copy host vertex and edge arrays into GPU device
51-
vertices.copyToDevice(*allVerticesDevice);
52-
edges.copyEdgeHostToDevice(*allEdgesDevice);
53-
54-
// Allocate edge inverse map in device memory
59+
// Allocate synapse inverse map in device memory
5560
allocEdgeIndexMap(numVertices);
5661
}
5762

5863
/// Copies device memories to host memories and deallocates them.
59-
/// @param[out] allVerticesDevice Memory location of the pointer to the vertices list on device memory.
60-
/// @param[out] allEdgesDevice Memory location of the pointer to the edges list on device memory.
61-
void GPUModel::deleteDeviceStruct(void **allVerticesDevice, void **allEdgesDevice)
64+
void GPUModel::deleteDeviceStruct()
6265
{
63-
// Get vertices and edges
64-
AllVertices &vertices = layout_->getVertices();
65-
AllEdges &edges = connections_->getEdges();
66-
67-
// Copy device edge and vertex structs to host memory
68-
vertices.copyFromDevice(*allVerticesDevice);
69-
// Deallocate device memory
70-
vertices.deleteVerticesDeviceStruct(*allVerticesDevice);
71-
// Copy device edge and vertex structs to host memory
72-
edges.copyEdgeDeviceToHost(*allEdgesDevice);
7366
// Deallocate device memory
74-
edges.deleteEdgeDeviceStruct(*allEdgesDevice);
67+
EdgeIndexMapDevice synapseIMapDevice;
68+
HANDLE_ERROR(cudaMemcpy(&synapseIMapDevice, edgeIndexMapDevice_, sizeof(EdgeIndexMapDevice),
69+
cudaMemcpyDeviceToHost));
70+
HANDLE_ERROR(cudaFree(synapseIMapDevice.outgoingEdgeBegin_));
71+
HANDLE_ERROR(cudaFree(synapseIMapDevice.outgoingEdgeCount_));
72+
HANDLE_ERROR(cudaFree(synapseIMapDevice.outgoingEdgeIndexMap_));
73+
HANDLE_ERROR(cudaFree(synapseIMapDevice.incomingEdgeBegin_));
74+
HANDLE_ERROR(cudaFree(synapseIMapDevice.incomingEdgeCount_));
75+
HANDLE_ERROR(cudaFree(synapseIMapDevice.incomingEdgeIndexMap_));
76+
HANDLE_ERROR(cudaFree(edgeIndexMapDevice_));
7577
HANDLE_ERROR(cudaFree(randNoise_d));
7678
}
7779

@@ -104,13 +106,9 @@ void GPUModel::setupSim()
104106
t_gpu_advanceSynapses = 0.0;
105107
t_gpu_calcSummation = 0.0;
106108
#endif // PERFORMANCE_METRICS
107-
108-
// allocates memories on CUDA device
109-
allocDeviceStruct((void **)&allVerticesDevice_, (void **)&allEdgesDevice_);
110-
111-
EdgeIndexMap &edgeIndexMap = connections_->getEdgeIndexMap();
112-
// copy inverse map to the device memory
113-
copyEdgeIndexMapHostToDevice(edgeIndexMap, Simulator::getInstance().getTotalVertices());
109+
// Allocate and copy neuron/synapse data structures to GPU memory
110+
OperationManager::getInstance().executeOperation(Operations::allocateGPU);
111+
OperationManager::getInstance().executeOperation(Operations::copyToGPU);
114112

115113
AllEdges &edges = connections_->getEdges();
116114
// set some parameters used for advanceVerticesDevice
@@ -123,9 +121,10 @@ void GPUModel::setupSim()
123121
/// Performs any finalization tasks on network following a simulation.
124122
void GPUModel::finish()
125123
{
124+
// copy device synapse and neuron structs to host memory
125+
OperationManager::getInstance().executeOperation(Operations::copyFromGPU);
126126
// deallocates memories on CUDA device
127-
deleteDeviceStruct((void **)&allVerticesDevice_, (void **)&allEdgesDevice_);
128-
deleteEdgeIndexMap();
127+
OperationManager::getInstance().executeOperation(Operations::deallocateGPUMemory);
129128

130129
#ifdef PERFORMANCE_METRICS
131130
cudaEventDestroy(start);
@@ -234,7 +233,7 @@ void GPUModel::updateConnections()
234233
AllVertices &vertices = layout_->getVertices();
235234
AllEdges &edges = connections_->getEdges();
236235

237-
vertices.copyFromDevice(allVerticesDevice_);
236+
vertices.copyFromDevice();
238237

239238
// Update Connections data
240239
if (connections_->updateConnections(vertices)) {
@@ -243,8 +242,7 @@ void GPUModel::updateConnections()
243242
// create edge index map
244243
connections_->createEdgeIndexMap();
245244
// copy index map to the device memory
246-
copyEdgeIndexMapHostToDevice(connections_->getEdgeIndexMap(),
247-
Simulator::getInstance().getTotalVertices());
245+
copyCPUtoGPU();
248246
}
249247
}
250248

@@ -281,83 +279,67 @@ void GPUModel::allocEdgeIndexMap(int count)
281279
cudaMemcpyHostToDevice));
282280
}
283281

284-
/// Deallocate device memory for edge inverse map.
285-
void GPUModel::deleteEdgeIndexMap()
286-
{
287-
EdgeIndexMapDevice edgeIndexMapDevice;
288-
HANDLE_ERROR(cudaMemcpy(&edgeIndexMapDevice, edgeIndexMapDevice_, sizeof(EdgeIndexMapDevice),
289-
cudaMemcpyDeviceToHost));
290-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.outgoingEdgeBegin_));
291-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.outgoingEdgeCount_));
292-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.outgoingEdgeIndexMap_));
293-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.incomingEdgeBegin_));
294-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.incomingEdgeCount_));
295-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.incomingEdgeIndexMap_));
296-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice_));
297-
}
298-
299-
/// Copy EdgeIndexMap in host memory to EdgeIndexMap in device memory.
300-
/// @param edgeIndexMapHost Reference to the EdgeIndexMap in host memory.
301-
void GPUModel::copyEdgeIndexMapHostToDevice(EdgeIndexMap &edgeIndexMapHost, int numVertices)
282+
/// Allocate and Copy CPU Synapse data to GPU.
283+
void GPUModel::copyCPUtoGPU()
302284
{
303-
AllEdges &edges = connections_->getEdges();
304-
int totalEdgeCount = edges.totalEdgeCount_;
305-
if (totalEdgeCount == 0)
285+
EdgeIndexMap synapseIndexMapHost = connections_->getEdgeIndexMap();
286+
int numVertices = Simulator::getInstance().getTotalVertices();
287+
AllEdges &synapses = connections_->getEdges();
288+
int totalSynapseCount = dynamic_cast<AllEdges &>(synapses).totalEdgeCount_;
289+
if (totalSynapseCount == 0)
306290
return;
307-
EdgeIndexMapDevice edgeIndexMapDevice;
308-
HANDLE_ERROR(cudaMemcpy(&edgeIndexMapDevice, edgeIndexMapDevice_, sizeof(EdgeIndexMapDevice),
291+
EdgeIndexMapDevice synapseIMapDevice;
292+
HANDLE_ERROR(cudaMemcpy(&synapseIMapDevice, edgeIndexMapDevice_, sizeof(EdgeIndexMapDevice),
309293
cudaMemcpyDeviceToHost));
310-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.outgoingEdgeBegin_,
311-
edgeIndexMapHost.outgoingEdgeBegin_.data(), numVertices * sizeof(BGSIZE),
312-
cudaMemcpyHostToDevice));
313-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.outgoingEdgeCount_,
314-
edgeIndexMapHost.outgoingEdgeCount_.data(), numVertices * sizeof(BGSIZE),
315-
cudaMemcpyHostToDevice));
316-
if (edgeIndexMapDevice.outgoingEdgeIndexMap_ != nullptr) {
317-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.outgoingEdgeIndexMap_));
294+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.outgoingEdgeBegin_,
295+
synapseIndexMapHost.outgoingEdgeBegin_.data(),
296+
numVertices * sizeof(BGSIZE), cudaMemcpyHostToDevice));
297+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.outgoingEdgeCount_,
298+
synapseIndexMapHost.outgoingEdgeCount_.data(),
299+
numVertices * sizeof(BGSIZE), cudaMemcpyHostToDevice));
300+
if (synapseIMapDevice.outgoingEdgeIndexMap_ != nullptr) {
301+
HANDLE_ERROR(cudaFree(synapseIMapDevice.outgoingEdgeIndexMap_));
318302
}
319-
HANDLE_ERROR(cudaMalloc((void **)&edgeIndexMapDevice.outgoingEdgeIndexMap_,
320-
totalEdgeCount * sizeof(BGSIZE)));
321-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.outgoingEdgeIndexMap_,
322-
edgeIndexMapHost.outgoingEdgeIndexMap_.data(),
323-
totalEdgeCount * sizeof(BGSIZE), cudaMemcpyHostToDevice));
303+
HANDLE_ERROR(cudaMalloc((void **)&synapseIMapDevice.outgoingEdgeIndexMap_,
304+
totalSynapseCount * sizeof(BGSIZE)));
305+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.outgoingEdgeIndexMap_,
306+
synapseIndexMapHost.outgoingEdgeIndexMap_.data(),
307+
totalSynapseCount * sizeof(BGSIZE), cudaMemcpyHostToDevice));
324308
// active synapse map
325-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.incomingEdgeBegin_,
326-
edgeIndexMapHost.incomingEdgeBegin_.data(), numVertices * sizeof(BGSIZE),
327-
cudaMemcpyHostToDevice));
328-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.incomingEdgeCount_,
329-
edgeIndexMapHost.incomingEdgeCount_.data(), numVertices * sizeof(BGSIZE),
330-
cudaMemcpyHostToDevice));
309+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.incomingEdgeBegin_,
310+
synapseIndexMapHost.incomingEdgeBegin_.data(),
311+
numVertices * sizeof(BGSIZE), cudaMemcpyHostToDevice));
312+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.incomingEdgeCount_,
313+
synapseIndexMapHost.incomingEdgeCount_.data(),
314+
numVertices * sizeof(BGSIZE), cudaMemcpyHostToDevice));
331315
// the number of synapses may change, so we reallocate the memory
332-
if (edgeIndexMapDevice.incomingEdgeIndexMap_ != nullptr) {
333-
HANDLE_ERROR(cudaFree(edgeIndexMapDevice.incomingEdgeIndexMap_));
334-
edgeIndexMapDevice.incomingEdgeIndexMap_ = nullptr;
316+
if (synapseIMapDevice.incomingEdgeIndexMap_ != nullptr) {
317+
HANDLE_ERROR(cudaFree(synapseIMapDevice.incomingEdgeIndexMap_));
318+
synapseIMapDevice.incomingEdgeIndexMap_ = nullptr;
335319
}
336-
HANDLE_ERROR(cudaMalloc((void **)&edgeIndexMapDevice.incomingEdgeIndexMap_,
337-
totalEdgeCount * sizeof(BGSIZE)));
338-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice.incomingEdgeIndexMap_,
339-
edgeIndexMapHost.incomingEdgeIndexMap_.data(),
340-
totalEdgeCount * sizeof(BGSIZE), cudaMemcpyHostToDevice));
341-
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice_, &edgeIndexMapDevice, sizeof(EdgeIndexMapDevice),
320+
HANDLE_ERROR(cudaMalloc((void **)&synapseIMapDevice.incomingEdgeIndexMap_,
321+
totalSynapseCount * sizeof(BGSIZE)));
322+
HANDLE_ERROR(cudaMemcpy(synapseIMapDevice.incomingEdgeIndexMap_,
323+
synapseIndexMapHost.incomingEdgeIndexMap_.data(),
324+
totalSynapseCount * sizeof(BGSIZE), cudaMemcpyHostToDevice));
325+
HANDLE_ERROR(cudaMemcpy(edgeIndexMapDevice_, &synapseIMapDevice, sizeof(EdgeIndexMapDevice),
342326
cudaMemcpyHostToDevice));
343327
}
344328

345-
/// Copy GPU edge data to CPU.
346-
void GPUModel::copyGPUtoCPU()
329+
/// Print out EdgeProps on the GPU.
330+
void GPUModel::printGPUEdgesPropsModel() const
347331
{
348-
// copy device edge structs to host memory
349-
connections_->getEdges().copyEdgeDeviceToHost(allEdgesDevice_);
332+
connections_->getEdges().printGPUEdgesProps(allEdgesDevice_);
350333
}
351334

352-
/// Copy CPU edge data to GPU.
353-
void GPUModel::copyCPUtoGPU()
335+
/// Getter for neuron structure in device memory
336+
AllVerticesDeviceProperties *&GPUModel::getAllVerticesDevice()
354337
{
355-
// copy host edge structs to device memory
356-
connections_->getEdges().copyEdgeHostToDevice(allEdgesDevice_);
338+
return allVerticesDevice_;
357339
}
358340

359-
/// Print out EdgeProps on the GPU.
360-
void GPUModel::printGPUEdgesPropsModel() const
341+
/// Getter for synapse structures in device memory
342+
AllEdgesDeviceProperties *&GPUModel::getAllEdgesDevice()
361343
{
362-
connections_->getEdges().printGPUEdgesProps(allEdgesDevice_);
363-
}
344+
return allEdgesDevice_;
345+
}

Simulator/Core/GPUModel.h

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
#pragma once
2323

2424
#include "AllEdges.h"
25+
#include "AllSpikingNeurons.h"
26+
#include "AllSpikingSynapses.h"
2527
#include "AllVertices.h"
28+
#include "OperationManager.h"
2629

2730
#ifdef VALIDATION_MODE
2831
#include <fstream>
@@ -81,25 +84,33 @@ class GPUModel : public Model {
8184
/// over the past epoch. Should be called once every epoch.
8285
virtual void updateConnections() override;
8386

84-
/// Copy GPU edge data to CPU.
85-
virtual void copyGPUtoCPU() override;
86-
87-
/// Copy CPU edge data to GPU.
87+
/// Copies neuron and synapse data from CPU to GPU memory.
88+
/// TODO: Refactor this. Currently, GPUModel handles low-level memory transfer for vertices and edges.
89+
/// Consider moving this responsibility to a more appropriate class, such as a dedicated memory manager
90+
/// or the OperationManager, to better separate concerns and keep the model focused on high-level coordination.
8891
virtual void copyCPUtoGPU() override;
8992

93+
// GPUModel itself does not have anything to be copied back, this function is a
94+
// dummy function just to make GPUModel non virtual
95+
virtual void copyGPUtoCPU() override
96+
{
97+
}
98+
9099
/// Print out EdgeProps on the GPU.
91100
void printGPUEdgesPropsModel() const;
92101

102+
/// Getter for edge (synapse) structures in device memory
103+
AllEdgesDeviceProperties *&getAllEdgesDevice();
104+
105+
/// Getter for vertex (neuron) structures in device memory
106+
AllVerticesDeviceProperties *&getAllVerticesDevice();
107+
93108
protected:
94109
/// Allocates and initializes memories on CUDA device.
95-
/// @param[out] allVerticesDevice Memory location of the pointer to the vertices list on device memory.
96-
/// @param[out] allEdgesDevice Memory location of the pointer to the edges list on device memory.
97-
void allocDeviceStruct(void **allVerticesDevice, void **allEdgesDevice);
110+
void allocDeviceStruct();
98111

99-
/// Copies device memories to host memories and deallocates them.
100-
/// @param[out] allVerticesDevice Memory location of the pointer to the vertices list on device memory.
101-
/// @param[out] allEdgesDevice Memory location of the pointer to the edges list on device memory.
102-
virtual void deleteDeviceStruct(void **allVerticesDevice, void **allEdgesDevice);
112+
/// Deallocates device memories.
113+
virtual void deleteDeviceStruct();
103114

104115
/// Pointer to device random noise array.
105116
float *randNoise_d;
@@ -118,11 +129,6 @@ class GPUModel : public Model {
118129
private:
119130
void allocEdgeIndexMap(int count);
120131

121-
void deleteEdgeIndexMap();
122-
123-
public: //2020/03/14 changed to public for accessing in Core
124-
void copyEdgeIndexMapHostToDevice(EdgeIndexMap &edgeIndexMapHost, int numVertices);
125-
126132
private:
127133
void updateHistory();
128134

@@ -144,4 +150,4 @@ void normalMTGPU(float *randNoise_d);
144150
void initMTGPU(unsigned int seed, unsigned int blocks, unsigned int threads, unsigned int nPerRng,
145151
unsigned int mt_rng_count);
146152
}
147-
#endif
153+
#endif

Simulator/Core/OperationManager.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,9 @@ string OperationManager::operationToString(const Operations &operation) const
7171
return "copyToGPU";
7272
case Operations::copyFromGPU:
7373
return "copyFromGPU";
74+
case Operations::allocateGPU:
75+
return "allocateGPU";
7476
default:
7577
return "Operation isn't in OperationManager::operationToString()";
7678
}
77-
}
79+
}

Simulator/Core/Operations.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,7 @@ enum class Operations {
2020
deallocateGPUMemory, // Make sure deallocate memory isn't called until all GPU memory is copied back.
2121
restoreToDefault, // Not sure what this refers to.
2222
copyToGPU,
23-
copyFromGPU
23+
copyFromGPU,
24+
allocateGPU,
25+
registerHistoryVariables
2426
};

0 commit comments

Comments
 (0)