Skip to content

Commit 2fb3f02

Browse files
committed
vd2: swap+pool
New pool class: SwapPool. Important APIs: PoolOpt(), Malloc(), Free(). PoolOpt() takes in the Malloc/Free (M/F) sequences, including those induced by swapping; cross-iteration variables and the last-iteration case are handled. The M/F sequence is also recorded after the swap plan is done, for one iteration.
1 parent 383fffe commit 2fb3f02

File tree

7 files changed

+376
-225
lines changed

7 files changed

+376
-225
lines changed

examples/cifar10/train.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
130130
dev = device.get_default_device()
131131
else:
132132
print('Using GPU')
133-
dev = device.create_cuda_gpu_on(1)
133+
dev = device.create_cuda_gpu_on(0)
134134

135135
net.to_device(dev)
136136
opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
@@ -153,7 +153,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
153153
fileTimeLog.write('Epoch %d: ' % epoch)
154154
fileTimeLog.write(str(int(round(time.time()*1000))))
155155
fileTimeLog.write('\n')
156-
for b in range(15): #num_train_batch):
156+
for b in range(20): #num_train_batch):
157157
print ("start of iteration %d: " %b)
158158
#time.sleep(1)
159159
fileTimeLog.write('iteration %d: ' % b)

include/singa/core/device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ class SwapGPU : public Device {
365365
//vec_block
366366
vector<string>vec_block; //iteration 0-3
367367
vector<string>vec_block_fresh; //iteration 4 5 6
368+
vector<string>vec_block_mf; //itr 8 9 10
368369
vector<double>global_load; // from begining
369370
vector<double>origin_load; //vec_load 3 itr. TODO(junzhe) to delete vec_load, global_load after use.
370371
vector<onePieceMsg>vec_run;

include/singa/core/memory.h

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ class DeviceMemPool {
5050
virtual void Malloc(void** ptr, const size_t size) = 0;
5151
virtual void Free(void* ptr) = 0;
5252
virtual void Append(string blockInfo) = 0;
53+
54+
virtual void PoolOpt(vector<string> &vec_mf) = 0;
5355

5456
virtual void SwapOut(void* data_) = 0;
5557
virtual void SwapIn(void* data_) = 0;
@@ -74,7 +76,9 @@ class CnMemPool : public DeviceMemPool {
7476

7577
void Malloc(void** ptr, const size_t size);
7678
void Free(void* ptr);
77-
void Append(string blockInfo){}
79+
void Append(string blockInfo){}
80+
81+
void PoolOpt(vector<string> &vec_mf) override {}
7882

7983
void SwapOut(void* data_) override {}
8084
void SwapIn(void* data_) override {}
@@ -102,7 +106,9 @@ class CudaMemPool : public DeviceMemPool {
102106
public:
103107
void Malloc(void** ptr, const size_t size) override;
104108
void Free(void* ptr) override;
105-
void Append(string blockInfo){}
109+
void Append(string blockInfo){}
110+
111+
void PoolOpt(vector<string> &vec_mf) override {}
106112

107113
void SwapOut(void* data_) override {}
108114
void SwapIn(void* data_) override {}
@@ -134,9 +140,11 @@ class SmartMemPool: public DeviceMemPool {
134140
void getMaxLoad(void);
135141
std::pair<size_t, size_t> GetMemUsage() override;
136142
void Append(string blockInfo);
143+
144+
void PoolOpt(vector<string> &vec_mf) override {}
137145

138-
void SwapOut(void* data_) override {}
139-
void SwapIn(void* data_) override {}
146+
void SwapOut(void* data_) override {}
147+
void SwapIn(void* data_) override {}
140148
protected:
141149
void Init();
142150
private:
@@ -196,19 +204,22 @@ struct SwapMeta{
196204
void* d_ptr; //not used for
197205
};
198206

199-
class Swap : public DeviceMemPool {
207+
class SwapPool : public DeviceMemPool {
200208
public:
201-
Swap(const MemPoolConf &conf); //constructor
209+
SwapPool(const MemPoolConf &conf); //constructor
202210
//TODO(junzhe) in Singa, void Malloc( void**, size_t); change to cudaMalloc and cudaFree.
203211
void Malloc(void** ptr, const size_t size);
204212
void Free(void* ptr);
205-
~Swap();
213+
~SwapPool();
206214
void getMaxLoad(void);
207215
std::pair<size_t, size_t> GetMemUsage() override;
208216
void Append(string blockInfo);
209217

210218
void SwapOut(void* data_);
211219
void SwapIn(void* data_);
220+
221+
//PoolOpt() constructs the pool based on MF (Malloc/Free) info after Swap is constructed.
222+
void PoolOpt(vector<string> &vec_mf);
212223
protected:
213224
void Init();
214225
private:
@@ -219,8 +230,15 @@ class Swap : public DeviceMemPool {
219230
std::mutex mtx_;
220231
vector<string> vec_block;
221232
size_t swapLimit = 1<<23; //8MB
222-
map<void*,swapLookUpElement>Table_id2LookUpElement; //old TODO(junzhe) remove
223-
map<void*,pair<SwapMeta,SwapMeta>>Table_Meta;
233+
int poolFlag = 0;
234+
int pc = 0;
235+
int maxLen_mf = 0;
236+
void* ptrPool = nullptr;
237+
map<void*,int>Table_p2r; //ptr for arrival idx, for look up Table during free
238+
map<int,lookUpElement>Table_r2v; //r-> vertex
239+
vector<pair<int,lookUpElement>>Vec_r2Ver; //Table_r2Ver No need anymore, replaced by Table_r2v TODO(junzhe)
240+
// map<void*,swapLookUpElement>Table_id2LookUpElement; //old TODO(junzhe) remove
241+
// map<void*,pair<SwapMeta,SwapMeta>>Table_Meta;
224242
};
225243

226244
#endif

src/core/device/cuda_gpu.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ const int kNumCudaStream = 1;
4848
CudaGPU::CudaGPU(int id) : Device(id, kNumCudaStream) {
4949
MemPoolConf conf;
5050
conf.add_device(id);
51-
pool_ = std::make_shared<Swap>(conf);
51+
pool_ = std::make_shared<CnMemPool>(conf);
5252
Setup();
5353
}
5454

src/core/device/platform.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
128128
conf.add_device(device);
129129
CHECK_LE(bytes, Platform::GetGPUMemSize(device).first);
130130
}
131-
auto pool = std::make_shared<CnMemPool>(conf);
131+
auto pool = std::make_shared<SwapPool>(conf);
132132

133133
vector<shared_ptr<Device> > ret;
134134
for (auto device : devices) {

src/core/device/swap_gpu.cc

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ const cudaMemcpyKind copyKind[] = {cudaMemcpyHostToHost, cudaMemcpyHostToDevice,
4242

4343
///functions to be used
4444
///Section for structs and respective sorting function:
45-
// onePieceMsg, onePairMsg, oneIterMsg, version 11/30 3pm
4645

4746

4847

@@ -924,7 +923,8 @@ SwapGPU::SwapGPU(int id) : Device(id, kNumCudaStream) {
924923

925924
MemPoolConf conf;
926925
conf.add_device(id);
927-
pool_ = std::make_shared<Swap>(conf);
926+
//TODO(junzhe) note that it has been <Swap> for building SwapGPU, which doesn't matter.
927+
pool_ = std::make_shared<SwapPool>(conf);
928928
Setup();
929929

930930
}
@@ -987,6 +987,26 @@ void* SwapGPU::Malloc(int size) {
987987
if (size > 0) {
988988
CUDA_CHECK(cudaSetDevice(id_));
989989
pool_->Malloc((void**)&ptr, size);
990+
991+
///append vec_block_mf
992+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
993+
&& ((gc - maxLen) >= three_more_globeCounter)){
994+
string tempStr1 ="Malloc ";
995+
stringstream strm2;
996+
strm2<<ptr;
997+
string tempStr2 = strm2.str();
998+
stringstream strm3;
999+
strm3<<size;
1000+
string tempStr3 = strm3.str();
1001+
string temp = tempStr1+tempStr2+" "+tempStr3;
1002+
vec_block_mf.push_back(temp);
1003+
}
1004+
//record mf semantics after swap plan done
1005+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
1006+
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
1007+
file_mf_one_itr<<"Malloc "<<ptr<<" "<<size;
1008+
file_mf_one_itr<<endl;
1009+
}
9901010
// TODO(wangwei) remove the memset.
9911011
CUDA_CHECK(cudaMemset(ptr, 0, size));
9921012
}
@@ -1000,6 +1020,21 @@ void SwapGPU::Free(void* ptr) {
10001020
if (ptr != nullptr) {
10011021
CUDA_CHECK(cudaSetDevice(id_));
10021022
pool_->Free(ptr);
1023+
///append vec_block_mf
1024+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
1025+
&& ((gc - maxLen) >= three_more_globeCounter)){
1026+
string tempStr1 ="Free ";
1027+
stringstream strm2;
1028+
strm2<<ptr;
1029+
string tempStr2 = strm2.str();
1030+
string temp = tempStr1+tempStr2;
1031+
vec_block_mf.push_back(temp);
1032+
}
1033+
1034+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
1035+
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
1036+
file_mf_one_itr<<"Free "<<ptr<<endl;
1037+
}
10031038
}
10041039

10051040
//cout<<"free done"<<endl;
@@ -1115,6 +1150,21 @@ void SwapGPU::DeploySwap_exec(int r_gc){
11151150
last_meta.block_->update_data(nullptr);
11161151
// cout<<"to free data_"<<last_meta.data_<<endl;
11171152
pool_->Free(last_meta.data_);
1153+
///append vec_block_mf
1154+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
1155+
&& ((gc - maxLen) >= three_more_globeCounter)){
1156+
string tempStr1 ="Free ";
1157+
stringstream strm2;
1158+
strm2<<last_meta.data_;
1159+
string tempStr2 = strm2.str();
1160+
string temp = tempStr1+tempStr2;
1161+
vec_block_mf.push_back(temp);
1162+
}
1163+
1164+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
1165+
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
1166+
file_mf_one_itr<<"Free "<<last_meta.data_<<" SwapOut(Sync)"<<endl;
1167+
}
11181168
last_meta.data_ = nullptr; //not really needed TODO(junzhe)
11191169
cout<<"----sync out "<<sync_idx<<endl;
11201170
Table_meta.find(sync_idx)->second = last_meta;
@@ -1213,8 +1263,24 @@ void SwapGPU::Append(string blockInfo){
12131263

12141264
//test moved from start of malloc/free to end of append, only gc+1 changed
12151265
Test_sched_switch_swap();
1216-
//NOTE: this gc++ includes read/write and AppendLayer as well, in addition to malloc/free.
1266+
//NOTE: this gc includes read/write and AppendLayer as well, in addition to malloc/free.
12171267
gc++;
1268+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) == three_more_globeCounter)){
1269+
cout<<"==================to call PoolOpt"<<endl;
1270+
fstream file_mf_8910("mf_8910.csv", ios::in|ios::out|ios::app);
1271+
for (int i = 0; i< vec_block_mf.size();i++){
1272+
file_mf_8910<<vec_block_mf[i]<<endl;
1273+
}
1274+
cout<<"len of vec_block_mf: "<<vec_block_mf.size()<<endl;
1275+
pool_->PoolOpt(vec_block_mf);
1276+
cout<<"==================to call PoolOpt done"<<endl;
1277+
}
1278+
1279+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
1280+
&& ((gc - three_more_globeCounter)%maxLen == 0)){
1281+
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
1282+
file_mf_one_itr<<"-----new itr------"<<endl;
1283+
}
12181284

12191285
}
12201286

@@ -1297,6 +1363,23 @@ void SwapGPU::SwapIn_idx(const int r_idx){
12971363
//cout<<"update block and data of r_idx: "<<r_idx<<' '<<meta.block_<<' '<<meta.data_<<endl;
12981364
void* ptr = nullptr;
12991365
pool_->Malloc((void**)&ptr, meta.size);
1366+
///append vec_block_mf
1367+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)
1368+
&& ((gc - maxLen) >= three_more_globeCounter)){
1369+
string tempStr1 ="Malloc ";
1370+
stringstream strm2;
1371+
strm2<<ptr;
1372+
string tempStr2 = strm2.str();
1373+
stringstream strm3;
1374+
strm3<<meta.size;
1375+
string tempStr3 = strm3.str();
1376+
string temp = tempStr1+tempStr2+" "+tempStr3;
1377+
vec_block_mf.push_back(temp);
1378+
}
1379+
if ((asyncSwapFlag == 1) && ((gc - 4*maxLen) < three_more_globeCounter)){
1380+
fstream file_mf_one_itr("mf_one_itr.csv", ios::in|ios::out|ios::app);
1381+
file_mf_one_itr<<"Malloc "<<ptr<<" "<<meta.size<<" swapIn"<<endl;
1382+
}
13001383
//cout<<"expected results update_data:: "<<meta.block_<<" "<<ptr<<endl;
13011384
//cout<<"malloc due to swapIn ("<<r_idx<<") "<<ptr<<endl;
13021385
//void* to_rm_ptr = meta.data_;

0 commit comments

Comments
 (0)