@@ -66,10 +66,8 @@ class Device {
6666 // / Called by Tensor.
6767 void FreeBlock (Block* block);
6868
69- void AppendInfo (string blockInfo);
70- void * GetRealGpuPtrInfo (const Block* block_);
71- void SwapOutInfo (const Block* block_);
72- void SwapInInfo (const Block* block_);
69+ void AppendInfo (string block_info);
70+ void * UpdateGpuPtrInfo (const Block* block_ptr);
7371
7472 // / Return the size (bytes) of memory in use
7573 // / TODO(wangwei) override this function for all devices.
@@ -108,7 +106,7 @@ class Device {
108106
109107 int id () const { return id_; }
110108
111- virtual void * GetRealGpuPtr (const Block* block_ ) = 0;
109+ virtual void * UpdateGpuPtr (const Block* block_ptr ) = 0;
112110
113111 private:
114112 Device () {};
@@ -125,11 +123,8 @@ class Device {
125123
126124 // / Free device memory.
127125 virtual void Free (void * ptr) = 0;
128- virtual void MakeMetaTable (Block* block,void * data_,int size) = 0;
129- virtual void Append (string blockInfo) = 0;
130-
131- virtual void SwapOut (const Block* block_) = 0;
132- virtual void SwapIn (const Block* block_) = 0;
126+ virtual void AppendAfterMalloc (Block* block,void * data_ptr,int size) = 0;
127+ virtual void Append (string block_info) = 0;
133128
134129 protected:
135130 int id_ = 0 ;
@@ -171,11 +166,10 @@ class CppCPU : public Device {
171166
172167 // / Free cpu memory.
173168 void Free (void * ptr) override ;
174- void MakeMetaTable (Block* block,void * data_,int size) override {}
175- void Append (string blockInfo) override {}
176- void * GetRealGpuPtr (const Block* block_) override {}
177- void SwapOut (const Block* block_) override {}
178- void SwapIn (const Block* block_) override {}
169+ void AppendAfterMalloc (Block* block,void * data_ptr,int size) override {}
170+ void Append (string block_info) override {}
171+ void * UpdateGpuPtr (const Block* block_ptr) override {}
172+
179173};
180174
181175
@@ -206,11 +200,9 @@ class CudaGPU : public Device {
206200
207201 // / Free device memory.
208202 void Free (void * ptr) override ;
209- void MakeMetaTable (Block* block,void * data_,int size) override {}
210- void Append (string blockInfo) override ;
211- void * GetRealGpuPtr (const Block* block_) override ;
212- void SwapOut (const Block* block_) override ;
213- void SwapIn (const Block* block_) override ;
203+ void AppendAfterMalloc (Block* block,void * data_ptr,int size) override {}
204+ void Append (string block_info) override ;
205+ void * UpdateGpuPtr (const Block* block_ptr) override ;
214206
215207 private:
216208 void Setup ();
@@ -222,21 +214,21 @@ class CudaGPU : public Device {
222214// / CudaCPU which uses cudaMallocHost to allocate pinned memory for host.
223215
224216// /SwapGPU
225- struct onePieceMsg {
217+ struct DeviceOptInfo {
226218 /*
227- members: [ptr, size, MallocFree , idx]
219+ members: [ptr, size, operation_type , idx]
228220 */
229221 string ptr;
230222 size_t size;
231- int MallocFree ;
223+ int operation_type ;
232224 int idx;
233225 double t;
234- onePieceMsg (string p, size_t s, int M, int i):ptr(p),size(s),MallocFree (M),idx(i){}
226+ DeviceOptInfo (string p, size_t s, int M, int i):ptr(p),size(s),operation_type (M),idx(i){}
235227};
236228
237229struct BlockMeta {
238230 /*
239- block Meta.
231+ meta of swapping memory blocks
240232 */
241233 Block* block_ = nullptr ;
242234 void * data_ = nullptr ;
@@ -249,34 +241,39 @@ struct BlockMeta{
249241};
250242
251243struct SwapBlock {
252-
244+ /*
245+ meta of candidate blocks
246+ */
253247 string ptr;
254- string cat; // A1, A2, A3.. .
248+ string cat; // sub category of the candidate blocks, read-read, write-read, etc .
255249 int name;
256250 size_t size;
251+ // index of last read/write before swap out, and first read/write after swap in
257252 int r_idx; // out idx
258253 int d_idx; // in idx
254+ // time of last read/write before swap out, and first read/write after swap in
259255 double r_time; // out time
260256 double d_time; // in time
261- double dt; // delta t: t2'-t1'
262- double pri; // look at here if big enough TODO(junzhe)
263- double dto; // t2-t1
264- double wdto = 0 ; // t2-t1 weighted by swap_load
265- double r_idx_ready; // r_idx + buffer, could be set during selection.
266- // int free = -1; //when it is freed
267- // below as per planned.
268- int i1 = 0 ;
269- int i1p = 0 ;
270- int i2 = 0 ;
271- int i2p = 0 ;
272- double t1 = 0 ;
273- double t2 = 0 ;
274- double t1p = 0 ;
275- double t2p = 0 ;
276- SwapBlock (string p, size_t s, int i1, int i2, double t1, double t2):
277- ptr (p), size(s), r_idx(i1),d_idx(i2),r_time(t1), d_time(t2) {}
257+ double DOA; // Duration of Absence
258+ double AOA; // Area of Absence
259+ double DOA_origin; // t2-t1, DOA without taking out time spent
260+ double WDOA = 0 ; // weighted DOA
261+ double majority_voting = 0 ;
262+ int r_idx_ready; // r_idx + buffer
263+
264+ // below are index and time for scheduling
265+ int idx_out_start = 0 ;
266+ int idx_out_end = 0 ;
267+ int idx_in_end = 0 ;
268+ int idx_in_start = 0 ;
269+ double t_out_start = 0 ;
270+ double t_out_end = 0 ;
271+ double t_in_end = 0 ;
272+ double t_in_start = 0 ;
273+ SwapBlock (string p, size_t s, int idx_out_start, int idx_in_end, double t_out_start, double t_in_end):
274+ ptr (p), size(s), r_idx(idx_out_start),d_idx(idx_in_end),r_time(t_out_start), d_time(t_in_end) {}
278275};
279- // / Device able to Swap memory between Nvidia GPU and Swap
276+ // / Device able to Swap memory between Nvidia GPU and CPU
280277class SwapGPU : public Device {
281278 public:
282279 ~SwapGPU ();
@@ -300,98 +297,92 @@ class SwapGPU : public Device {
300297 // / Free device memory.
301298 void Free (void * ptr) override ;
302299
303- // Append at every index: malloc, free, read, mutable
304- void Append (string blockInfo ) override ;
300+ // Append at every index: free, read, mutable
301+ void Append (string block_info ) override ;
305302
306- // append info after Malloc, pair .
307- void MakeMetaTable (Block* block,void * data_ ,int size) override ;
303+ // append info after Malloc, as Block* is not available till Malloc() done .
304+ void AppendAfterMalloc (Block* block,void * data_ptr ,int size) override ;
308305
309- // all the testing, without swap, during Append()
310- void Test_sched_switch_swap ();
306+ // Detection and Plan
307+ void DetectionPlan ();
311308
312309 // test iteration, return GC
313- int swap_test (vector<string>vec_block,int &maxLen , int &location );
310+ int Detection (vector<string>vec_block,int &iteration_length , int &location_of_2nd_iteration );
314311
315- // entire plan, from swap_select () to swap_sched (), swap_deploy_tables ()
316- void swap_plan ();
312+ // entire plan, from SelectBlock () to Scheduling (), BuildMetaTables ()
313+ void Plan ();
317314
318- // selection algo
319- vector<SwapBlock> swap_select (vector<SwapBlock>vec_swap,vector<double > tempLoad ,double memLimit ,string mode);
315+ // block selection algo
316+ vector<SwapBlock> SelectBlock (vector<SwapBlock>vec_swap,vector<double > temp_load ,double mem_limit ,string mode);
320317
321318 // schedule algo
322- void swap_sched (vector<SwapBlock>&vec_swap_selct, vector<double >&vec_load_temp,double &overhead,double memLimit ,string mode);
319+ void Scheduling (vector<SwapBlock>&vec_swap_selct, vector<double >&vec_load_temp,double &overhead,double mem_limit ,string mode);
323320
324- // make tables Table_sched and Table_meta
325- void swap_construct_tables (vector<SwapBlock>vec_swap_selct);
321+ // make tables table_sched and table_meta
322+ void BuildMetaTables (vector<SwapBlock>vec_swap_selct);
326323
327- // update Table_meta , during Append()
328- void swap_update_tables (Block* tempBlock_ );
324+ // update table_meta , during Append()
325+ void UpdateMetaTables (Block* block_ptr );
329326
330327 // swap/sync during Append()
331328 void DeploySwap ();
332329
333330 // exec DeploySwap
334- void DeploySwap_exec (int r_gc);
335-
336-
331+ void DeploySwapExec (int relative_counter);
337332
338333 // load profile as per synchronous swap.
339- vector<double > swap_load_ideal (vector<double >vec_load,vector<SwapBlock> vec_swap_selct);
334+ vector<double > GetIdealLoad (vector<double >vec_load,vector<SwapBlock> vec_swap_selct);
340335
341- // in case gpu ptr wrong. TODO(junzhe) to verify if needed.
342- void * GetRealGpuPtr (const Block* block_ ) override ;
336+ // in case the gpu ptr is wrong, update it after swap_in ad hoc
337+ void * UpdateGpuPtr (const Block* block_ptr ) override ;
343338
344- void SwapOut (const Block* block_) override ;
345- void SwapIn (const Block* block_) override ;
339+ // Swap Synchronous, for early iterations
340+ void SwapOutSynchronous (const Block* block_ptr);
341+ void SwapInSynchronous (const Block* block_ptr);
346342
347- // changed to intake data_ instead
348- void SwapOut_idx (const int r_idx );
349- void SwapIn_idx (const int r_idx );
343+ // Swap asynchronous, for middle iterations
344+ void SwapOut (const int idx );
345+ void SwapIn (const int idx );
350346
351347 private:
352348 void Setup ();
353- // /Tables needed
354- // r_idx->BlockMeta
355- map<int ,BlockMeta>Table_meta;
356- map<const Block*,BlockMeta>Table_block_meta; // TODO(junzhe) for measure speed only.
357- map<const Block*, int >Table_not_at_device; // int refers to its r_idx of the block/meta
358- // map<const Block*, size_t>Table_block_size; //Table block_ -> size TODO(junzhe) no need, can call block_->size()
359-
360- // schedule: idx--> r_idx, dir; sync_r_idx,dir. int 0 means D2H, 1 means H2D.
361- map<int ,std::tuple<int ,int ,int ,int >>Table_sched; // changed to with sync_r_idx
362349
363- // vector<SwapBlock>vec_swap_selct_global;
350+ map<int ,BlockMeta>table_meta;
351+ map<const Block*,BlockMeta>table_block_meta; // for measure speed only.
352+ map<const Block*, int >table_not_at_device; // int refers to its r_idx of the block/meta
353+ map<int ,std::tuple<int ,int ,int ,int >>table_sched; // changed to with sync_r_idx
364354
365355 // vec_block
366- vector<string>vec_block; // iteration 0-3
367- vector<string>vec_block_fresh; // iteration 4 5 6
368- vector<string>vec_block_mf; // itr 8 9 10
369- vector<double >global_load; // from begining
370- vector<double >origin_load; // vec_load 3 itr. TODO(junzhe) to delete vec_load, global_load after use.
371- vector<onePieceMsg>vec_run;
372- vector<int >opsSequence; // sequence of operations of one middle iteration
373- vector<size_t >sizeSequence; // size of all operations of one middle iteration
374- int asyncSwapFlag = 0 ; // 0 for sync, 1 for async.
375- int testFlag = 0 ; // 0 means open for test, 1 means no need test anymore.
376- int gc = 0 ; // global counter, index, add 1 after each Malloc/Free/read/write.
377- int globeCounter = -1 ;
378- int maxLen = 0 ;
379- int location = 0 ;
380- int three_more_location = 0 ; // location at 3 more iterations later.
381- int three_more_globeCounter = -1 ; //
382- // design requirement TODO(junzhe)
383- float memLimit_ratio = 0.70 ;
356+ vector<string>vec_block; // iterations for Detection, i.e. detect iterations.
357+ vector<string>vec_block_fresh; // iterations that are used for Planning.
358+ vector<string>vec_block_mf; // iterations used to construct pool
359+ vector<double >global_load; // load from beginning
360+ vector<double >origin_load; // 3 iteration load, for planning.
361+ vector<DeviceOptInfo>vec_run;
362+ vector<int >operation_sequence; // sequence of operations of one middle iteration
363+ vector<size_t >size_sequence; // size of all operations of one middle iteration
364+
365+ int async_swap_flag = 0 ; // 0 for sync, 1 for async.
366+ int past_test_flag = 0 ; // 0 means need to test, 1 means no need to test anymore.
367+ int global_index = 0 ; // global counter, index, add 1 after each Malloc/Free/read/write.
368+ int global_index_threshold = -1 ;
369+ int iteration_length = 0 ;
370+ int location_of_2nd_iteration = 0 ; // index of start of 2nd iteration
371+ int location_of_5th_iteration = 0 ; // index of start of 5th iteration
372+ int three_more_iteration_global_index_threshold = -1 ;
373+
374+ // design specs
375+ float mem_limit_ratio = 0.70 ;
384376 size_t smallest_block = 1 <<20 ; // 1 MB
385377 int data_buffer = 4 ; // used to control readyIdx
386378 int mutable_data_buffer = 6 ;
387- double maxLoad;
388- int maxIdx;
389- double total_swapInTime = 0 ;
390- double total_swapOutTime = 0 ;
391- double tempTime = 0 ;
392- double tempTime2 = 0 ;
393- double tempTime_baseline; // vec_run[0] time
394- int maxLen_threshold = 1000 ;
379+ double max_load;
380+ int max_idx;
381+ double total_swap_in_time = 0 ;
382+ double total_swap_out_time = 0 ;
383+ double temp_time = 0 ;
384+ double temp_time_baseline; // vec_run[0] time
385+ int iteration_length_threshold = 1000 ;
395386
396387 private:
397388 shared_ptr<DeviceMemPool> pool_;
@@ -447,11 +438,9 @@ class OpenclDevice : public singa::Device {
447438 // / Converts the void pointer into a Buffer object, then deletes the object.
448439 // / This has the effect of freeing up device memory.
449440 void Free (void * ptr) override ;
450- void MakeMetaTable (Block* block,void * data_,int size) override {}
451- void Append (string blockInfo) override {}
452- void * GetRealGpuPtr (const Block* block_) override {}
453- void SwapOut (const Block* block_) override {}
454- void SwapIn (const Block* block_) override {}
441+ void AppendAfterMalloc (Block* block,void * data_ptr,int size) override {}
442+ void Append (string block_info) override {}
443+ void * UpdateGpuPtr (const Block* block_ptr) override {}
455444
456445
457446private:
0 commit comments