@@ -86,16 +86,16 @@ namespace dl_infra {
8686 }
8787
8888 ConvLayer::ConvLayer (WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
89- Timer* timer, TensorMgr* tensor_mgr, engine eng, stream s,
89+ Timer* timer, TensorMgr* tensor_mgr, engine * eng, stream * s,
9090 int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[]): workloadParams_(workloadParams) {
9191
9292 Tracer::func_begin (" ConvLayer::ConvLayer" );
9393
9494 index_in_network_ = index_in_network;
9595 total_layers_in_nw_ = total_layers_in_nw;
9696 timer_ = timer;
97- eng_ = std::move ( eng) ;
98- s_ = std::move (s) ;
97+ eng_ = eng;
98+ s_ = s ;
9999 tensor_mgr_ = tensor_mgr;
100100
101101 input_tensor_dims_ = input_tensor_dims;
@@ -106,9 +106,9 @@ namespace dl_infra {
106106 }
107107
// Constructor overload that additionally records the layer that follows this one.
// It delegates every argument except nextConvLayer to the other ConvLayer constructor
// and then stores nextConvLayer in nextConvLayer_.
// Revision note: the removed ('-') lines took engine/stream by value and forwarded them
// with std::move; the added ('+') lines take engine* / stream* and pass the pointers through.
// NOTE(review): the delegated-to constructor stores these pointers as-is (eng_ = eng; s_ = s),
// so the caller presumably has to keep the engine and stream alive for the layer's lifetime - confirm.
108108 ConvLayer::ConvLayer (WorkloadParams* workloadParams, int index_in_network, int total_layers_in_nw,
109- Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine eng, stream s,
109+ Timer* timer, TensorMgr* tensor_mgr, IConvLayer* nextConvLayer, engine * eng, stream * s,
110110 int input_tensor_dims[], int filter_tensor_dims[], int output_tensor_dims[])
111- : ConvLayer(workloadParams, index_in_network, total_layers_in_nw, timer, tensor_mgr, std::move( eng), std::move(s) , input_tensor_dims, filter_tensor_dims, output_tensor_dims) {
111+ : ConvLayer(workloadParams, index_in_network, total_layers_in_nw, timer, tensor_mgr, eng, s , input_tensor_dims, filter_tensor_dims, output_tensor_dims) {
112112 nextConvLayer_ = nextConvLayer;
113113 };
114114
@@ -132,7 +132,7 @@ namespace dl_infra {
132132#ifdef DEVICE_TIMER
133133 Time start = get_time_now ();
134134#endif
135- conv_pd = convolution_forward::primitive_desc (eng_,
135+ conv_pd = convolution_forward::primitive_desc (* eng_,
136136 prop_kind::forward_inference, algo,
137137 tensor_mgr_->getTensorBagAt (index_in_network_)->conv_src_md ,
138138 tensor_mgr_->getTensorBagAt (index_in_network_)->conv_weights_md ,
@@ -151,10 +151,33 @@ namespace dl_infra {
151151 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " CONV_FORWARD CREATION" );
152152#endif
153153 createWorkspace ();
154-
154+ reorderWeightsIfRequired ();
155+
155156 Tracer::func_end (" ConvLayer::initialize" );
156157 }
157158
159+ void ConvLayer::reorderWeightsIfRequired () {
160+ need_reorder_weights_ = conv_pd.weights_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ .get_desc ();
161+ // if(need_reorder_weights_)
162+ // std::cout << "need_reorder_weights_" << std::endl;
163+ auto conv_weights_mem = need_reorder_weights_ ? memory (conv_pd.weights_desc (), *eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
164+
165+ if (need_reorder_weights_) {
166+ #ifdef DEVICE_TIMER
167+ start = get_time_now ();
168+ #endif
169+ auto reorder_weights = reorder (tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ , conv_weights_mem);
170+ reorder_weights.execute (*s_,
171+ {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ },
172+ {DNNL_ARG_TO, conv_weights_mem}});
173+ s_->wait (); // wait for the reorder to complete
174+ tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ = conv_weights_mem;
175+ #ifdef DEVICE_TIMER
176+ timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDER WEIGHTS" );
177+ #endif
178+ }
179+ }
180+
158181 void ConvLayer::doIOTensorAndWSAllocs () {
159182 Tracer::func_begin (" ConvLayer::doTensorAndWSAllocs" );
160183
@@ -167,7 +190,7 @@ namespace dl_infra {
167190#ifdef DEVICE_TIMER
168191 Time start = get_time_now ();
169192#endif
170- auto sycl_queue = dnnl::sycl_interop::get_queue (dnnl::stream (eng_));
193+ auto sycl_queue = dnnl::sycl_interop::get_queue (dnnl::stream (* eng_));
171194 sycl::free (tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ .get_data_handle (), sycl_queue);
172195#ifdef DEVICE_TIMER
173196 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " FREE_INPUT_DEV_PTR" );
@@ -194,7 +217,7 @@ namespace dl_infra {
194217#ifdef DEVICE_TIMER
195218 Time start = get_time_now ();
196219#endif
197- conv_scratchpad_mem_ = memory (conv_pd.scratchpad_desc (), eng_);
220+ conv_scratchpad_mem_ = memory (conv_pd.scratchpad_desc (), * eng_);
198221#ifdef DEVICE_TIMER
199222 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " MEMALLOC_SCRATCHPAD_DEV_MEM" );
200223#endif
@@ -225,6 +248,8 @@ namespace dl_infra {
225248 need_reorder_src_ = conv_pd.src_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ .get_desc ();
226249
227250 // need_reorder_weights_ = conv_pd.weights_desc() != tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_.get_desc();
251+ // if(need_reorder_weights_)
252+ // std::cout << "need_reorder_weights_" << std::endl;
228253
229254 if (index_in_network_ == total_layers_in_nw_-1 ) {
230255 need_reorder_dst_ = conv_pd.dst_desc () != tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_desc ();
@@ -239,13 +264,14 @@ namespace dl_infra {
239264#ifdef DEVICE_TIMER
240265 start = get_time_now ();
241266#endif
242- auto conv_src_mem = need_reorder_src_ ? memory (conv_pd.src_desc (), eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ ;
243- // auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
267+ auto conv_src_mem = need_reorder_src_ ? memory (conv_pd.src_desc (), *eng_) : tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ ;
268+ // auto conv_weights_mem = need_reorder_weights_ ? memory(conv_pd.weights_desc(), *eng_) : tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
269+ auto conv_weights_mem = tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
244270
245271 // weights are not reordered here on every call:
246272 // reorderWeightsIfRequired() (called from initialize()) has already replaced weights_mem_ with memory in the format given by conv_pd.weights_desc() whenever the two formats differed
247- auto conv_weights_mem = tensor_mgr_->getTensorBagAt (index_in_network_)->weights_mem_ ;
248- auto conv_dst_mem = memory (conv_pd.dst_desc (), eng_, tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_data_handle ());
273+ // auto conv_weights_mem = tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_;
274+ auto conv_dst_mem = memory (conv_pd.dst_desc (), * eng_, tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ .get_data_handle ());
249275 tensor_mgr_->getTensorBagAt (index_in_network_)->dst_mem_ = conv_dst_mem;
250276#ifdef DEVICE_TIMER
251277 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDERED MEM CREATE" );
@@ -257,8 +283,8 @@ namespace dl_infra {
257283#endif
258284 auto reorder_src = reorder (tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ , conv_src_mem);
259285 reorder_src.execute (
260- s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ }, {DNNL_ARG_TO, conv_src_mem}});
261- s_. wait (); // wait for the reorder to complete
286+ * s_, {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt (index_in_network_)->src_mem_ }, {DNNL_ARG_TO, conv_src_mem}});
287+ s_-> wait (); // wait for the reorder to complete
262288#ifdef DEVICE_TIMER
263289 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " REORDER SRC" );
264290#endif
@@ -267,10 +293,10 @@ namespace dl_infra {
267293 // if (need_reorder_weights_) {
268294 // //start = get_time_now();
269295 // auto reorder_weights = reorder(tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_, conv_weights_mem);
270- // reorder_weights.execute(s_,
296+ // reorder_weights.execute(* s_,
271297 // {{DNNL_ARG_FROM, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
272298 // {DNNL_ARG_TO, conv_weights_mem}});
273- // s_. wait(); // wait for the reorder to complete
299+ // s_-> wait(); // wait for the reorder to complete
274300 // timer_->recordOpTimeTaken(index_in_network_, calculate_op_time_taken(start), "REORDER WEIGHTS");
275301 // }
276302 // }
@@ -281,10 +307,10 @@ namespace dl_infra {
281307 // conv_.execute(s_,
282308 // {{DNNL_ARG_SRC, tensor_mgr_->getTensorBagAt(index_in_network_)->src_mem_}, {DNNL_ARG_WEIGHTS, tensor_mgr_->getTensorBagAt(index_in_network_)->weights_mem_},
283309 // {DNNL_ARG_DST, tensor_mgr_->getTensorBagAt(index_in_network_)->dst_mem_}});
284- conv_.execute (s_,
310+ conv_.execute (* s_,
285311 {{DNNL_ARG_SRC, conv_src_mem}, {DNNL_ARG_WEIGHTS, conv_weights_mem},
286312 {DNNL_ARG_DST, conv_dst_mem}});
287- s_. wait ();
313+ s_-> wait ();
288314#ifdef DEVICE_TIMER
289315 timer_->recordOpTimeTaken (index_in_network_, calculate_op_time_taken (start), " CONV_FORWARD EXECUTION" );
290316#endif
0 commit comments