 import itertools
 import torch
 import intel_pytorch_extension as ipex
+import contextlib
+import io
 
 from common_ipex_conf import AutoMixPrecision, AutoDNNL
 
@@ -1303,6 +1305,33 @@ def test_unsqueeze(self):
         x_dpcpp = x.clone().to(device=device)
         self.assertEqual(x_dpcpp.unsqueeze(1), x.unsqueeze(1))
 
+        with AutoDNNL(True):
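+            # Unsqueeze a CHW input to NCHW and check Conv2d parity between CPU and the IPEX device.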
+            x = torch.randn(3, 64, 64, dtype=torch.float32)
+            x_xpu = x.clone().to(device=device)
+            conv2d_cpu = torch.nn.Conv2d(3, 6, (3, 3))
+            conv2d_xpu = copy.deepcopy(conv2d_cpu).to(device=device)
+            x_nchw = x.unsqueeze(0)
+            x_xpu_nchw = x_xpu.unsqueeze(0)
+            self.assertEqual(conv2d_cpu(x_nchw), conv2d_xpu(x_xpu_nchw))
+
+            conv2d_cpu = torch.nn.Conv2d(3, 1, (3, 3))
+            conv2d_xpu = copy.deepcopy(conv2d_cpu).to(ipex.DEVICE)
+            # squeeze the conv2d weight down to CHW
+            conv2d_weight_seq = conv2d_xpu.weight.clone().squeeze()
+            # unsqueeze the conv2d weight back to NCHW
+            conv2d_weight_unseq = torch.unsqueeze(conv2d_weight_seq, 0)
+
+            conv2d_xpu.weight.data = conv2d_weight_unseq
+
+            a = torch.randn(1, 3, 10, 10).to(ipex.DEVICE)
+            # Make sure conv2d_xpu.weight is in blocked format
+            conv2d_xpu(a)
+            # Make sure the unsqueeze does not trigger a reorder
+            conv2d_weight_unseq = torch.unsqueeze(conv2d_weight_seq, 0)
+            self.assertEqual(conv2d_xpu(a), conv2d_cpu(a.to("cpu")))
+
+
+
 class TestSoftMax(TestCase):
     def test_softmax(self):
         with AutoDNNL(True):
@@ -1580,7 +1609,7 @@ def _lstm_params_list(self, cell):
         if cell == "RNN":
             params_dict["nonlinearity"] = ["tanh"]  # ["tanh", "relu"] TODO relu has accuracy issue
         elif cell == "GRU":
-            params_dict["nonlinearity"] = [""]
+            params_dict["nonlinearity"] = [""]
 
         params_list = []
 
@@ -1592,16 +1621,16 @@ def _test_lstm(self, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         params_list = self._lstm_params_list("LSTM")
 
         for input_size, hidden_size, num_layers, bidirectional, bias, empty_state, batch_first, dropout, batch_size, seq_len in itertools.product(*params_list):
             # dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1
             if dropout > 0 and num_layers == 1:
                 continue
-
+
             num_directions = 2 if bidirectional else 1
-
+
             if batch_first:
                 input = torch.randn(batch_size, seq_len, input_size)
             else:
@@ -1649,7 +1678,7 @@ def _test_lstm(self, training):
                 hy_cpu[0].sum().backward(retain_graph=True)
                 hy_dpcpp[0].sum().backward(retain_graph=True)
                 self.assertEqual(h0_dpcpp.grad.to('cpu'), h_cpu.grad)
-
+
                 hy_cpu[1].sum().backward(retain_graph=True)
                 hy_dpcpp[1].sum().backward(retain_graph=True)
                 self.assertEqual(c0_dpcpp.grad.to('cpu'), c_cpu.grad)
@@ -1658,16 +1687,16 @@ def _test_rnn(self, cell, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         params_list = self._lstm_params_list(cell)
 
         for input_size, hidden_size, num_layers, bidirectional, bias, empty_state, batch_first, dropout, batch_size, seq_len, nonlinearity in itertools.product(*params_list):
             # dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1
             if dropout > 0 and num_layers == 1:
                 continue
-
+
             num_directions = 2 if bidirectional else 1
-
+
             if batch_first:
                 input = torch.randn(batch_size, seq_len, input_size)
             else:
@@ -1683,7 +1712,7 @@ def _test_rnn(self, cell, training):
                 model_cpu = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first, nonlinearity=nonlinearity)
             elif cell == "GRU":
                 model_cpu = torch.nn.GRU(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, bidirectional=bidirectional, bias=bias, dropout=dropout, batch_first=batch_first)
-
+
             model_cpu.train() if training else model_cpu.eval()
 
             input_dpcpp = input.clone().to(device=device).requires_grad_(training)
@@ -1720,7 +1749,7 @@ def _test_pack_padded_sequence_lstm(self, training):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))
         torch.manual_seed(rand_seed)
-
+
         embedding_dim = 1024
         hidden_dim = 10
         batch_size = 24
@@ -1755,7 +1784,7 @@ def _test_pack_padded_sequence_lstm(self, training):
 
         lstm_out, hidden_out = lstm(embeds, (hidden_0, hidden_1))
         lstm_out, _ = torch.nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True)
-
+
         with AutoDNNL(True):
             lstm_out_dpcpp, hidden_out_dpcpp = lstm_dpcpp(embeds_dpcpp, (hidden_0_dpcpp, hidden_1_dpcpp))
             lstm_out_dpcpp, _ = torch.nn.utils.rnn.pad_packed_sequence(lstm_out_dpcpp, batch_first=True)
@@ -1770,16 +1799,16 @@ def _test_pack_padded_sequence_lstm(self, training):
             self.assertEqual(sentences_dpcpp.grad.to('cpu'), sentences.grad)
             self.assertEqual(lstm_dpcpp.weight_ih_l0.grad.to('cpu'), lstm.weight_ih_l0.grad)
             self.assertEqual(lstm_dpcpp.weight_hh_l0.grad.to('cpu'), lstm.weight_hh_l0.grad)
-
+
             self.assertEqual(lstm_dpcpp.bias_ih_l0.grad.to('cpu'), lstm.bias_ih_l0.grad)
             self.assertEqual(lstm_dpcpp.bias_hh_l0.grad.to('cpu'), lstm.bias_hh_l0.grad)
-
+
             self.assertEqual(hidden_0_dpcpp.grad.to('cpu'), hidden_0.grad)
             self.assertEqual(hidden_1_dpcpp.grad.to('cpu'), hidden_1.grad)
 
     def test_lstm_inference(self):
         self._test_lstm(training=False)
-
+
     def test_lstm_training(self):
         self._test_lstm(training=True)
 
@@ -1937,6 +1966,17 @@ def test_upsample_bilinear2d_scale_factor(self):
             y_dpcpp.sum().backward()
             self.assertEqual(x_cpu.grad, x_dpcpp.grad)
 
+        with AutoDNNL(True):
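+            # Non-uniform scale_factor ([2, 3]) with recompute_scale_factor=False; check forward and backward parity with CPU.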
+            x = torch.randn(2, 2, 4, 4)
+            x_cpu = x.clone().requires_grad_()
+            x_dpcpp = x.clone().to(device=device).requires_grad_()
+            y_cpu = F.interpolate(x_cpu, scale_factor=[2, 3], mode='bilinear', align_corners=False, recompute_scale_factor=False)
+            y_dpcpp = F.interpolate(x_dpcpp, scale_factor=[2, 3], mode='bilinear', align_corners=False, recompute_scale_factor=False)
+            self.assertEqual(y_cpu, y_dpcpp)
+            y_cpu.sum().backward()
+            y_dpcpp.sum().backward()
+            self.assertEqual(x_cpu.grad, x_dpcpp.grad)
+
     def test_upsample_bilinear2d_size(self):
         rand_seed = int(get_rand_seed())
         print("{} rand sed: {}".format(sys._getframe().f_code.co_name, rand_seed))