 import warnings
 import math
 from paddle import _C_ops
+from paddle.framework import core
+from paddle import in_dynamic_mode
 
 def padding_format(padding):
     """
@@ -1484,23 +1486,24 @@ def concat_states(states, bidirectional=False, state_components=1):
         componnets.append(states[i::state_components])
     return tuple([pd.stack(item) for item in componnets])
 
+
 class rnnbase(LayerList):
 
     def __init__(
-        self,
-        mode,
-        input_size,
-        hidden_size,
-        num_layers,
-        bias,
-        batch_first,
-        dropout,
-        bidirectional,
-        is_train,
-        w_ih,
-        w_hh,
-        b_ih,
-        b_hh,
+            self,
+            mode,
+            input_size,
+            hidden_size,
+            num_layers,
+            bias,
+            batch_first,
+            dropout,
+            bidirectional,
+            is_train,
+            w_ih,
+            w_hh,
+            b_ih,
+            b_hh,
     ):
         super(rnnbase, self).__init__()
         self.mode = mode
@@ -1596,39 +1599,104 @@ def __init__(
         self.flatten_parameters()
 
     def flatten_parameters(self):
1602+ """
1603+ Resets parameter data pointer to address in continuous memory block for
1604+ cudnn usage.
1605+ """
         if self.could_use_cudnn:
-            self._all_weights = self.parameters(include_sublayers=False)
-            shape = [np.prod(param.shape) for param in self._all_weights]
+            # layer.parameters() is depth first and ordered
+            # for i in layer: for j in direct: w_ih, w_hh, b_ih, b_hh
+            # need to reorganize to the cudnn param layout:
+            # all biases follow all weights (see the worked example below)
+            params = self.parameters(include_sublayers=False)
+            shape = [np.prod(param.shape) for param in params]
+            self._all_weights = [None] * len(params)
+            for i, param in enumerate(params):
+                base = self.num_layers * self.bidirect
+                num = i // base
+                odd = num % 2
+                offset = (2 * base) * (num // 2)
+                new_id = (i - num * base) * 2 + odd + offset
+                self._all_weights[new_id] = param
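+            # Worked example of the remapping: with num_layers=1 and a
+            # bidirectional cell (base = 2), the eight parameter indices map as
+            #   i:      0  1  2  3  4  5  6  7
+            #   new_id: 0  2  1  3  4  6  5  7
+            # so the first half (weights) stays ahead of the second half
+            # (biases), matching the cudnn layout described above.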
+            # Wrap it in a list to avoid it being registered into params and
+            # saved; maybe a better way to handle this is needed later. Use
+            # `create_parameter` to add it to both main_program and
+            # startup_program for static-graph. Use a Constant initializer to
+            # avoid affecting the random generator.
             self._flat_weight = [
                 self.create_parameter(
-                    shape=[np.sum(shape)], dtype=self._all_weights[0].dtype, default_initializer=I.Constant(0.0)
-                )
+                    shape=[np.sum(shape)],
+                    dtype=params[0].dtype,
+                    default_initializer=I.Constant(0.0))
             ]
-            self._dropout_state = self.create_variable(dtype=fluid.core.VarDesc.VarType.UINT8)
-            with fluid.program_guard(fluid.default_startup_program(), fluid.default_startup_program()):
-                with framework.no_grad():
+            # dropout state could also be hidden and excluded from saving;
+            # should dropout state be persistable for static-graph?
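+            # Assumption: DropoutState is the uint8 buffer the cudnn kernel
+            # uses to cache its dropout RNG state between calls, which is why
+            # it is created once here rather than on every forward pass.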
+            self._dropout_state = self.create_variable(
+                dtype=core.VarDesc.VarType.UINT8)
+            with fluid.program_guard(fluid.default_startup_program(),
+                                     fluid.default_startup_program()):
+                with paddle.no_grad():
                     self._helper.append_op(
-                        type="coalesce_tensor", inputs={"Input": self._all_weights}, outputs={
+                        type="coalesce_tensor",
+                        inputs={"Input": self._all_weights},
+                        outputs={
                             "Output": self._all_weights,
                             "FusedOutput": self._flat_weight
-                        }, attrs={
+                        },
+                        attrs={
                             "copy_data": True,
                             "use_align": False,
-                            "dtype": self._all_weights[0].dtype
-                        }
-                    )
+                            "dtype": params[0].dtype
+                        })
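+                    # Assumption about the op's effect: with copy_data=True,
+                    # coalesce_tensor copies every weight into _flat_weight and
+                    # re-binds each Output tensor to a slice of the fused
+                    # buffer, so cudnn can read one contiguous blob.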
 
     def _cudnn_impl(self, inputs, initial_states, sequence_length):
         if not self.time_major:
-            inputs = pd.tensor.transpose(inputs, [1, 0, 2])
-        _, _, out, state = _C_ops.rnn(
-            inputs, initial_states, self._all_weights, sequence_length,
-            self._dropout_state, self.state_components, 'dropout_prob',
-            self.dropout, 'is_bidirec', self.bidirect == 2,
-            'input_size', self.input_size, 'hidden_size', self.hidden_size,
-            'num_layers', self.num_layers, 'mode', self.mode, 'is_test',
-            not self.training)
-        out = pd.tensor.transpose(out, [1, 0, 2]) if not self.time_major else out
+            inputs = paddle.tensor.transpose(inputs, [1, 0, 2])
+
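+        # In dynamic mode the rnn op is invoked directly through _C_ops; the
+        # trailing arguments are attribute name/value pairs
+        # (e.g. 'dropout_prob', self.dropout).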
+        if in_dynamic_mode():
+            _, _, out, state = _C_ops.rnn(
+                inputs, initial_states, self._all_weights, sequence_length,
+                self._dropout_state, self.state_components, 'dropout_prob',
+                self.dropout, 'is_bidirec', self.bidirect == 2,
+                'input_size', self.input_size, 'hidden_size', self.hidden_size,
+                'num_layers', self.num_layers, 'mode', self.mode, 'is_test',
+                not self.training)
+        else:
+            out = self._helper.create_variable_for_type_inference(inputs.dtype)
+            state = [
+                self._helper.create_variable_for_type_inference(inputs.dtype)
+                for i in range(self.state_components)
+            ]
+            reserve = self._helper.create_variable_for_type_inference(
+                dtype=core.VarDesc.VarType.UINT8, stop_gradient=True)
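+            # Assumption: Reserve holds the workspace the fused kernel saves
+            # for the backward pass, hence the uint8 dtype and stop_gradient.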
+
+            inputs = {
+                'Input': inputs,
+                'WeightList': self._all_weights,
+                'PreState': initial_states,
+                'SequenceLength': sequence_length
+            }
+            attrs = {
+                'dropout_prob': self.dropout,
+                'is_bidirec': self.bidirect == 2,
+                'input_size': self.input_size,
+                'hidden_size': self.hidden_size,
+                'num_layers': self.num_layers,
+                'mode': self.mode,
+                'is_test': not self.training
+            }
+
+            outputs = {
+                'Out': out,
+                'State': state,
+                'Reserve': reserve,
+                'DropoutState': self._dropout_state,
+            }
+
+            self._helper.append_op(
+                type="rnn", inputs=inputs, outputs=outputs, attrs=attrs)
+
+        out = paddle.tensor.transpose(out,
+                                      [1, 0, 2]) if not self.time_major else out
         return out, tuple(state) if len(state) > 1 else state[0]
 
     def check_hidden(self, h, batch_size):
@@ -1661,9 +1729,13 @@ def forward(self, inputs, initial_states=None):
                 self.check_hidden(c, batch_size)
             else:
                 self.check_hidden(initial_states, batch_size)
+
         if not isinstance(initial_states, (tuple, list)):
-            initial_states = [initial_states,]
-        if self.could_use_cudnn:
+            initial_states = [initial_states, ]
+
+        if self.could_use_cudnn and (
+                not paddle.device.is_compiled_with_rocm() or
+                sequence_length is None):
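+            # With ROCm, this fused path is taken only when sequence_length is
+            # None; padded variable-length batches fall back to the per-layer
+            # implementation below (assumed MIOpen limitation).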
             # Add CPU kernel and dispatch in backend later
             return self._cudnn_impl(inputs, initial_states, sequence_length)
 
@@ -1672,15 +1744,18 @@ def forward(self, inputs, initial_states=None):
 
         for i, rnn_layer in enumerate(self):
             if i > 0:
-                inputs = F.dropout(inputs, self.dropout, training=self.training, mode="upscale_in_train")
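+                # inter-layer dropout: applied only to the inputs of layers
+                # after the first; "upscale_in_train" rescales activations at
+                # training time so evaluation needs no rescaling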
+                inputs = F.dropout(
+                    inputs,
+                    self.dropout,
+                    training=self.training,
+                    mode="upscale_in_train")
             outputs, final_state = rnn_layer(inputs, states[i], sequence_length)
             final_states.append(final_state)
             inputs = outputs
 
         final_states = concat_states(final_states, self.bidirect == 2, self.state_components)
         return outputs, final_states
 
-
 class layernorm(object):
 
     def __init__(self, normalized_shape, gamma, beta, eps, input_shape):