@@ -69,12 +69,10 @@ enum {
69
69
MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10 ,
70
70
};
71
71
72
- static struct mlx5_cmd_work_ent * alloc_cmd (struct mlx5_cmd * cmd ,
73
- struct mlx5_cmd_msg * in ,
74
- struct mlx5_cmd_msg * out ,
75
- void * uout , int uout_size ,
76
- mlx5_cmd_cbk_t cbk ,
77
- void * context , int page_queue )
72
+ static struct mlx5_cmd_work_ent *
73
+ cmd_alloc_ent (struct mlx5_cmd * cmd , struct mlx5_cmd_msg * in ,
74
+ struct mlx5_cmd_msg * out , void * uout , int uout_size ,
75
+ mlx5_cmd_cbk_t cbk , void * context , int page_queue )
78
76
{
79
77
gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL ;
80
78
struct mlx5_cmd_work_ent * ent ;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
83
81
if (!ent )
84
82
return ERR_PTR (- ENOMEM );
85
83
84
+ ent -> idx = - EINVAL ;
86
85
ent -> in = in ;
87
86
ent -> out = out ;
88
87
ent -> uout = uout ;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
91
90
ent -> context = context ;
92
91
ent -> cmd = cmd ;
93
92
ent -> page_queue = page_queue ;
93
+ refcount_set (& ent -> refcnt , 1 );
94
94
95
95
return ent ;
96
96
}
97
97
98
+ static void cmd_free_ent (struct mlx5_cmd_work_ent * ent )
99
+ {
100
+ kfree (ent );
101
+ }
102
+
98
103
static u8 alloc_token (struct mlx5_cmd * cmd )
99
104
{
100
105
u8 token ;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
109
114
return token ;
110
115
}
111
116
112
- static int alloc_ent (struct mlx5_cmd * cmd )
117
+ static int cmd_alloc_index (struct mlx5_cmd * cmd )
113
118
{
114
119
unsigned long flags ;
115
120
int ret ;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
123
128
return ret < cmd -> max_reg_cmds ? ret : - ENOMEM ;
124
129
}
125
130
126
- static void free_ent (struct mlx5_cmd * cmd , int idx )
131
+ static void cmd_free_index (struct mlx5_cmd * cmd , int idx )
127
132
{
128
133
unsigned long flags ;
129
134
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
132
137
spin_unlock_irqrestore (& cmd -> alloc_lock , flags );
133
138
}
134
139
140
+ static void cmd_ent_get (struct mlx5_cmd_work_ent * ent )
141
+ {
142
+ refcount_inc (& ent -> refcnt );
143
+ }
144
+
145
+ static void cmd_ent_put (struct mlx5_cmd_work_ent * ent )
146
+ {
147
+ if (!refcount_dec_and_test (& ent -> refcnt ))
148
+ return ;
149
+
150
+ if (ent -> idx >= 0 )
151
+ cmd_free_index (ent -> cmd , ent -> idx );
152
+
153
+ cmd_free_ent (ent );
154
+ }
155
+
135
156
static struct mlx5_cmd_layout * get_inst (struct mlx5_cmd * cmd , int idx )
136
157
{
137
158
return cmd -> cmd_buf + (idx << cmd -> log_stride );
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
219
240
ent -> ret = - ETIMEDOUT ;
220
241
}
221
242
222
- static void free_cmd (struct mlx5_cmd_work_ent * ent )
223
- {
224
- kfree (ent );
225
- }
226
-
227
243
static int verify_signature (struct mlx5_cmd_work_ent * ent )
228
244
{
229
245
struct mlx5_cmd_mailbox * next = ent -> out -> next ;
@@ -837,11 +853,22 @@ static void cb_timeout_handler(struct work_struct *work)
837
853
struct mlx5_core_dev * dev = container_of (ent -> cmd , struct mlx5_core_dev ,
838
854
cmd );
839
855
856
+ mlx5_cmd_eq_recover (dev );
857
+
858
+ /* The entry may already have been handled by the EQ recovery above. */
859
+ if (!test_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state )) {
860
+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) Async, recovered after timeout\n" , ent -> idx ,
861
+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
862
+ goto out ; /* phew, already handled */
863
+ }
864
+
840
865
ent -> ret = - ETIMEDOUT ;
841
- mlx5_core_warn (dev , "%s(0x%x) timeout. Will cause a leak of a command resource\n" ,
842
- mlx5_command_str (msg_to_opcode (ent -> in )),
843
- msg_to_opcode (ent -> in ));
866
+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n" ,
867
+ ent -> idx , mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
844
868
mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
869
+
870
+ out :
871
+ cmd_ent_put (ent ); /* for the cmd_ent_get() taken when the delayed work was scheduled */
845
872
}
846
873
847
874
static void free_msg (struct mlx5_core_dev * dev , struct mlx5_cmd_msg * msg );
@@ -856,6 +883,32 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
856
883
return cmd -> allowed_opcode == opcode ;
857
884
}
858
885
886
+ static int cmd_alloc_index_retry (struct mlx5_cmd * cmd )
887
+ {
888
+ unsigned long alloc_end = jiffies + msecs_to_jiffies (1000 );
889
+ int idx ;
890
+
891
+ retry :
892
+ idx = cmd_alloc_index (cmd );
893
+ if (idx < 0 && time_before (jiffies , alloc_end )) {
894
+ /* Index allocation can fail on heavy load of commands. This is a temporary
895
+ * situation as the current command already holds the semaphore, meaning that
896
+ * another command completion is being handled and it is expected to release
897
+ * the entry index soon.
898
+ */
899
+ cpu_relax ();
900
+ goto retry ;
901
+ }
902
+ return idx ;
903
+ }
904
+
905
+ bool mlx5_cmd_is_down (struct mlx5_core_dev * dev )
906
+ {
907
+ return pci_channel_offline (dev -> pdev ) ||
908
+ dev -> cmd .state != MLX5_CMDIF_STATE_UP ||
909
+ dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ;
910
+ }
911
+
859
912
static void cmd_work_handler (struct work_struct * work )
860
913
{
861
914
struct mlx5_cmd_work_ent * ent = container_of (work , struct mlx5_cmd_work_ent , work );
@@ -873,14 +926,14 @@ static void cmd_work_handler(struct work_struct *work)
873
926
sem = ent -> page_queue ? & cmd -> pages_sem : & cmd -> sem ;
874
927
down (sem );
875
928
if (!ent -> page_queue ) {
876
- alloc_ret = alloc_ent (cmd );
929
+ alloc_ret = cmd_alloc_index_retry (cmd );
877
930
if (alloc_ret < 0 ) {
878
931
mlx5_core_err_rl (dev , "failed to allocate command entry\n" );
879
932
if (ent -> callback ) {
880
933
ent -> callback (- EAGAIN , ent -> context );
881
934
mlx5_free_cmd_msg (dev , ent -> out );
882
935
free_msg (dev , ent -> in );
883
- free_cmd (ent );
936
+ cmd_ent_put (ent );
884
937
} else {
885
938
ent -> ret = - EAGAIN ;
886
939
complete (& ent -> done );
@@ -916,15 +969,12 @@ static void cmd_work_handler(struct work_struct *work)
916
969
ent -> ts1 = ktime_get_ns ();
917
970
cmd_mode = cmd -> mode ;
918
971
919
- if (ent -> callback )
920
- schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout );
972
+ if (ent -> callback && schedule_delayed_work ( & ent -> cb_timeout_work , cb_timeout ) )
973
+ cmd_ent_get ( ent );
921
974
set_bit (MLX5_CMD_ENT_STATE_PENDING_COMP , & ent -> state );
922
975
923
976
/* Skip sending command to fw if internal error */
924
- if (pci_channel_offline (dev -> pdev ) ||
925
- dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
926
- cmd -> state != MLX5_CMDIF_STATE_UP ||
927
- !opcode_allowed (& dev -> cmd , ent -> op )) {
977
+ if (mlx5_cmd_is_down (dev ) || !opcode_allowed (& dev -> cmd , ent -> op )) {
928
978
u8 status = 0 ;
929
979
u32 drv_synd ;
930
980
@@ -933,13 +983,10 @@ static void cmd_work_handler(struct work_struct *work)
933
983
MLX5_SET (mbox_out , ent -> out , syndrome , drv_synd );
934
984
935
985
mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
936
- /* no doorbell, no need to keep the entry */
937
- free_ent (cmd , ent -> idx );
938
- if (ent -> callback )
939
- free_cmd (ent );
940
986
return ;
941
987
}
942
988
989
+ cmd_ent_get (ent ); /* for the _real_ FW event on completion */
943
990
/* ring doorbell after the descriptor is valid */
944
991
mlx5_core_dbg (dev , "writing 0x%x to command doorbell\n" , 1 << ent -> idx );
945
992
wmb ();
@@ -983,6 +1030,35 @@ static const char *deliv_status_to_str(u8 status)
983
1030
}
984
1031
}
985
1032
1033
+ enum {
1034
+ MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000 ,
1035
+ };
1036
+
1037
+ static void wait_func_handle_exec_timeout (struct mlx5_core_dev * dev ,
1038
+ struct mlx5_cmd_work_ent * ent )
1039
+ {
1040
+ unsigned long timeout = msecs_to_jiffies (MLX5_CMD_TIMEOUT_RECOVER_MSEC );
1041
+
1042
+ mlx5_cmd_eq_recover (dev );
1043
+
1044
+ /* Re-wait on the ent->done after executing the recovery flow. If the
1045
+ * recovery flow (or any other recovery flow running simultaneously)
1046
+ * has recovered an EQE, it should cause the entry to be completed by
1047
+ * the command interface.
1048
+ */
1049
+ if (wait_for_completion_timeout (& ent -> done , timeout )) {
1050
+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) recovered after timeout\n" , ent -> idx ,
1051
+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
1052
+ return ;
1053
+ }
1054
+
1055
+ mlx5_core_warn (dev , "cmd[%d]: %s(0x%x) No done completion\n" , ent -> idx ,
1056
+ mlx5_command_str (msg_to_opcode (ent -> in )), msg_to_opcode (ent -> in ));
1057
+
1058
+ ent -> ret = - ETIMEDOUT ;
1059
+ mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
1060
+ }
1061
+
986
1062
static int wait_func (struct mlx5_core_dev * dev , struct mlx5_cmd_work_ent * ent )
987
1063
{
988
1064
unsigned long timeout = msecs_to_jiffies (MLX5_CMD_TIMEOUT_MSEC );
@@ -994,12 +1070,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
994
1070
ent -> ret = - ECANCELED ;
995
1071
goto out_err ;
996
1072
}
997
- if (cmd -> mode == CMD_MODE_POLLING || ent -> polling ) {
1073
+ if (cmd -> mode == CMD_MODE_POLLING || ent -> polling )
998
1074
wait_for_completion (& ent -> done );
999
- } else if (!wait_for_completion_timeout (& ent -> done , timeout )) {
1000
- ent -> ret = - ETIMEDOUT ;
1001
- mlx5_cmd_comp_handler (dev , 1UL << ent -> idx , true);
1002
- }
1075
+ else if (!wait_for_completion_timeout (& ent -> done , timeout ))
1076
+ wait_func_handle_exec_timeout (dev , ent );
1003
1077
1004
1078
out_err :
1005
1079
err = ent -> ret ;
@@ -1039,11 +1113,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
1039
1113
if (callback && page_queue )
1040
1114
return - EINVAL ;
1041
1115
1042
- ent = alloc_cmd (cmd , in , out , uout , uout_size , callback , context ,
1043
- page_queue );
1116
+ ent = cmd_alloc_ent (cmd , in , out , uout , uout_size ,
1117
+ callback , context , page_queue );
1044
1118
if (IS_ERR (ent ))
1045
1119
return PTR_ERR (ent );
1046
1120
1121
+ /* The put for this ent happens when it is consumed, depending on the use case:
1122
+ * 1) (!callback) blocking flow: by caller after wait_func completes
1123
+ * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
1124
+ */
1125
+
1047
1126
ent -> token = token ;
1048
1127
ent -> polling = force_polling ;
1049
1128
@@ -1062,12 +1141,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
1062
1141
}
1063
1142
1064
1143
if (callback )
1065
- goto out ;
1144
+ goto out ; /* mlx5_cmd_comp_handler() will put(ent) */
1066
1145
1067
1146
err = wait_func (dev , ent );
1068
- if (err == - ETIMEDOUT )
1069
- goto out ;
1070
- if (err == - ECANCELED )
1147
+ if (err == - ETIMEDOUT || err == - ECANCELED )
1071
1148
goto out_free ;
1072
1149
1073
1150
ds = ent -> ts2 - ent -> ts1 ;
@@ -1085,7 +1162,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
1085
1162
* status = ent -> status ;
1086
1163
1087
1164
out_free :
1088
- free_cmd (ent );
1165
+ cmd_ent_put (ent );
1089
1166
out :
1090
1167
return err ;
1091
1168
}
@@ -1516,14 +1593,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
1516
1593
if (!forced ) {
1517
1594
mlx5_core_err (dev , "Command completion arrived after timeout (entry idx = %d).\n" ,
1518
1595
ent -> idx );
1519
- free_ent (cmd , ent -> idx );
1520
- free_cmd (ent );
1596
+ cmd_ent_put (ent );
1521
1597
}
1522
1598
continue ;
1523
1599
}
1524
1600
1525
- if (ent -> callback )
1526
- cancel_delayed_work (& ent -> cb_timeout_work );
1601
+ if (ent -> callback && cancel_delayed_work (& ent -> cb_timeout_work ))
1602
+ cmd_ent_put (ent ); /* timeout work was canceled */
1603
+
1604
+ if (!forced || /* Real FW completion */
1605
+ pci_channel_offline (dev -> pdev ) || /* FW is inaccessible */
1606
+ dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR )
1607
+ cmd_ent_put (ent );
1608
+
1527
1609
if (ent -> page_queue )
1528
1610
sem = & cmd -> pages_sem ;
1529
1611
else
@@ -1545,10 +1627,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
1545
1627
ent -> ret , deliv_status_to_str (ent -> status ), ent -> status );
1546
1628
}
1547
1629
1548
- /* only real completion will free the entry slot */
1549
- if (!forced )
1550
- free_ent (cmd , ent -> idx );
1551
-
1552
1630
if (ent -> callback ) {
1553
1631
ds = ent -> ts2 - ent -> ts1 ;
1554
1632
if (ent -> op < MLX5_CMD_OP_MAX ) {
@@ -1576,10 +1654,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
1576
1654
free_msg (dev , ent -> in );
1577
1655
1578
1656
err = err ? err : ent -> status ;
1579
- if (! forced )
1580
- free_cmd (ent );
1657
+ /* final consumer is done, release ent */
1658
+ cmd_ent_put (ent );
1581
1659
callback (err , context );
1582
1660
} else {
1661
+ /* release wait_func() so mlx5_cmd_invoke()
1662
+ * can make the final ent_put()
1663
+ */
1583
1664
complete (& ent -> done );
1584
1665
}
1585
1666
up (sem );
@@ -1589,8 +1670,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
1589
1670
1590
1671
void mlx5_cmd_trigger_completions (struct mlx5_core_dev * dev )
1591
1672
{
1673
+ struct mlx5_cmd * cmd = & dev -> cmd ;
1674
+ unsigned long bitmask ;
1592
1675
unsigned long flags ;
1593
1676
u64 vector ;
1677
+ int i ;
1594
1678
1595
1679
/* wait for pending handlers to complete */
1596
1680
mlx5_eq_synchronize_cmd_irq (dev );
@@ -1599,11 +1683,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
1599
1683
if (!vector )
1600
1684
goto no_trig ;
1601
1685
1686
+ bitmask = vector ;
1687
+ /* We must increment the refcount of the allocated entries before triggering the completions
1688
+ * to guarantee that pending commands do not get freed in the meantime.
1689
+ * For that reason, it also has to be done inside the alloc_lock.
1690
+ */
1691
+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1692
+ cmd_ent_get (cmd -> ent_arr [i ]);
1602
1693
vector |= MLX5_TRIGGERED_CMD_COMP ;
1603
1694
spin_unlock_irqrestore (& dev -> cmd .alloc_lock , flags );
1604
1695
1605
1696
mlx5_core_dbg (dev , "vector 0x%llx\n" , vector );
1606
1697
mlx5_cmd_comp_handler (dev , vector , true);
1698
+ for_each_set_bit (i , & bitmask , (1 << cmd -> log_sz ))
1699
+ cmd_ent_put (cmd -> ent_arr [i ]);
1607
1700
return ;
1608
1701
1609
1702
no_trig :
@@ -1711,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1711
1804
u8 token ;
1712
1805
1713
1806
opcode = MLX5_GET (mbox_in , in , opcode );
1714
- if (pci_channel_offline (dev -> pdev ) ||
1715
- dev -> state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
1716
- dev -> cmd .state != MLX5_CMDIF_STATE_UP ||
1717
- !opcode_allowed (& dev -> cmd , opcode )) {
1807
+ if (mlx5_cmd_is_down (dev ) || !opcode_allowed (& dev -> cmd , opcode )) {
1718
1808
err = mlx5_internal_err_ret_value (dev , opcode , & drv_synd , & status );
1719
1809
MLX5_SET (mbox_out , out , status , status );
1720
1810
MLX5_SET (mbox_out , out , syndrome , drv_synd );
0 commit comments