Skip to content

Commit 186e588

Browse files
committed
Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue
Tony Nguyen says: ==================== igc: Fix PTM timeout Christopher S M Hall says: There have been sporadic reports of PTM timeouts using i225/i226 devices These timeouts have been root caused to: 1) Manipulating the PTM status register while PTM is enabled and triggered 2) The hardware retrying too quickly when an inappropriate response is received from the upstream device The issue can be reproduced with the following: $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to quickly reproduce the issue. PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails The first patch in this series also resolves an issue reported by Corinna Vinschen relating to kdump: This patch also fixes a hang in igc_probe() when loading the igc driver in the kdump kernel on systems supporting PTM. The igc driver running in the base kernel enables PTM trigger in igc_probe(). Therefore the driver is always in PTM trigger mode, except in brief periods when manually triggering a PTM cycle. When a crash occurs, the NIC is reset while PTM trigger is enabled. Due to a hardware problem, the NIC is subsequently in a bad busmaster state and doesn't handle register reads/writes. When running igc_probe() in the kdump kernel, the first register access to a NIC register hangs driver probing and ultimately breaks kdump. With this patch, igc has PTM trigger disabled most of the time, and the trigger is only enabled for very brief (10 - 100 us) periods when manually triggering a PTM cycle. Chances that a crash occurs during a PTM trigger are not zero, but extremly reduced. * '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue: igc: add lock preventing multiple simultaneous PTM transactions igc: cleanup PTP module if probe fails igc: handle the IGC_PTP_ENABLED flag correctly igc: move ktime snapshot into PTM retry loop igc: increase wait time before retrying PTM igc: fix PTM cycle trigger logic ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 747fb84 + 1a931c4 commit 186e588

File tree

4 files changed

+81
-40
lines changed

4 files changed

+81
-40
lines changed

drivers/net/ethernet/intel/igc/igc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ struct igc_adapter {
319319
struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
320320
ktime_t ptp_reset_start; /* Reset time in clock mono */
321321
struct system_time_snapshot snapshot;
322+
struct mutex ptm_lock; /* Only allow one PTM transaction at a time */
322323

323324
char fw_version[32];
324325

drivers/net/ethernet/intel/igc/igc_defines.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,10 @@
574574
#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2)
575575
#define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
576576

577-
#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */
577+
/* A short cycle time of 1us theoretically should work, but appears to be too
578+
* short in practice.
579+
*/
580+
#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */
578581
#define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */
579582
#define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */
580583

@@ -593,6 +596,7 @@
593596
#define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */
594597
#define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */
595598
#define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */
599+
#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */
596600

597601
/* PCIe PTM Cycle Control */
598602
#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */

drivers/net/ethernet/intel/igc/igc_main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7231,6 +7231,7 @@ static int igc_probe(struct pci_dev *pdev,
72317231

72327232
err_register:
72337233
igc_release_hw_control(adapter);
7234+
igc_ptp_stop(adapter);
72347235
err_eeprom:
72357236
if (!igc_check_reset_block(hw))
72367237
igc_reset_phy(hw);

drivers/net/ethernet/intel/igc/igc_ptp.c

Lines changed: 74 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -974,45 +974,62 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
974974
}
975975
}
976976

977+
/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */
978+
static void igc_ptm_trigger(struct igc_hw *hw)
979+
{
980+
u32 ctrl;
981+
982+
/* To "manually" start the PTM cycle we need to set the
983+
* trigger (TRIG) bit
984+
*/
985+
ctrl = rd32(IGC_PTM_CTRL);
986+
ctrl |= IGC_PTM_CTRL_TRIG;
987+
wr32(IGC_PTM_CTRL, ctrl);
988+
/* Perform flush after write to CTRL register otherwise
989+
* transaction may not start
990+
*/
991+
wrfl();
992+
}
993+
994+
/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */
995+
static void igc_ptm_reset(struct igc_hw *hw)
996+
{
997+
u32 ctrl;
998+
999+
ctrl = rd32(IGC_PTM_CTRL);
1000+
ctrl &= ~IGC_PTM_CTRL_TRIG;
1001+
wr32(IGC_PTM_CTRL, ctrl);
1002+
/* Write to clear all status */
1003+
wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL);
1004+
}
1005+
9771006
static int igc_phc_get_syncdevicetime(ktime_t *device,
9781007
struct system_counterval_t *system,
9791008
void *ctx)
9801009
{
981-
u32 stat, t2_curr_h, t2_curr_l, ctrl;
9821010
struct igc_adapter *adapter = ctx;
9831011
struct igc_hw *hw = &adapter->hw;
1012+
u32 stat, t2_curr_h, t2_curr_l;
9841013
int err, count = 100;
9851014
ktime_t t1, t2_curr;
9861015

987-
/* Get a snapshot of system clocks to use as historic value. */
988-
ktime_get_snapshot(&adapter->snapshot);
989-
1016+
/* Doing this in a loop because in the event of a
1017+
* badly timed (ha!) system clock adjustment, we may
1018+
* get PTM errors from the PCI root, but these errors
1019+
* are transitory. Repeating the process returns valid
1020+
* data eventually.
1021+
*/
9901022
do {
991-
/* Doing this in a loop because in the event of a
992-
* badly timed (ha!) system clock adjustment, we may
993-
* get PTM errors from the PCI root, but these errors
994-
* are transitory. Repeating the process returns valid
995-
* data eventually.
996-
*/
1023+
/* Get a snapshot of system clocks to use as historic value. */
1024+
ktime_get_snapshot(&adapter->snapshot);
9971025

998-
/* To "manually" start the PTM cycle we need to clear and
999-
* then set again the TRIG bit.
1000-
*/
1001-
ctrl = rd32(IGC_PTM_CTRL);
1002-
ctrl &= ~IGC_PTM_CTRL_TRIG;
1003-
wr32(IGC_PTM_CTRL, ctrl);
1004-
ctrl |= IGC_PTM_CTRL_TRIG;
1005-
wr32(IGC_PTM_CTRL, ctrl);
1006-
1007-
/* The cycle only starts "for real" when software notifies
1008-
* that it has read the registers, this is done by setting
1009-
* VALID bit.
1010-
*/
1011-
wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
1026+
igc_ptm_trigger(hw);
10121027

10131028
err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
10141029
stat, IGC_PTM_STAT_SLEEP,
10151030
IGC_PTM_STAT_TIMEOUT);
1031+
igc_ptm_reset(hw);
1032+
10161033
if (err < 0) {
10171034
netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
10181035
return err;
@@ -1021,15 +1038,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device,
10211038
if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID)
10221039
break;
10231040

1024-
if (stat & ~IGC_PTM_STAT_VALID) {
1025-
/* An error occurred, log it. */
1026-
igc_ptm_log_error(adapter, stat);
1027-
/* The STAT register is write-1-to-clear (W1C),
1028-
* so write the previous error status to clear it.
1029-
*/
1030-
wr32(IGC_PTM_STAT, stat);
1031-
continue;
1032-
}
1041+
igc_ptm_log_error(adapter, stat);
10331042
} while (--count);
10341043

10351044
if (!count) {
@@ -1061,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
10611070
{
10621071
struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
10631072
ptp_caps);
1073+
int ret;
1074+
1075+
/* This blocks until any in progress PTM transactions complete */
1076+
mutex_lock(&adapter->ptm_lock);
10641077

1065-
return get_device_system_crosststamp(igc_phc_get_syncdevicetime,
1066-
adapter, &adapter->snapshot, cts);
1078+
ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime,
1079+
adapter, &adapter->snapshot, cts);
1080+
mutex_unlock(&adapter->ptm_lock);
1081+
1082+
return ret;
10671083
}
10681084

10691085
static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp,
@@ -1162,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
11621178
spin_lock_init(&adapter->ptp_tx_lock);
11631179
spin_lock_init(&adapter->free_timer_lock);
11641180
spin_lock_init(&adapter->tmreg_lock);
1181+
mutex_init(&adapter->ptm_lock);
11651182

11661183
adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
11671184
adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
@@ -1174,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter)
11741191
if (IS_ERR(adapter->ptp_clock)) {
11751192
adapter->ptp_clock = NULL;
11761193
netdev_err(netdev, "ptp_clock_register failed\n");
1194+
mutex_destroy(&adapter->ptm_lock);
11771195
} else if (adapter->ptp_clock) {
11781196
netdev_info(netdev, "PHC added\n");
11791197
adapter->ptp_flags |= IGC_PTP_ENABLED;
@@ -1203,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter)
12031221
struct igc_hw *hw = &adapter->hw;
12041222
u32 ctrl;
12051223

1224+
mutex_lock(&adapter->ptm_lock);
12061225
ctrl = rd32(IGC_PTM_CTRL);
12071226
ctrl &= ~IGC_PTM_CTRL_EN;
12081227

12091228
wr32(IGC_PTM_CTRL, ctrl);
1229+
mutex_unlock(&adapter->ptm_lock);
12101230
}
12111231

12121232
/**
@@ -1237,13 +1257,18 @@ void igc_ptp_suspend(struct igc_adapter *adapter)
12371257
**/
12381258
void igc_ptp_stop(struct igc_adapter *adapter)
12391259
{
1260+
if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
1261+
return;
1262+
12401263
igc_ptp_suspend(adapter);
12411264

1265+
adapter->ptp_flags &= ~IGC_PTP_ENABLED;
12421266
if (adapter->ptp_clock) {
12431267
ptp_clock_unregister(adapter->ptp_clock);
12441268
netdev_info(adapter->netdev, "PHC removed\n");
12451269
adapter->ptp_flags &= ~IGC_PTP_ENABLED;
12461270
}
1271+
mutex_destroy(&adapter->ptm_lock);
12471272
}
12481273

12491274
/**
@@ -1255,10 +1280,13 @@ void igc_ptp_stop(struct igc_adapter *adapter)
12551280
void igc_ptp_reset(struct igc_adapter *adapter)
12561281
{
12571282
struct igc_hw *hw = &adapter->hw;
1258-
u32 cycle_ctrl, ctrl;
1283+
u32 cycle_ctrl, ctrl, stat;
12591284
unsigned long flags;
12601285
u32 timadj;
12611286

1287+
if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
1288+
return;
1289+
12621290
/* reset the tstamp_config */
12631291
igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
12641292

@@ -1280,6 +1308,7 @@ void igc_ptp_reset(struct igc_adapter *adapter)
12801308
if (!igc_is_crosststamp_supported(adapter))
12811309
break;
12821310

1311+
mutex_lock(&adapter->ptm_lock);
12831312
wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT);
12841313
wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT);
12851314

@@ -1290,14 +1319,20 @@ void igc_ptp_reset(struct igc_adapter *adapter)
12901319
ctrl = IGC_PTM_CTRL_EN |
12911320
IGC_PTM_CTRL_START_NOW |
12921321
IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) |
1293-
IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) |
1294-
IGC_PTM_CTRL_TRIG;
1322+
IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT);
12951323

12961324
wr32(IGC_PTM_CTRL, ctrl);
12971325

12981326
/* Force the first cycle to run. */
1299-
wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
1327+
igc_ptm_trigger(hw);
1328+
1329+
if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat,
1330+
stat, IGC_PTM_STAT_SLEEP,
1331+
IGC_PTM_STAT_TIMEOUT))
1332+
netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
13001333

1334+
igc_ptm_reset(hw);
1335+
mutex_unlock(&adapter->ptm_lock);
13011336
break;
13021337
default:
13031338
/* No work to do. */

0 commit comments

Comments
 (0)