|
4 | 4 | *
|
5 | 5 | * Copyright Gavin Shan, IBM Corporation 2016.
|
6 | 6 | * Copyright (C) 2025 Raptor Engineering, LLC
|
| 7 | + * Copyright (C) 2025 Raptor Computing Systems, LLC |
7 | 8 | */
|
8 | 9 |
|
9 | 10 | #include <linux/bitfield.h>
|
10 | 11 | #include <linux/libfdt.h>
|
11 | 12 | #include <linux/module.h>
|
12 | 13 | #include <linux/pci.h>
|
| 14 | +#include <linux/delay.h> |
13 | 15 | #include <linux/pci_hotplug.h>
|
14 | 16 | #include <linux/of_fdt.h>
|
15 | 17 |
|
@@ -469,6 +471,61 @@ static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
|
469 | 471 | return 0;
|
470 | 472 | }
|
471 | 473 |
|
| 474 | +static int pnv_php_activate_slot(struct pnv_php_slot *php_slot, |
| 475 | + struct hotplug_slot *slot) |
| 476 | +{ |
| 477 | + int ret, i; |
| 478 | + |
| 479 | + /* |
| 480 | + * Issue initial slot activation command to firmware |
| 481 | + * |
| 482 | + * Firmware will power slot on, attempt to train the link, and |
| 483 | + * discover any downstream devices. If this process fails, firmware |
| 484 | + * will return an error code and an invalid device tree. Failure |
| 485 | + * can be caused for multiple reasons, including a faulty |
| 486 | + * downstream device, poor connection to the downstream device, or |
| 487 | + * a previously latched PHB fence. On failure, issue fundamental |
| 488 | + * reset up to three times before aborting. |
| 489 | + */ |
| 490 | + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); |
| 491 | + if (ret) { |
| 492 | + SLOT_WARN( |
| 493 | + php_slot, |
| 494 | + "PCI slot activation failed with error code %d, possible frozen PHB", |
| 495 | + ret); |
| 496 | + SLOT_WARN( |
| 497 | + php_slot, |
| 498 | + "Attempting complete PHB reset before retrying slot activation\n"); |
| 499 | + for (i = 0; i < 3; i++) { |
| 500 | + /* |
| 501 | + * Slot activation failed, PHB may be fenced from a |
| 502 | + * prior device failure. |
| 503 | + * |
| 504 | + * Use the OPAL fundamental reset call to both try a |
| 505 | + * device reset and clear any potentially active PHB |
| 506 | + * fence / freeze. |
| 507 | + */ |
| 508 | + SLOT_WARN(php_slot, "Try %d...\n", i + 1); |
| 509 | + pci_set_pcie_reset_state(php_slot->pdev, |
| 510 | + pcie_warm_reset); |
| 511 | + msleep(250); |
| 512 | + pci_set_pcie_reset_state(php_slot->pdev, |
| 513 | + pcie_deassert_reset); |
| 514 | + |
| 515 | + ret = pnv_php_set_slot_power_state( |
| 516 | + slot, OPAL_PCI_SLOT_POWER_ON); |
| 517 | + if (!ret) |
| 518 | + break; |
| 519 | + } |
| 520 | + |
| 521 | + if (i >= 3) |
| 522 | + SLOT_WARN(php_slot, |
| 523 | + "Failed to bring slot online, aborting!\n"); |
| 524 | + } |
| 525 | + |
| 526 | + return ret; |
| 527 | +} |
| 528 | + |
472 | 529 | static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
|
473 | 530 | {
|
474 | 531 | struct hotplug_slot *slot = &php_slot->slot;
|
@@ -531,7 +588,7 @@ static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
|
531 | 588 | goto scan;
|
532 | 589 |
|
533 | 590 | /* Power is off, turn it on and then scan the slot */
|
534 |
| - ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); |
| 591 | + ret = pnv_php_activate_slot(php_slot, slot); |
535 | 592 | if (ret)
|
536 | 593 | return ret;
|
537 | 594 |
|
@@ -838,16 +895,63 @@ static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
|
838 | 895 | return entry.vector;
|
839 | 896 | }
|
840 | 897 |
|
| 898 | +static void |
| 899 | +pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot) |
| 900 | +{ |
| 901 | + struct pci_dev *pdev = php_slot->pdev; |
| 902 | + struct eeh_dev *edev; |
| 903 | + struct eeh_pe *pe; |
| 904 | + int i, rc; |
| 905 | + |
| 906 | + /* |
| 907 | + * When a device is surprise removed from a downstream bridge slot, |
| 908 | + * the upstream bridge port can still end up frozen due to related EEH |
| 909 | + * events, which will in turn block the MSI interrupts for slot hotplug |
| 910 | + * detection. |
| 911 | + * |
| 912 | + * Detect and thaw any frozen upstream PE after slot deactivation. |
| 913 | + */ |
| 914 | + edev = pci_dev_to_eeh_dev(pdev); |
| 915 | + pe = edev ? edev->pe : NULL; |
| 916 | + rc = eeh_pe_get_state(pe); |
| 917 | + if ((rc == -ENODEV) || (rc == -ENOENT)) { |
| 918 | + SLOT_WARN( |
| 919 | + php_slot, |
| 920 | + "Upstream bridge PE state unknown, hotplug detect may fail\n"); |
| 921 | + } else { |
| 922 | + if (pe->state & EEH_PE_ISOLATED) { |
| 923 | + SLOT_WARN( |
| 924 | + php_slot, |
| 925 | + "Upstream bridge PE %02x frozen, thawing...\n", |
| 926 | + pe->addr); |
| 927 | + for (i = 0; i < 3; i++) |
| 928 | + if (!eeh_unfreeze_pe(pe)) |
| 929 | + break; |
| 930 | + if (i >= 3) |
| 931 | + SLOT_WARN( |
| 932 | + php_slot, |
| 933 | + "Unable to thaw PE %02x, hotplug detect will fail!\n", |
| 934 | + pe->addr); |
| 935 | + else |
| 936 | + SLOT_WARN(php_slot, |
| 937 | + "PE %02x thawed successfully\n", |
| 938 | + pe->addr); |
| 939 | + } |
| 940 | + } |
| 941 | +} |
| 942 | + |
841 | 943 | static void pnv_php_event_handler(struct work_struct *work)
|
842 | 944 | {
|
843 | 945 | struct pnv_php_event *event =
|
844 | 946 | container_of(work, struct pnv_php_event, work);
|
845 | 947 | struct pnv_php_slot *php_slot = event->php_slot;
|
846 | 948 |
|
847 |
| - if (event->added) |
| 949 | + if (event->added) { |
848 | 950 | pnv_php_enable_slot(&php_slot->slot);
|
849 |
| - else |
| 951 | + } else { |
850 | 952 | pnv_php_disable_slot(&php_slot->slot);
|
| 953 | + pnv_php_detect_clear_suprise_removal_freeze(php_slot); |
| 954 | + } |
851 | 955 |
|
852 | 956 | kfree(event);
|
853 | 957 | }
|
|
0 commit comments