Skip to content

Commit ceeaaed

Browse files
smp-ready msi routing
1 parent fdb3a65 commit ceeaaed

File tree

7 files changed

+361
-94
lines changed

7 files changed

+361
-94
lines changed

include/idt.h

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,25 @@ typedef struct idt_entry_t {
2626

2727
extern volatile idt_ptr_t idt64;
2828

29+
/**
30+
* @brief Function pointer type for interrupt and IRQ handlers.
31+
*
32+
* This defines the signature of an interrupt service routine (ISR) that may be
33+
* registered via `register_interrupt_handler`. It will be called whenever a
34+
* matching interrupt or IRQ occurs.
35+
*
36+
* All handlers receive the interrupt vector number, error code (if any),
37+
* the IRQ number (for hardware IRQs, this is `isrnumber - 32`), and a user-defined
38+
* opaque pointer that was passed during registration. This allows for stateful
39+
* or device-specific behaviour in shared or reentrant interrupt contexts.
40+
*
41+
* @param isrnumber The IDT entry number (0–255), corresponding to the triggered interrupt vector.
42+
* @param errorcode A CPU-generated error code for certain exceptions (e.g. page faults). Always 0 for IRQs.
43+
* @param irqnumber For IRQs, this is typically `isrnumber - 32`; otherwise 0.
44+
* @param opaque The same pointer that was passed to `register_interrupt_handler`; used for context or state.
45+
*/
46+
typedef void (*isr_t)(uint8_t isrnumber, uint64_t errorcode, uint64_t irqnumber, void* opaque);
47+
2948
/**
3049
* @brief Initialise the IDT and enable interrupts.
3150
*
@@ -77,31 +96,38 @@ void pic_eoi(int irq);
7796
void io_wait(void);
7897

7998
/**
80-
* @brief Allocate a free MSI interrupt vector.
99+
* @brief Allocate a free MSI interrupt vector on a given CPU.
81100
*
82101
* Allocates an IDT vector in the range 64–255 for use with
83-
* Message Signalled Interrupts (MSI/MSI-X).
102+
* Message Signalled Interrupts (MSI/MSI-X), associated with
103+
* the specified Local APIC ID.
104+
*
105+
* @param cpu Local APIC ID of the target CPU.
84106
*
85107
* @return Vector number (64–255) on success,
86-
* -1 if no free vector is available.
108+
* -1 if no free vector is available on that CPU.
87109
*
88110
* @note After allocation, the driver must program the device’s
89-
* MSI capability structure and register an interrupt
90-
* handler for the vector.
111+
* MSI/MSI-X capability structure with the returned vector
112+
* and Local APIC ID, and register an interrupt handler
113+
* for the vector on the specified CPU.
91114
*/
92-
int alloc_msi_vector(void);
115+
int alloc_msi_vector(uint8_t cpu);
93116

94117
/**
95-
* @brief Free a previously allocated MSI interrupt vector.
118+
* @brief Free a previously allocated MSI interrupt vector on a given CPU.
96119
*
97-
* Marks the given MSI vector as available for reuse.
120+
* Marks the given MSI vector as available for reuse on the specified
121+
* Local APIC ID.
98122
*
99-
* @param vec The MSI vector to free (64–255).
123+
* @param cpu Local APIC ID of the CPU that the vector belongs to.
124+
* @param vec The MSI vector to free (64–255).
100125
*
101126
* @warning Behaviour is undefined if freeing a vector that
102-
* was never allocated or is still in use.
127+
* was never allocated on the given CPU, or is still in use.
103128
*
104129
* @note Drivers should call this during teardown to avoid
105-
* leaking interrupt vectors.
130+
* leaking interrupt vectors on that CPU.
106131
*/
107-
void free_msi_vector(int vec);
132+
void free_msi_vector(uint8_t cpu, int vec);
133+

include/interrupt.h

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,6 @@ enum irq_number_t {
3838
IRQ16 = 50, ///< Local APIC timer vector (used instead of IRQ0 in APIC mode)
3939
};
4040

41-
42-
/**
43-
* @brief Function pointer type for interrupt and IRQ handlers.
44-
*
45-
* This defines the signature of an interrupt service routine (ISR) that may be
46-
* registered via `register_interrupt_handler`. It will be called whenever a
47-
* matching interrupt or IRQ occurs.
48-
*
49-
* All handlers receive the interrupt vector number, error code (if any),
50-
* the IRQ number (for hardware IRQs, this is `isrnumber - 32`), and a user-defined
51-
* opaque pointer that was passed during registration. This allows for stateful
52-
* or device-specific behaviour in shared or reentrant interrupt contexts.
53-
*
54-
* @param isrnumber The IDT entry number (0–255), corresponding to the triggered interrupt vector.
55-
* @param errorcode A CPU-generated error code for certain exceptions (e.g. page faults). Always 0 for IRQs.
56-
* @param irqnumber For IRQs, this is typically `isrnumber - 32`; otherwise 0.
57-
* @param opaque The same pointer that was passed to `register_interrupt_handler`; used for context or state.
58-
*/
59-
typedef void (*isr_t)(uint8_t isrnumber, uint64_t errorcode, uint64_t irqnumber, void* opaque);
60-
61-
6241
/**
6342
* @brief Represents a single handler in a chain of handlers attached to a shared ISR.
6443
*

include/pci.h

Lines changed: 177 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,13 @@ extern pci_dev_t dev_zero;
8787
#define PCI_MSI_EDGETRIGGER (1 << 15)
8888
#define PCI_MSI_ENABLE (1 << 16)
8989

90-
#define PCI_BAR_TYPE_MEMORY 0x00
91-
#define PCI_BAR_TYPE_IOPORT 0x01
90+
#define PCI_CAPABILITY_MSIX 0x11
91+
92+
#define PCI_MSIX_ENABLE (1 << 15)
93+
#define PCI_MSIX_FUNCTION_MASK (1 << 14)
94+
95+
#define PCI_BAR_TYPE_MEMORY 0x00
96+
#define PCI_BAR_TYPE_IOPORT 0x01
9297

9398
#define PCI_HEADER_TYPE_DEVICE 0
9499
#define PCI_HEADER_TYPE_BRIDGE 1
@@ -101,23 +106,187 @@ extern pci_dev_t dev_zero;
101106
#define DEVICE_PER_BUS 32
102107
#define FUNCTION_PER_DEVICE 32
103108

109+
/**
110+
* @brief Read a value from a PCI configuration space field.
111+
* @param dev PCI device descriptor.
112+
* @param field Offset of the configuration field.
113+
* @return The value read (size determined by field).
114+
*/
104115
uint32_t pci_read(pci_dev_t dev, uint32_t field);
116+
117+
/**
118+
* @brief Write a value into a PCI configuration space field.
119+
* @param dev PCI device descriptor.
120+
* @param field Offset of the configuration field.
121+
* @param value Value to write (size determined by field).
122+
*/
105123
void pci_write(pci_dev_t dev, uint32_t field, uint32_t value);
124+
125+
/**
126+
* @brief Get the PCI device class/subclass identifier.
127+
* @param dev PCI device descriptor.
128+
* @return Encoded device type (class << 8 | subclass).
129+
*/
106130
uint32_t get_device_type(pci_dev_t dev);
131+
132+
/**
133+
* @brief Get the secondary bus number from a PCI bridge device.
134+
* @param dev PCI bridge device descriptor.
135+
* @return Secondary bus number.
136+
*/
107137
uint32_t get_secondary_bus(pci_dev_t dev);
138+
139+
/**
140+
* @brief Determine if the device is a single-function endpoint.
141+
* @param dev PCI device descriptor.
142+
* @return Non-zero if endpoint, zero if multifunction/bridge.
143+
*/
108144
uint32_t pci_reach_end(pci_dev_t dev);
109-
pci_dev_t pci_scan_function(uint16_t vendor_id, uint16_t device_id, uint32_t bus, uint32_t device, uint32_t function, int device_type);
110-
pci_dev_t pci_scan_device(uint16_t vendor_id, uint16_t device_id, uint32_t bus, uint32_t device, int device_type);
111-
pci_dev_t pci_scan_bus(uint16_t vendor_id, uint16_t device_id, uint32_t bus, int device_type);
112-
pci_dev_t pci_get_device(uint16_t vendor_id, uint16_t device_id, int device_type);
145+
146+
/**
147+
* @brief Scan a specific PCI function for matching IDs or type.
148+
* @param vendor_id Vendor ID to match (0 = wildcard).
149+
* @param device_id Device ID to match (0 = wildcard).
150+
* @param bus Bus number.
151+
* @param device Device slot number.
152+
* @param function Function number.
153+
* @param device_type Device class/type to match (-1 = any).
154+
* @return Matching PCI device or dev_zero if none.
155+
*/
156+
pci_dev_t pci_scan_function(uint16_t vendor_id, uint16_t device_id,
157+
uint32_t bus, uint32_t device,
158+
uint32_t function, int device_type);
159+
160+
/**
161+
* @brief Scan all functions of a given PCI device slot.
162+
* @param vendor_id Vendor ID to match (0 = wildcard).
163+
* @param device_id Device ID to match (0 = wildcard).
164+
* @param bus Bus number.
165+
* @param device Device slot number.
166+
* @param device_type Device class/type to match (-1 = any).
167+
* @return Matching PCI device or dev_zero if none.
168+
*/
169+
pci_dev_t pci_scan_device(uint16_t vendor_id, uint16_t device_id,
170+
uint32_t bus, uint32_t device,
171+
int device_type);
172+
173+
/**
174+
* @brief Scan all devices on a given PCI bus.
175+
* @param vendor_id Vendor ID to match (0 = wildcard).
176+
* @param device_id Device ID to match (0 = wildcard).
177+
* @param bus Bus number.
178+
* @param device_type Device class/type to match (-1 = any).
179+
* @return First matching PCI device or dev_zero if none.
180+
*/
181+
pci_dev_t pci_scan_bus(uint16_t vendor_id, uint16_t device_id,
182+
uint32_t bus, int device_type);
183+
184+
/**
185+
* @brief Find a device by vendor, device, or type.
186+
* @param vendor_id Vendor ID to match (0 = any).
187+
* @param device_id Device ID to match (0 = any).
188+
* @param device_type Device class/type to match (-1 = any).
189+
* @return Matching PCI device or dev_zero if none.
190+
*/
191+
pci_dev_t pci_get_device(uint16_t vendor_id, uint16_t device_id,
192+
int device_type);
193+
194+
/**
195+
* @brief Initialise the PCI subsystem and enumerate devices.
196+
*/
113197
void init_pci();
198+
199+
/**
200+
* @brief Enable bus mastering on a PCI device.
201+
* @param device PCI device descriptor.
202+
* @return True if bus mastering was newly enabled, false if already set.
203+
*/
114204
bool pci_bus_master(pci_dev_t device);
205+
206+
/**
207+
* @brief Get the type of a PCI Base Address Register (BAR).
208+
* @param field Raw BAR field value.
209+
* @return 0 = memory (PCI_BAR_TYPE_MEMORY), 1 = I/O port (PCI_BAR_TYPE_IOPORT).
210+
*/
115211
uint8_t pci_bar_type(uint32_t field);
212+
213+
/**
214+
* @brief Extract an I/O base address from a BAR.
215+
* @param field Raw BAR field value.
216+
* @return I/O port base address.
217+
*/
116218
uint16_t pci_io_base(uint32_t field);
219+
220+
/**
221+
* @brief Extract a memory base address from a BAR.
222+
* @param field Raw BAR field value.
223+
* @return Memory-mapped base address.
224+
*/
117225
uint32_t pci_mem_base(uint32_t field);
226+
227+
/**
228+
* @brief Test whether a device descriptor represents 'not found'.
229+
* @param device PCI device descriptor.
230+
* @return True if no device, false otherwise.
231+
*/
118232
bool pci_not_found(pci_dev_t device);
119233

120-
bool pci_enable_msi(pci_dev_t device, uint32_t vector);
234+
/**
235+
* @brief Enable MSI (Message Signalled Interrupts) on a device.
236+
* @param device PCI device descriptor.
237+
* @param vector APIC interrupt vector to use.
238+
* @param lapic_id Local APIC ID of the CPU to route the vector to.
239+
* @return True if MSI was enabled successfully.
240+
*/
241+
bool pci_enable_msi(pci_dev_t device, uint32_t vector, uint32_t lapic_id);
242+
243+
/**
244+
* @brief Display enumerated PCI devices to the system log.
245+
*/
121246
void pci_display_device_list();
247+
248+
/**
249+
* @brief Get a list of all enumerated PCI devices.
250+
* @param list Pointer to array of devices (output).
251+
* @return Number of devices in the list.
252+
*/
122253
size_t pci_get_device_list(pci_dev_t** list);
123-
void pci_interrupt_enable(pci_dev_t device, bool enable);
254+
255+
/**
256+
* @brief Enable or disable legacy INTx interrupt signalling.
257+
* @param device PCI device descriptor.
258+
* @param enable True = enable, False = disable.
259+
*/
260+
void pci_interrupt_enable(pci_dev_t device, bool enable);
261+
262+
/**
263+
* @brief Enable MSI-X (Extended Message Signalled Interrupts) on a device.
264+
* @param device PCI device descriptor.
265+
* @param vector APIC interrupt vector to assign.
266+
* @param entry MSI-X table entry to configure.
267+
* @param lapic_id Local APIC ID of the CPU to route the vector to.
268+
* @return True if MSI-X was enabled successfully.
269+
*/
270+
bool pci_enable_msix(pci_dev_t device, uint32_t vector, uint16_t entry, uint32_t lapic_id);
271+
272+
/**
273+
* @brief Configure an interrupt for a PCI device (MSI/MSI-X if available).
274+
*
275+
* Attempts to enable MSI/MSI-X for the given PCI device, routing the
276+
* interrupt to the specified Local APIC ID. If the device does not support
277+
* MSI/MSI-X, falls back to legacy INTx routing.
278+
*
279+
* @param name Human-readable device name (used in logs/debug output).
280+
* @param dev The PCI device handle.
281+
* @param lapic_id Local APIC ID of the target CPU that should receive the
282+
* interrupt.
283+
* @param handler Interrupt service routine to register.
284+
* @param context Opaque pointer passed to the ISR when the interrupt fires.
285+
*
286+
* @return The assigned interrupt vector (64–255 if MSI/MSI-X was used,
287+
* or IRQ_START + line if falling back to legacy INTx).
288+
*
289+
* @note Drivers should always check the returned vector to confirm which
290+
* interrupt mechanism was actually configured.
291+
*/
292+
uint32_t pci_setup_interrupt(const char* name, pci_dev_t dev, uint8_t lapic_id, isr_t handler, void *context);

src/ahci.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -594,21 +594,8 @@ void init_ahci()
594594
}
595595

596596
ahci_base = pci_mem_base(ahci_base);
597-
598-
uint32_t irq_num = pci_read(ahci_device, PCI_INTERRUPT_LINE);
599-
uint32_t irq_pin = pci_read(ahci_device, PCI_INTERRUPT_PIN);
600-
uint32_t vector = alloc_msi_vector();
601-
bool msi_ok = pci_enable_msi(ahci_device, vector);
602-
if (msi_ok) {
603-
register_interrupt_handler(vector, ahci_handler, ahci_device, (void *) ahci_base);
604-
kprintf("AHCI: MSI on, INT %d\n", vector);
605-
} else {
606-
free_msi_vector(vector);
607-
register_interrupt_handler(IRQ_START + irq_num, ahci_handler, ahci_device, (void *) ahci_base);
608-
kprintf("AHCI: MSI off, INT %d\n", irq_num);
609-
}
610-
pci_interrupt_enable(ahci_device, true);
611-
dprintf("AHCI base MMIO: %08x INT %d PIN#%c\n", ahci_base, irq_num, irq_pin + 'A' - 1);
597+
uint32_t irq_num = pci_setup_interrupt("AHCI", ahci_device, cpu_id(), ahci_handler, ahci_base);
598+
dprintf("AHCI base MMIO: %08x INT %d\n", ahci_base, irq_num);
612599

613600
probe_port((ahci_hba_mem_t*)ahci_base, ahci_device);
614601
}

src/e1000.c

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -319,17 +319,36 @@ bool e1000_start(pci_dev_t *pci_device) {
319319
}
320320

321321
if (e1000_device_id == E1000_82540EM) {
322-
uint8_t vector = alloc_msi_vector();
323-
bool msi_ok = pci_enable_msi(*pci_device, vector);
324-
if (msi_ok) {
325-
kprintf("e1000: MSI enabled, INT %d\n", vector);
326-
register_interrupt_handler(vector, e1000_handler, *pci_device, NULL);
327-
} else {
328-
free_msi_vector(vector);
329-
uint32_t irq_num = pci_read(*pci_device, PCI_INTERRUPT_LINE);
330-
register_interrupt_handler(IRQ_START + irq_num, e1000_handler, *pci_device, NULL);
331-
}
322+
/* Attempting MSI setup is safe here */
323+
pci_setup_interrupt("e1000", *pci_device, cpu_id(), e1000_handler, NULL);
332324
} else {
325+
/* But not here! The 82541PI actively torpedoes the system if you enable MSI, see its errata:
326+
*
327+
* 82541PI GIGABIT ETHERNET CONTROLLER SPECIFICATION UPDATE
328+
*
329+
* 7. Message Signaled Interrupt Feature May Corrupt Write Transactions
330+
*
331+
* Problem: The problem is with the implementation of the Message Signaled Interrupt (MSI) feature in the Ethernet
332+
* controllers. During MSI writes, the controller incorrectly accesses the write data FIFO. If there are pending write
333+
* transactions when this occurs, these transactions may become corrupted, which may cause the network
334+
* controller to lock up and become unresponsive.
335+
*
336+
* For a normal PCI write transaction, the controller’s PCI logic receives data to be written from an internal FIFO.
337+
* Once the controller is given bus ownership, the PCI logic pulls the data out of this FIFO and performs the write
338+
* transaction.
339+
*
340+
* For systems using MSI writes, the data, which is constant, should be pulled from the controller’s PCI
341+
* Configuration Space rather than the internal FIFO. The affected devices are not pulling this data from PCI
342+
* Configuration Space. Instead, they are pulling data from the internal FIFO.
343+
*
344+
* Implication: If the affected products are used with a future OS that uses Message Signal Interrupts and no accommodations
345+
* are made to mitigate the use of these interrupts, data integrity issues may occur.
346+
*
347+
* Workaround: For PCI systems, advertisement of the MSI capability can be turned off by setting the MSI Disable bit in the
348+
* EEPROM (Init Control Word 2, bit 7).
349+
*
350+
* Status: Intel does not plan to resolve this erratum in the 82541 Gigabit Ethernet controller.
351+
*/
333352
uint32_t irq_num = pci_read(*pci_device, PCI_INTERRUPT_LINE);
334353
register_interrupt_handler(IRQ_START + irq_num, e1000_handler, *pci_device, NULL);
335354

0 commit comments

Comments
 (0)