Skip to content

Commit 5237658

Browse files
committed
580.94.10
1 parent e2dbb3d commit 5237658

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+700
-273
lines changed

README.md

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# NVIDIA Linux Open GPU Kernel Module Source
22

33
This is the source release of the NVIDIA Linux open GPU kernel modules,
4-
version 580.94.06.
4+
version 580.94.10.
55

66

77
## How to Build
@@ -17,7 +17,7 @@ as root:
1717

1818
Note that the kernel modules built here must be used with GSP
1919
firmware and user-space NVIDIA GPU driver components from a corresponding
20-
580.94.06 driver release. This can be achieved by installing
20+
580.94.10 driver release. This can be achieved by installing
2121
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
2222
option. E.g.,
2323

@@ -185,7 +185,7 @@ table below).
185185
For details on feature support and limitations, see the NVIDIA GPU driver
186186
end user README here:
187187

188-
https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.06/README/kernel_open.html
188+
https://us.download.nvidia.com/XFree86/Linux-x86_64/580.94.10/README/kernel_open.html
189189

190190
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
191191
Package for more details.
@@ -749,6 +749,7 @@ Subsystem Device ID.
749749
| NVIDIA A10 | 2236 10DE 1482 |
750750
| NVIDIA A10G | 2237 10DE 152F |
751751
| NVIDIA A10M | 2238 10DE 1677 |
752+
| NVIDIA H20 NVL16 | 230E 10DE 20DF |
752753
| NVIDIA H100 NVL | 2321 10DE 1839 |
753754
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
754755
| NVIDIA H800 | 2324 10DE 17A6 |
@@ -949,9 +950,10 @@ Subsystem Device ID.
949950
| NVIDIA GB200 | 2941 10DE 20D5 |
950951
| NVIDIA GB200 | 2941 10DE 21C9 |
951952
| NVIDIA GB200 | 2941 10DE 21CA |
953+
| NVIDIA DRIVE P2021 | 29BB 10DE 207C |
952954
| NVIDIA GeForce RTX 5090 | 2B85 |
953955
| NVIDIA GeForce RTX 5090 D | 2B87 |
954-
| NVIDIA GeForce RTX 5090 D v2 | 2B8C 17AA 530C |
956+
| NVIDIA GeForce RTX 5090 D v2 | 2B8C |
955957
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 1028 204B |
956958
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B |
957959
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
@@ -964,6 +966,8 @@ Subsystem Device ID.
964966
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
965967
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
966968
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C |
969+
| NVIDIA RTX PRO 6000 Blackwell Server Edition | 2BB5 10DE 204E |
970+
| NVIDIA RTX 6000D | 2BB9 10DE 2091 |
967971
| NVIDIA GeForce RTX 5080 | 2C02 |
968972
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
969973
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 |
@@ -974,6 +978,7 @@ Subsystem Device ID.
974978
| NVIDIA RTX PRO 4500 Blackwell | 2C31 17AA 2051 |
975979
| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 1028 2053 |
976980
| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 103C 2053 |
981+
| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 10DE 2053 |
977982
| NVIDIA RTX PRO 4000 Blackwell SFF Edition | 2C33 17AA 2053 |
978983
| NVIDIA RTX PRO 4000 Blackwell | 2C34 1028 2052 |
979984
| NVIDIA RTX PRO 4000 Blackwell | 2C34 103C 2052 |
@@ -983,22 +988,29 @@ Subsystem Device ID.
983988
| NVIDIA RTX PRO 4000 Blackwell Generation Laptop GPU | 2C39 |
984989
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 |
985990
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 |
991+
| NVIDIA RTX PRO 5000 Blackwell Embedded GPU | 2C77 |
992+
| NVIDIA RTX PRO 4000 Blackwell Embedded GPU | 2C79 |
986993
| NVIDIA GeForce RTX 5060 Ti | 2D04 |
987994
| NVIDIA GeForce RTX 5060 | 2D05 |
988995
| NVIDIA GeForce RTX 5070 Laptop GPU | 2D18 |
989996
| NVIDIA GeForce RTX 5060 Laptop GPU | 2D19 |
990997
| NVIDIA RTX PRO 2000 Blackwell | 2D30 1028 2054 |
991998
| NVIDIA RTX PRO 2000 Blackwell | 2D30 103C 2054 |
999+
| NVIDIA RTX PRO 2000 Blackwell | 2D30 10DE 2054 |
9921000
| NVIDIA RTX PRO 2000 Blackwell | 2D30 17AA 2054 |
9931001
| NVIDIA RTX PRO 2000 Blackwell Generation Laptop GPU | 2D39 |
9941002
| NVIDIA GeForce RTX 5070 Laptop GPU | 2D58 |
9951003
| NVIDIA GeForce RTX 5060 Laptop GPU | 2D59 |
996-
| NVIDIA GeForce RTX 5050 | 2D83 17AA C791 |
1004+
| NVIDIA RTX PRO 2000 Blackwell Embedded GPU | 2D79 |
1005+
| NVIDIA GeForce RTX 5050 | 2D83 |
9971006
| NVIDIA GeForce RTX 5050 Laptop GPU | 2D98 |
9981007
| NVIDIA RTX PRO 1000 Blackwell Generation Laptop GPU | 2DB8 |
9991008
| NVIDIA RTX PRO 500 Blackwell Generation Laptop GPU | 2DB9 |
10001009
| NVIDIA GeForce RTX 5050 Laptop GPU | 2DD8 |
1010+
| NVIDIA RTX PRO 500 Blackwell Embedded GPU | 2DF9 |
10011011
| NVIDIA GeForce RTX 5070 | 2F04 |
10021012
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
10031013
| NVIDIA RTX PRO 3000 Blackwell Generation Laptop GPU | 2F38 |
10041014
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 |
1015+
| NVIDIA B300 SXM6 AC | 3182 10DE 20E6 |
1016+
| NVIDIA GB300 | 31C2 10DE 21F1 |

kernel-open/Kbuild

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ ccflags-y += -I$(src)/common/inc
7979
ccflags-y += -I$(src)
8080
ccflags-y += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
8181
ccflags-y += -D__KERNEL__ -DMODULE -DNVRM
82-
ccflags-y += -DNV_VERSION_STRING=\"580.94.06\"
82+
ccflags-y += -DNV_VERSION_STRING=\"580.94.10\"
8383

8484
# Include and link Tegra out-of-tree modules.
8585
ifneq ($(wildcard /usr/src/nvidia/nvidia-oot),)

kernel-open/common/inc/nvstatuscodes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ NV_STATUS_CODE(NV_ERR_FABRIC_STATE_OUT_OF_SYNC, 0x00000087, "NVLink fabri
165165
NV_STATUS_CODE(NV_ERR_BUFFER_FULL, 0x00000088, "Buffer is full")
166166
NV_STATUS_CODE(NV_ERR_BUFFER_EMPTY, 0x00000089, "Buffer is empty")
167167
NV_STATUS_CODE(NV_ERR_MC_FLA_OFFSET_TABLE_FULL, 0x0000008A, "Multicast FLA offset table has no available slots")
168+
NV_STATUS_CODE(NV_ERR_DMA_XFER_FAILED, 0x0000008B, "DMA transfer failed")
168169

169170
// Warnings:
170171
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

kernel-open/common/inc/os-interface.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct os_work_queue;
6262
/* Each OS defines its own version of this opaque type */
6363
typedef struct os_wait_queue os_wait_queue;
6464

65+
/* Flags returned by os_get_current_process_flags */
66+
#define OS_CURRENT_PROCESS_FLAG_NONE 0x0
67+
#define OS_CURRENT_PROCESS_FLAG_KERNEL_THREAD 0x1
68+
#define OS_CURRENT_PROCESS_FLAG_EXITING 0x2
69+
6570
/*
6671
* ---------------------------------------------------------------------------
6772
*
@@ -194,6 +199,7 @@ NV_STATUS NV_API_CALL os_open_readonly_file (const char *, void **
194199
NV_STATUS NV_API_CALL os_open_and_read_file (const char *, NvU8 *, NvU64);
195200
NvBool NV_API_CALL os_is_nvswitch_present (void);
196201
NV_STATUS NV_API_CALL os_get_random_bytes (NvU8 *, NvU16);
202+
NvU32 NV_API_CALL os_get_current_process_flags (void);
197203
NV_STATUS NV_API_CALL os_alloc_wait_queue (os_wait_queue **);
198204
void NV_API_CALL os_free_wait_queue (os_wait_queue *);
199205
void NV_API_CALL os_wait_uninterruptible (os_wait_queue *);

kernel-open/nvidia-uvm/uvm_ampere_host.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,3 +461,29 @@ void uvm_hal_ampere_host_tlb_invalidate_test(uvm_push_t *push,
461461
if (params->membar == UvmInvalidateTlbMemBarLocal)
462462
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
463463
}
464+
465+
// Pushes an L2 cache invalidate for the given aperture onto `push`.
//
// The MEM_OP_D OPERATION field is selected from the aperture:
//   - UVM_APERTURE_SYS          -> L2_SYSMEM_INVALIDATE
//   - any peer aperture         -> L2_PEERMEM_INVALIDATE
//     (as reported by uvm_aperture_is_peer())
// Any other aperture trips UVM_ASSERT_MSG and returns without pushing
// anything.
//
// The invalidate itself is bracketed by a UVM_MEMBAR_SYS membar before and
// after the MEM_OP methods.
void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
466+
{
467+
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
468+
NvU32 aperture_value;
469+
470+
// Translate the aperture into the MEM_OP_D operation to issue.
if (aperture == UVM_APERTURE_SYS) {
471+
aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_SYSMEM_INVALIDATE);
472+
}
473+
else if (uvm_aperture_is_peer(aperture)) {
474+
aperture_value = HWCONST(C56F, MEM_OP_D, OPERATION, L2_PEERMEM_INVALIDATE);
475+
}
476+
else {
477+
// Unsupported aperture: assert and bail out before touching the push.
UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
478+
return;
479+
}
480+
481+
// First sysmembar, issued ahead of the invalidate.
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
482+
483+
// MEM_OP_A..C are written as 0; only MEM_OP_D carries the selected
// invalidate operation.
NV_PUSH_4U(C56F, MEM_OP_A, 0,
484+
MEM_OP_B, 0,
485+
MEM_OP_C, 0,
486+
MEM_OP_D, aperture_value);
487+
488+
// Final sysmembar, issued after the invalidate.
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
489+
}

kernel-open/nvidia-uvm/uvm_blackwell_host.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -347,10 +347,23 @@ uvm_hal_blackwell_access_counter_query_clear_op_gb20x(uvm_parent_gpu_t *parent_g
347347
return UVM_ACCESS_COUNTER_CLEAR_OP_TARGETED;
348348
}
349349

350-
// Host-specific L2 cache invalidate for non-coherent sysmem
351-
void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
350+
void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture)
352351
{
353352
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
353+
NvU32 aperture_value;
354+
355+
if (!gpu->parent->is_integrated_gpu) {
356+
return uvm_hal_ampere_host_l2_invalidate(push, aperture);
357+
}
358+
359+
switch (aperture) {
360+
case UVM_APERTURE_SYS:
361+
aperture_value = HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE);
362+
break;
363+
default:
364+
UVM_ASSERT_MSG(false, "Invalid aperture_type %d\n", aperture);
365+
return;
366+
}
354367

355368
// First sysmembar
356369
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
@@ -363,7 +376,7 @@ void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push)
363376
NV_PUSH_4U(C96F, MEM_OP_A, 0,
364377
MEM_OP_B, 0,
365378
MEM_OP_C, 0,
366-
MEM_OP_D, HWCONST(C96F, MEM_OP_D, OPERATION, L2_SYSMEM_NCOH_INVALIDATE));
379+
MEM_OP_D, aperture_value);
367380
// Final sysmembar
368381
uvm_hal_membar(gpu, push, UVM_MEMBAR_SYS);
369382
}

kernel-open/nvidia-uvm/uvm_hal.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ static uvm_hal_class_ops_t host_table[] =
221221
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
222222
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
223223
.access_counter_query_clear_op = uvm_hal_maxwell_access_counter_query_clear_op_unsupported,
224-
.l2_invalidate_noncoh_sysmem = uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported,
224+
.l2_invalidate = uvm_hal_host_l2_invalidate_unsupported,
225225
.get_time = uvm_hal_maxwell_get_time,
226226
}
227227
},
@@ -287,6 +287,7 @@ static uvm_hal_class_ops_t host_table[] =
287287
.tlb_invalidate_all = uvm_hal_ampere_host_tlb_invalidate_all,
288288
.tlb_invalidate_va = uvm_hal_ampere_host_tlb_invalidate_va,
289289
.tlb_invalidate_test = uvm_hal_ampere_host_tlb_invalidate_test,
290+
.l2_invalidate = uvm_hal_ampere_host_l2_invalidate,
290291
}
291292
},
292293
{
@@ -315,8 +316,8 @@ static uvm_hal_class_ops_t host_table[] =
315316
.tlb_invalidate_phys = uvm_hal_blackwell_host_tlb_invalidate_phys,
316317
.tlb_invalidate_test = uvm_hal_blackwell_host_tlb_invalidate_test,
317318
.tlb_flush_prefetch = uvm_hal_blackwell_host_tlb_flush_prefetch,
318-
.l2_invalidate_noncoh_sysmem = uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem,
319319
.access_counter_query_clear_op = uvm_hal_blackwell_access_counter_query_clear_op_gb100,
320+
.l2_invalidate = uvm_hal_blackwell_host_l2_invalidate,
320321
}
321322
},
322323
{
@@ -1162,10 +1163,11 @@ void uvm_hal_ce_memcopy_patch_src_stub(uvm_push_t *push, uvm_gpu_address_t *src)
11621163
{
11631164
}
11641165

1165-
void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push)
1166+
void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture)
11661167
{
11671168
uvm_gpu_t *gpu = uvm_push_get_gpu(push);
11681169
UVM_ERR_PRINT("L2 cache invalidation: Called on unsupported GPU %s (arch: 0x%x, impl: 0x%x)\n",
11691170
uvm_gpu_name(gpu), gpu->parent->rm_info.gpuArch, gpu->parent->rm_info.gpuImplementation);
1170-
UVM_ASSERT_MSG(false, "host l2_invalidate_noncoh_sysmem called on unsupported GPU\n");
1171+
UVM_ASSERT_MSG(false, "L2 invalidate is not supported on %s",
1172+
uvm_parent_gpu_name(gpu->parent));
11711173
}

kernel-open/nvidia-uvm/uvm_hal.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,11 +248,12 @@ typedef void (*uvm_hal_host_tlb_flush_prefetch_t)(uvm_push_t *push);
248248
void uvm_hal_maxwell_host_tlb_flush_prefetch_unsupported(uvm_push_t *push);
249249
void uvm_hal_blackwell_host_tlb_flush_prefetch(uvm_push_t *push);
250250

251-
// L2 cache invalidate for non-coherent sysmem for systems with write back cache.
252-
// These are iGPUs as of now.
253-
typedef void (*uvm_hal_host_l2_invalidate_noncoh_sysmem_t)(uvm_push_t *push);
254-
void uvm_hal_blackwell_host_l2_invalidate_noncoh_sysmem(uvm_push_t *push);
255-
void uvm_hal_host_l2_invalidate_noncoh_sysmem_unsupported(uvm_push_t *push);
251+
// Performs L2 cache invalidation for peer or system memory.
252+
typedef void (*uvm_hal_host_l2_invalidate_t)(uvm_push_t *push, uvm_aperture_t aperture);
253+
void uvm_hal_blackwell_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
254+
255+
void uvm_hal_ampere_host_l2_invalidate(uvm_push_t *push, uvm_aperture_t aperture);
256+
void uvm_hal_host_l2_invalidate_unsupported(uvm_push_t *push, uvm_aperture_t aperture);
256257

257258
// By default all semaphore release operations include a membar sys before the
258259
// operation. This can be affected by using UVM_PUSH_FLAG_NEXT_* flags with
@@ -822,7 +823,7 @@ struct uvm_host_hal_struct
822823
uvm_hal_host_tlb_invalidate_phys_t tlb_invalidate_phys;
823824
uvm_hal_host_tlb_invalidate_test_t tlb_invalidate_test;
824825
uvm_hal_host_tlb_flush_prefetch_t tlb_flush_prefetch;
825-
uvm_hal_host_l2_invalidate_noncoh_sysmem_t l2_invalidate_noncoh_sysmem;
826+
uvm_hal_host_l2_invalidate_t l2_invalidate;
826827
uvm_hal_fault_buffer_replay_t replay_faults;
827828
uvm_hal_fault_cancel_global_t cancel_faults_global;
828829
uvm_hal_fault_cancel_targeted_t cancel_faults_targeted;

kernel-open/nvidia-uvm/uvm_map_external.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,11 +1276,20 @@ void uvm_ext_gpu_map_destroy(uvm_va_range_external_t *external_range,
12761276

12771277
range_tree = uvm_ext_gpu_range_tree(external_range, mapped_gpu);
12781278

1279-
// Perform L2 cache invalidation for noncoherent sysmem mappings.
1280-
// This is done only on systems with write-back cache which is iGPUs as of now.
1279+
// Perform L2 cache invalidation for cached peer and sysmem mappings.
12811280
if (ext_gpu_map->need_l2_invalidate_at_unmap) {
1282-
UVM_ASSERT(ext_gpu_map->gpu->parent->is_integrated_gpu);
1283-
status = uvm_mmu_l2_invalidate_noncoh_sysmem(mapped_gpu);
1281+
uvm_aperture_t aperture;
1282+
1283+
// Peer cache invalidation is not targeted to a specific peer, so we
1284+
// just use UVM_APERTURE_PEER(0).
1285+
if (ext_gpu_map->is_egm)
1286+
aperture = UVM_APERTURE_PEER(0);
1287+
else if (ext_gpu_map->is_sysmem)
1288+
aperture = UVM_APERTURE_SYS;
1289+
else
1290+
aperture = UVM_APERTURE_PEER(0);
1291+
1292+
status = uvm_mmu_l2_invalidate(mapped_gpu, aperture);
12841293
UVM_ASSERT(status == NV_OK);
12851294
}
12861295

kernel-open/nvidia-uvm/uvm_mmu.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2974,25 +2974,21 @@ NV_STATUS uvm_mmu_tlb_invalidate_phys(uvm_gpu_t *gpu)
29742974
return uvm_push_end_and_wait(&push);
29752975
}
29762976

2977-
NV_STATUS uvm_mmu_l2_invalidate_noncoh_sysmem(uvm_gpu_t *gpu)
2977+
NV_STATUS uvm_mmu_l2_invalidate(uvm_gpu_t *gpu, uvm_aperture_t aperture)
29782978
{
29792979
uvm_push_t push;
29802980
NV_STATUS status;
29812981

2982-
// L2 cache invalidation is only done for systems with write-back
2983-
// cache which is iGPUs as of now.
2984-
UVM_ASSERT(gpu->parent->is_integrated_gpu);
2985-
29862982
status = uvm_push_begin(gpu->channel_manager,
29872983
UVM_CHANNEL_TYPE_MEMOPS,
29882984
&push,
2989-
"L2 cache invalidate for sysmem");
2985+
"L2 cache invalidate");
29902986
if (status != NV_OK) {
29912987
UVM_ERR_PRINT("L2 cache invalidation: Failed to begin push, status: %s\n", nvstatusToString(status));
29922988
return status;
29932989
}
29942990

2995-
gpu->parent->host_hal->l2_invalidate_noncoh_sysmem(&push);
2991+
gpu->parent->host_hal->l2_invalidate(&push, aperture);
29962992

29972993
status = uvm_push_end_and_wait(&push);
29982994
if (status != NV_OK)

0 commit comments

Comments
 (0)