Skip to content

Commit aec514d

Browse files
authored
fix(cudart): recreate CUDA runtime bindings based on CUDA 13.0 (#106)
# What ❔ This PR recreates the CUDA runtime bindings based on CUDA toolkit version 13.0. ## Why ❔ The bindings based on CUDA toolkit version 12.x used the `cudaGetDeviceProperties_v2` function, which was removed in version 13; the `cudaGetDeviceProperties` function is used instead. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [ ] Tests for the changes have been added / updated. - [ ] Documentation comments have been added / updated. - [x] Code has been formatted via `zk fmt` and `zk lint`.
1 parent b925169 commit aec514d

File tree

11 files changed

+65
-45
lines changed

11 files changed

+65
-45
lines changed

.github/workflows/ci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
runs-on: [ubuntu-24.04-github-hosted-32core]
1313
strategy:
1414
matrix:
15-
cuda: [ "12.0.0-devel-ubuntu20.04", "12.5.0-devel-ubuntu20.04" ]
15+
cuda: [ "12.0.0-devel-ubuntu20.04", "13.0.0-devel-ubuntu24.04" ]
1616
container:
1717
image: nvidia/cuda:${{ matrix.cuda }}
1818
env:

.github/workflows/publish-crates.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
publish-crates:
1515
runs-on: [ubuntu-24.04-github-hosted-32core]
1616
container:
17-
image: nvidia/cuda:12.5.0-devel-ubuntu20.04
17+
image: nvidia/cuda:13.0.0-devel-ubuntu24.04
1818
env:
1919
BELLMAN_CUDA_DIR: ${{ github.workspace }}/bellman-cuda
2020
CUDAARCHS: 89

.github/workflows/release-please.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
process-release:
3535
runs-on: [ubuntu-24.04-github-hosted-32core]
3636
container:
37-
image: nvidia/cuda:12.5.0-devel-ubuntu20.04
37+
image: nvidia/cuda:13.0.0-devel-ubuntu24.04
3838
env:
3939
BELLMAN_CUDA_DIR: ${{ github.workspace }}/bellman-cuda
4040
CUDAARCHS: 89

.github/workflows/test-gpu.yaml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: [ ubuntu-latest ]
1414
strategy:
1515
matrix:
16-
cuda: [ "12.0.0-devel-ubuntu20.04", "12.5.0-devel-ubuntu20.04" ]
16+
cuda: [ "12.0.0-devel-ubuntu20.04", "13.0.0-devel-ubuntu24.04" ]
1717
# TODO: Building the whole workspace with `--test` currently fails with link-time errors,
1818
# presumably due to either `gpu-ffi` or `gpu-prover` crates.
1919
# So for now we point at specific packages we want to test.
@@ -125,21 +125,21 @@ jobs:
125125
name: zksync-crypto-gpu-12.0.0-devel-ubuntu20.04-${{ matrix.package }}-test-binary
126126
path: zksync-crypto-gpu-test-binary/12.0/
127127

128-
- name: Download test binary built with CUDA 12.5
128+
- name: Download test binary built with CUDA 13.0
129129
uses: actions/download-artifact@v4
130130
with:
131-
name: zksync-crypto-gpu-12.5.0-devel-ubuntu20.04-${{ matrix.package }}-test-binary
132-
path: zksync-crypto-gpu-test-binary/12.5/
131+
name: zksync-crypto-gpu-13.0.0-devel-ubuntu24.04-${{ matrix.package }}-test-binary
132+
path: zksync-crypto-gpu-test-binary/13.0/
133133

134-
- name: Run test binary built with CUDA 12.5
135-
id: test_cuda_12_5
134+
- name: Run test binary built with CUDA 13.0
135+
id: test_cuda_13_0
136136
continue-on-error: true
137137
run: |
138-
chmod +x zksync-crypto-gpu-test-binary/12.5/${{ matrix.package }}
139-
zksync-crypto-gpu-test-binary/12.5/${{ matrix.package }}
138+
chmod +x zksync-crypto-gpu-test-binary/13.0/${{ matrix.package }}
139+
zksync-crypto-gpu-test-binary/13.0/${{ matrix.package }}
140140
141141
- name: Run test binary built with CUDA 12.0
142-
if: steps.test_cuda_12_5.outcome == 'failure' || steps.test_cuda_12_5.outcome == 'success'
142+
if: steps.test_cuda_13_0.outcome == 'failure' || steps.test_cuda_13_0.outcome == 'success'
143143
run: |
144144
chmod +x zksync-crypto-gpu-test-binary/12.0/${{ matrix.package }}
145145
zksync-crypto-gpu-test-binary/12.0/${{ matrix.package }}

crates/boojum-cuda/build/main.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ fn main() {
1919
use era_cudart_sys::{get_cuda_lib_path, get_cuda_version};
2020
let cuda_version =
2121
get_cuda_version().expect("Failed to determine the CUDA Toolkit version.");
22-
if !cuda_version.starts_with("12.") {
23-
println!("cargo::warning=CUDA Toolkit version {cuda_version} detected. This crate is only tested with CUDA Toolkit 12.*.");
22+
if !(cuda_version.starts_with("12.") || cuda_version.starts_with("13.")) {
23+
println!("cargo::warning=CUDA Toolkit version {cuda_version} detected. This crate is only tested with CUDA Toolkit versions 12.* and 13.*.");
2424
}
2525
let cudaarchs = std::env::var("CUDAARCHS").unwrap_or("native".to_string());
2626
let dst = cmake::Config::new("native")

crates/cudart-sys-bindings-generator/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ description = "CUDA Bindings generator for ZKsync"
1212
publish = false
1313

1414
[dependencies]
15-
bindgen = "0.69"
15+
bindgen = "0.72"
1616
era_cudart_sys.workspace = true

crates/cudart-sys-bindings-generator/src/main.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use bindgen::callbacks::{EnumVariantValue, ParseCallbacks};
1+
use bindgen::callbacks::{EnumVariantValue, ItemInfo, ParseCallbacks};
22
use bindgen::{BindgenError, Bindings};
33
use era_cudart_sys::get_cuda_include_path;
44

@@ -47,9 +47,9 @@ impl ParseCallbacks for CudaParseCallbacks {
4747
}
4848
}
4949

50-
fn item_name(&self, _original_item_name: &str) -> Option<String> {
50+
fn item_name(&self, item_info: ItemInfo) -> Option<String> {
5151
let from = |s: &str| Some(String::from(s));
52-
match _original_item_name {
52+
match item_info.name {
5353
"cudaDeviceAttr" => from("CudaDeviceAttr"),
5454
"cudaLimit" => from("CudaLimit"),
5555
"cudaError" => from("CudaError"),
@@ -106,7 +106,7 @@ fn generate_bindings<T: Into<String>>(header: T) -> Result<Bindings, BindgenErro
106106
.allowlist_function("cudaDeviceSynchronize")
107107
.allowlist_function("cudaGetDevice")
108108
.allowlist_function("cudaGetDeviceCount")
109-
.allowlist_function("cudaGetDeviceProperties_v2")
109+
.allowlist_function("cudaGetDeviceProperties")
110110
.allowlist_function("cudaSetDevice")
111111
// error handling
112112
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__ERROR.html

crates/cudart-sys/build.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ fn main() {
1010
} else {
1111
let cuda_version =
1212
get_cuda_version().expect("Failed to determine the CUDA Toolkit version.");
13-
if !cuda_version.starts_with("12.") {
14-
println!("cargo::warning=CUDA Toolkit version {cuda_version} detected. This crate is only tested with CUDA Toolkit version 12.*.");
13+
if !(cuda_version.starts_with("12.") || cuda_version.starts_with("13.")) {
14+
println!("cargo::warning=CUDA Toolkit version {cuda_version} detected. This crate is only tested with CUDA Toolkit versions 12.* and 13.*.");
1515
}
1616
let cuda_lib_path = get_cuda_lib_path().unwrap();
1717
let cuda_lib_path_str = cuda_lib_path.to_str().unwrap();

crates/cudart-sys/src/bindings.rs

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ pub enum CudaError {
100100
ErrorJitCompilationDisabled = 223,
101101
ErrorUnsupportedExecAffinity = 224,
102102
ErrorUnsupportedDevSideSync = 225,
103+
ErrorContained = 226,
103104
ErrorInvalidSource = 300,
104105
ErrorFileNotFound = 301,
105106
ErrorSharedObjectSymbolNotFound = 302,
@@ -129,6 +130,7 @@ pub enum CudaError {
129130
ErrorInvalidPc = 718,
130131
ErrorLaunchFailure = 719,
131132
ErrorCooperativeLaunchTooLarge = 720,
133+
ErrorTensorMemoryLeak = 721,
132134
ErrorNotPermitted = 800,
133135
ErrorNotSupported = 801,
134136
ErrorSystemNotReady = 802,
@@ -218,6 +220,7 @@ pub struct CudaPointerAttributes {
218220
pub device: ::std::os::raw::c_int,
219221
pub devicePointer: *mut ::std::os::raw::c_void,
220222
pub hostPointer: *mut ::std::os::raw::c_void,
223+
pub reserved: [::std::os::raw::c_long; 8usize],
221224
}
222225
#[repr(C)]
223226
#[derive(Debug, Copy, Clone)]
@@ -279,10 +282,6 @@ pub enum CudaLimit {
279282
MaxL2FetchGranularity = 5,
280283
PersistingL2CacheSize = 6,
281284
}
282-
impl CudaDeviceAttr {
283-
pub const MaxTimelineSemaphoreInteropSupported: CudaDeviceAttr =
284-
CudaDeviceAttr::TimelineSemaphoreInteropSupported;
285-
}
286285
#[repr(u32)]
287286
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
288287
pub enum CudaDeviceAttr {
@@ -380,7 +379,7 @@ pub enum CudaDeviceAttr {
380379
Reserved93 = 93,
381380
Reserved94 = 94,
382381
CooperativeLaunch = 95,
383-
CooperativeMultiDeviceLaunch = 96,
382+
Reserved96 = 96,
384383
MaxSharedMemoryPerBlockOptin = 97,
385384
CanFlushRemoteWrites = 98,
386385
HostRegisterSupported = 99,
@@ -414,7 +413,16 @@ pub enum CudaDeviceAttr {
414413
MpsEnabled = 133,
415414
HostNumaId = 134,
416415
D3D12CigSupported = 135,
417-
Max = 136,
416+
VulkanCigSupported = 138,
417+
GpuPciDeviceId = 139,
418+
GpuPciSubsystemId = 140,
419+
Reserved141 = 141,
420+
HostNumaMemoryPoolsSupported = 142,
421+
HostNumaMultinodeIpcSupported = 143,
422+
HostMemoryPoolsSupported = 144,
423+
Reserved145 = 145,
424+
OnlyPartialHostNativeAtomicSupported = 147,
425+
Max = 148,
418426
}
419427
#[repr(u32)]
420428
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
@@ -428,6 +436,9 @@ pub enum CudaMemPoolAttribute {
428436
AttrUsedMemCurrent = 7,
429437
AttrUsedMemHigh = 8,
430438
}
439+
impl CudaMemLocationType {
440+
pub const None: CudaMemLocationType = CudaMemLocationType::Invalid;
441+
}
431442
#[repr(u32)]
432443
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
433444
pub enum CudaMemLocationType {
@@ -461,6 +472,7 @@ pub struct CudaMemAccessDesc {
461472
pub enum CudaMemAllocationType {
462473
Invalid = 0,
463474
Pinned = 1,
475+
Managed = 2,
464476
Max = 2147483647,
465477
}
466478
#[repr(u32)]
@@ -504,21 +516,16 @@ pub struct CudaDeviceProperties {
504516
pub maxThreadsPerBlock: ::std::os::raw::c_int,
505517
pub maxThreadsDim: [::std::os::raw::c_int; 3usize],
506518
pub maxGridSize: [::std::os::raw::c_int; 3usize],
507-
pub clockRate: ::std::os::raw::c_int,
508519
pub totalConstMem: usize,
509520
pub major: ::std::os::raw::c_int,
510521
pub minor: ::std::os::raw::c_int,
511522
pub textureAlignment: usize,
512523
pub texturePitchAlignment: usize,
513-
pub deviceOverlap: ::std::os::raw::c_int,
514524
pub multiProcessorCount: ::std::os::raw::c_int,
515-
pub kernelExecTimeoutEnabled: ::std::os::raw::c_int,
516525
pub integrated: ::std::os::raw::c_int,
517526
pub canMapHostMemory: ::std::os::raw::c_int,
518-
pub computeMode: ::std::os::raw::c_int,
519527
pub maxTexture1D: ::std::os::raw::c_int,
520528
pub maxTexture1DMipmap: ::std::os::raw::c_int,
521-
pub maxTexture1DLinear: ::std::os::raw::c_int,
522529
pub maxTexture2D: [::std::os::raw::c_int; 2usize],
523530
pub maxTexture2DMipmap: [::std::os::raw::c_int; 2usize],
524531
pub maxTexture2DLinear: [::std::os::raw::c_int; 3usize],
@@ -545,7 +552,6 @@ pub struct CudaDeviceProperties {
545552
pub tccDriver: ::std::os::raw::c_int,
546553
pub asyncEngineCount: ::std::os::raw::c_int,
547554
pub unifiedAddressing: ::std::os::raw::c_int,
548-
pub memoryClockRate: ::std::os::raw::c_int,
549555
pub memoryBusWidth: ::std::os::raw::c_int,
550556
pub l2CacheSize: ::std::os::raw::c_int,
551557
pub persistingL2CacheMaxSize: ::std::os::raw::c_int,
@@ -559,13 +565,11 @@ pub struct CudaDeviceProperties {
559565
pub isMultiGpuBoard: ::std::os::raw::c_int,
560566
pub multiGpuBoardGroupID: ::std::os::raw::c_int,
561567
pub hostNativeAtomicSupported: ::std::os::raw::c_int,
562-
pub singleToDoublePrecisionPerfRatio: ::std::os::raw::c_int,
563568
pub pageableMemoryAccess: ::std::os::raw::c_int,
564569
pub concurrentManagedAccess: ::std::os::raw::c_int,
565570
pub computePreemptionSupported: ::std::os::raw::c_int,
566571
pub canUseHostPointerForRegisteredMem: ::std::os::raw::c_int,
567572
pub cooperativeLaunch: ::std::os::raw::c_int,
568-
pub cooperativeMultiDeviceLaunch: ::std::os::raw::c_int,
569573
pub sharedMemPerBlockOptin: usize,
570574
pub pageableMemoryAccessUsesHostPageTables: ::std::os::raw::c_int,
571575
pub directManagedMemAccessFromHost: ::std::os::raw::c_int,
@@ -585,9 +589,14 @@ pub struct CudaDeviceProperties {
585589
pub ipcEventSupported: ::std::os::raw::c_int,
586590
pub clusterLaunch: ::std::os::raw::c_int,
587591
pub unifiedFunctionPointers: ::std::os::raw::c_int,
588-
pub reserved2: [::std::os::raw::c_int; 2usize],
589-
pub reserved1: [::std::os::raw::c_int; 1usize],
590-
pub reserved: [::std::os::raw::c_int; 60usize],
592+
pub deviceNumaConfig: ::std::os::raw::c_int,
593+
pub deviceNumaId: ::std::os::raw::c_int,
594+
pub mpsEnabled: ::std::os::raw::c_int,
595+
pub hostNumaId: ::std::os::raw::c_int,
596+
pub gpuPciDeviceID: ::std::os::raw::c_uint,
597+
pub gpuPciSubsystemID: ::std::os::raw::c_uint,
598+
pub hostNumaMultinodeIpcSupported: ::std::os::raw::c_int,
599+
pub reserved: [::std::os::raw::c_int; 56usize],
591600
}
592601
pub use self::CudaError as cudaError_t;
593602
#[repr(C)]
@@ -641,9 +650,11 @@ pub enum CudaLaunchAttributeID {
641650
Priority = 8,
642651
MemSyncDomainMap = 9,
643652
MemSyncDomain = 10,
653+
PreferredClusterDimension = 11,
644654
LaunchCompletionEvent = 12,
645655
DeviceUpdatableKernelNode = 13,
646656
PreferredSharedMemoryCarveout = 14,
657+
NvlinkUtilCentricScheduling = 16,
647658
}
648659
#[repr(C)]
649660
#[derive(Copy, Clone)]
@@ -659,9 +670,11 @@ pub union CudaLaunchAttributeValue {
659670
pub priority: ::std::os::raw::c_int,
660671
pub memSyncDomainMap: cudaLaunchMemSyncDomainMap,
661672
pub memSyncDomain: CudaLaunchMemSyncDomain,
662-
pub launchCompletionEvent: cudaLaunchAttributeValue__bindgen_ty_3,
663-
pub deviceUpdatableKernelNode: cudaLaunchAttributeValue__bindgen_ty_4,
673+
pub preferredClusterDim: cudaLaunchAttributeValue__bindgen_ty_3,
674+
pub launchCompletionEvent: cudaLaunchAttributeValue__bindgen_ty_4,
675+
pub deviceUpdatableKernelNode: cudaLaunchAttributeValue__bindgen_ty_5,
664676
pub sharedMemCarveout: ::std::os::raw::c_uint,
677+
pub nvlinkUtilCentricScheduling: ::std::os::raw::c_uint,
665678
}
666679
#[repr(C)]
667680
#[derive(Debug, Copy, Clone)]
@@ -680,12 +693,19 @@ pub struct cudaLaunchAttributeValue__bindgen_ty_2 {
680693
#[repr(C)]
681694
#[derive(Debug, Copy, Clone)]
682695
pub struct cudaLaunchAttributeValue__bindgen_ty_3 {
696+
pub x: ::std::os::raw::c_uint,
697+
pub y: ::std::os::raw::c_uint,
698+
pub z: ::std::os::raw::c_uint,
699+
}
700+
#[repr(C)]
701+
#[derive(Debug, Copy, Clone)]
702+
pub struct cudaLaunchAttributeValue__bindgen_ty_4 {
683703
pub event: cudaEvent_t,
684704
pub flags: ::std::os::raw::c_int,
685705
}
686706
#[repr(C)]
687707
#[derive(Debug, Copy, Clone)]
688-
pub struct cudaLaunchAttributeValue__bindgen_ty_4 {
708+
pub struct cudaLaunchAttributeValue__bindgen_ty_5 {
689709
pub deviceUpdatable: ::std::os::raw::c_int,
690710
pub devNode: cudaGraphDeviceNode_t,
691711
}
@@ -733,7 +753,7 @@ cuda_fn_and_stub! {
733753
pub fn cudaGetDeviceCount(count: *mut ::std::os::raw::c_int) -> cudaError_t;
734754
}
735755
cuda_fn_and_stub! {
736-
pub fn cudaGetDeviceProperties_v2(
756+
pub fn cudaGetDeviceProperties(
737757
prop: *mut CudaDeviceProperties,
738758
device: ::std::os::raw::c_int,
739759
) -> cudaError_t;

crates/cudart/src/device.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ pub fn get_device() -> CudaResult<i32> {
3333

3434
pub fn get_device_properties(device_id: i32) -> CudaResult<CudaDeviceProperties> {
3535
let mut props = MaybeUninit::<CudaDeviceProperties>::uninit();
36-
unsafe { cudaGetDeviceProperties_v2(props.as_mut_ptr(), device_id).wrap_maybe_uninit(props) }
36+
unsafe { cudaGetDeviceProperties(props.as_mut_ptr(), device_id).wrap_maybe_uninit(props) }
3737
}
3838

3939
pub fn set_device(device_id: i32) -> CudaResult<()> {

0 commit comments

Comments (0)