Skip to content

Commit 2cc9285

Browse files
authored
Update subpackages. (#2776)
1 parent 64f4a23 commit 2cc9285

File tree

17 files changed

+1287
-639
lines changed

17 files changed

+1287
-639
lines changed

lib/cublas/libcublas.jl

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3046,6 +3046,7 @@ end
30463046
CUBLAS_TENSOR_OP_MATH = 1
30473047
CUBLAS_PEDANTIC_MATH = 2
30483048
CUBLAS_TF32_TENSOR_OP_MATH = 3
3049+
CUBLAS_FP32_EMULATED_BF16X9_MATH = 4
30493050
CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION = 16
30503051
end
30513052

@@ -3059,12 +3060,19 @@ const cublasDataType_t = cudaDataType
30593060
CUBLAS_COMPUTE_32F_FAST_16F = 74
30603061
CUBLAS_COMPUTE_32F_FAST_16BF = 75
30613062
CUBLAS_COMPUTE_32F_FAST_TF32 = 77
3063+
CUBLAS_COMPUTE_32F_EMULATED_16BFX9 = 78
30623064
CUBLAS_COMPUTE_64F = 70
30633065
CUBLAS_COMPUTE_64F_PEDANTIC = 71
30643066
CUBLAS_COMPUTE_32I = 72
30653067
CUBLAS_COMPUTE_32I_PEDANTIC = 73
30663068
end
30673069

3070+
# C-compatible enum with `UInt32` storage. It is the value type used by
# `cublasGetEmulationStrategy` (through a `Ptr`) and `cublasSetEmulationStrategy`
# (by value) in this file.
# NOTE(review): the numeric values must match the cuBLAS C header, and the meaning of
# each strategy is defined by cuBLAS, not here - consult the cuBLAS documentation.
@cenum cublasEmulationStrategy_t::UInt32 begin
3071+
CUBLAS_EMULATION_STRATEGY_DEFAULT = 0
3072+
CUBLAS_EMULATION_STRATEGY_PERFORMANT = 1
3073+
CUBLAS_EMULATION_STRATEGY_EAGER = 2
3074+
end
3075+
30683076
# typedef void ( * cublasLogCallback ) ( const char * msg )
# On the Julia side the C callback type above is represented as an untyped pointer.
30693077
const cublasLogCallback = Ptr{Cvoid}
30703078

@@ -3113,6 +3121,18 @@ end
31133121
smCountTarget::Cint)::cublasStatus_t
31143122
end
31153123

3124+
# Wrapper around the cuBLAS C entry point `cublasGetEmulationStrategy`.
# Runs `initialize_context()` first, then forwards both arguments to the library:
#   handle            - passed to C as `cublasHandle_t`
#   emulationStrategy - passed to C as `Ptr{cublasEmulationStrategy_t}`; presumably the
#                       location the library writes the current strategy to (TODO confirm)
# The foreign call is declared to return `cublasStatus_t`.
# NOTE(review): `@checked` and `@gcsafe_ccall` are defined elsewhere; presumably
# `@checked` turns a failing status into an exception - confirm against its definition.
@checked function cublasGetEmulationStrategy(handle, emulationStrategy)
3125+
initialize_context()
3126+
@gcsafe_ccall libcublas.cublasGetEmulationStrategy(handle::cublasHandle_t,
3127+
emulationStrategy::Ptr{cublasEmulationStrategy_t})::cublasStatus_t
3128+
end
3129+
3130+
# Wrapper around the cuBLAS C entry point `cublasSetEmulationStrategy`.
# Runs `initialize_context()` first, then forwards both arguments to the library:
#   handle            - passed to C as `cublasHandle_t`
#   emulationStrategy - passed to C by value as `cublasEmulationStrategy_t`
# The foreign call is declared to return `cublasStatus_t`.
# NOTE(review): `@checked` and `@gcsafe_ccall` are defined elsewhere; presumably
# `@checked` turns a failing status into an exception - confirm against its definition.
@checked function cublasSetEmulationStrategy(handle, emulationStrategy)
3131+
initialize_context()
3132+
@gcsafe_ccall libcublas.cublasSetEmulationStrategy(handle::cublasHandle_t,
3133+
emulationStrategy::cublasEmulationStrategy_t)::cublasStatus_t
3134+
end
3135+
31163136
function cublasGetStatusName(status)
31173137
initialize_context()
31183138
@gcsafe_ccall libcublas.cublasGetStatusName(status::cublasStatus_t)::Cstring

lib/cublas/libcublasLt.jl

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -833,7 +833,10 @@ end
833833
CUBLASLT_MATMUL_MATRIX_SCALE_SCALAR_32F = 0
834834
CUBLASLT_MATMUL_MATRIX_SCALE_VEC16_UE4M3 = 1
835835
CUBLASLT_MATMUL_MATRIX_SCALE_VEC32_UE8M0 = 2
836-
CUBLASLT_MATMUL_MATRIX_SCALE_END = 3
836+
CUBLASLT_MATMUL_MATRIX_SCALE_OUTER_VEC_32F = 3
837+
CUBLASLT_MATMUL_MATRIX_SCALE_VEC128_32F = 4
838+
CUBLASLT_MATMUL_MATRIX_SCALE_BLK128x128_32F = 5
839+
CUBLASLT_MATMUL_MATRIX_SCALE_END = 6
837840
end
838841

839842
@cenum cublasLtPointerMode_t::UInt32 begin
@@ -896,6 +899,11 @@ end
896899
CUBLASLT_ORDER_COL32_2R_4R4 = 4
897900
end
898901

902+
# C-compatible enum with `UInt32` storage and two members.
# NOTE(review): presumably the value type of the `CUBLASLT_MATRIX_LAYOUT_BATCH_MODE`
# layout attribute declared in this file - confirm against the cuBLASLt header. The
# numeric values must match that header.
@cenum cublasLtBatchMode_t::UInt32 begin
903+
CUBLASLT_BATCH_MODE_STRIDED = 0
904+
CUBLASLT_BATCH_MODE_POINTER_ARRAY = 1
905+
end
906+
899907
@cenum cublasLtMatrixLayoutAttribute_t::UInt32 begin
900908
CUBLASLT_MATRIX_LAYOUT_TYPE = 0
901909
CUBLASLT_MATRIX_LAYOUT_ORDER = 1
@@ -905,6 +913,7 @@ end
905913
CUBLASLT_MATRIX_LAYOUT_BATCH_COUNT = 5
906914
CUBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET = 6
907915
CUBLASLT_MATRIX_LAYOUT_PLANE_OFFSET = 7
916+
CUBLASLT_MATRIX_LAYOUT_BATCH_MODE = 8
908917
end
909918

910919
@checked function cublasLtMatrixLayoutInit_internal(matLayout, size, type, rows, cols, ld)
@@ -1267,6 +1276,8 @@ end
12671276
CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_C_BYTES = 18
12681277
CUBLASLT_ALGO_CAP_MIN_ALIGNMENT_D_BYTES = 19
12691278
CUBLASLT_ALGO_CAP_ATOMIC_SYNC = 20
1279+
CUBLASLT_ALGO_CAP_POINTER_ARRAY_BATCH_SUPPORT = 21
1280+
CUBLASLT_ALGO_CAP_FLOATING_POINT_EMULATION_SUPPORT = 22
12701281
end
12711282

12721283
@checked function cublasLtMatmulAlgoCapGetAttribute(algo, attr, buf, sizeInBytes,
@@ -1337,7 +1348,7 @@ end
13371348
@gcsafe_ccall libcublasLt.cublasLtLoggerSetMask(mask::Cint)::cublasStatus_t
13381349
end
13391350

1340-
# no prototype is found for this function at cublasLt.h:2507:29, please use with caution
1351+
# no prototype is found for this function at cublasLt.h:2550:29, please use with caution
13411352
@checked function cublasLtLoggerForceDisable()
13421353
initialize_context()
13431354
@gcsafe_ccall libcublasLt.cublasLtLoggerForceDisable()::cublasStatus_t

lib/cudadrv/libcuda.jl

Lines changed: 324 additions & 252 deletions
Large diffs are not rendered by default.

lib/cudnn/Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "cuDNN"
22
uuid = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
33
authors = ["Tim Besard <[email protected]>"]
4-
version = "1.4.2"
4+
version = "1.4.3"
55

66
[deps]
77
CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
@@ -13,5 +13,5 @@ CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645"
1313
CEnum = "0.2, 0.3, 0.4, 0.5"
1414
CUDA = "~5.7"
1515
CUDA_Runtime_Discovery = "0.2, 0.3"
16-
CUDNN_jll = "~9.4"
16+
CUDNN_jll = "9.10"
1717
julia = "1.10"

lib/cudnn/src/libcudnn.jl

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -382,6 +382,7 @@ end
382382
CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201
383383
CUDNN_ATTR_ENGINEHEUR_RESULTS = 202
384384
CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203
385+
CUDNN_ATTR_ENGINEHEUR_DEVICEPROP = 204
385386
CUDNN_ATTR_ENGINECFG_ENGINE = 300
386387
CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301
387388
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302
@@ -394,6 +395,7 @@ end
394395
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404
395396
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405
396397
CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406
398+
CUDNN_ATTR_EXECUTION_PLAN_DEVICEPROP = 407
397399
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500
398400
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501
399401
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502
@@ -482,6 +484,7 @@ end
482484
CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304
483485
CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305
484486
CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306
487+
CUDNN_ATTR_ENGINE_DEVICEPROP = 1307
485488
CUDNN_ATTR_MATMUL_COMP_TYPE = 1500
486489
CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503
487490
CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520
@@ -572,6 +575,20 @@ end
572575
CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110
573576
CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200
574577
CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201
578+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_XDESC = 2250
579+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_YDESC = 2251
580+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_LOWER_BANDWIDTH = 2252
581+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_UPPER_BANDWIDTH = 2253
582+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_AXIS = 2254
583+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_PAD_VALUE = 2255
584+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_KV_TOKEN_OFFSET_DESC = 2256
585+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_XDESC = 2270
586+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_YDESC = 2271
587+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_LOWER_BANDWIDTH = 2272
588+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_UPPER_BANDWIDTH = 2273
589+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_AXIS = 2274
590+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_PAD_VALUE = 2275
591+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE = 2276
575592
CUDNN_ATTR_RNG_DISTRIBUTION = 2300
576593
CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301
577594
CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302
@@ -584,17 +601,20 @@ end
584601
CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313
585602
CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400
586603
CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401
604+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION = 2402
587605
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_XDESC = 2500
588606
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_YDESC = 2501
589607
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_SCALE_DESC = 2502
590608
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_MATH_PREC = 2503
591609
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_BLOCK_SIZE = 2504
592-
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_DENOM_FACTOR_MODE = 2505
593610
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_XDESC = 2600
594611
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_SCALE_DESC = 2601
595612
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_YDESC = 2602
596613
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_MATH_PREC = 2603
597614
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_BLOCK_SIZE = 2604
615+
CUDNN_ATTR_DEVICEPROP_DEVICE_ID = 2700
616+
CUDNN_ATTR_DEVICEPROP_HANDLE = 2701
617+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION = 2702
598618
end
599619

600620
@cenum cudnnBackendAttributeType_t::UInt32 begin
@@ -667,8 +687,11 @@ end
667687
CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33
668688
CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34
669689
CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35
670-
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 36
671-
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 37
690+
CUDNN_BACKEND_OPERATION_EXPAND_BAND_MATRIX_DESCRIPTOR = 36
691+
CUDNN_BACKEND_OPERATION_CONTRACT_BAND_MATRIX_DESCRIPTOR = 37
692+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 38
693+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 39
694+
CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR = 40
672695
end
673696

674697
@cenum cudnnBackendNumericalNote_t::UInt32 begin

0 commit comments

Comments
 (0)