
[CUBLAS] Update wrappers to use the ILP64 API #2845


Open
wants to merge 2 commits into master
Conversation

@amontoison (Member) commented Aug 12, 2025

I checked the symbols with nm -D .../libcusolver.so, and they appear to be present in the library.
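
For reference, a rough equivalent of that check can be done from Julia with Libdl; the library name and symbol below are illustrative (not taken from this PR), and the exact path depends on the local CUDA installation:

```julia
using Libdl

# Sketch: check whether an ILP64 entry point is exported by a library.
# "libcublas" and the symbol name here are examples only.
lib = Libdl.dlopen("libcublas")
sym = Libdl.dlsym(lib, :cublasDgemm_v2_64; throw_error = false)
println(sym === nothing ? "ILP64 symbol missing" : "ILP64 symbol found")
```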

@amontoison changed the title from "Update wrappers to use the ILP64 API" to "[CUBLAS] Update wrappers to use the ILP64 API" on Aug 12, 2025

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Suggested changes:
diff --git a/lib/cublas/wrappers.jl b/lib/cublas/wrappers.jl
index 07c584860..510d0fbde 100644
--- a/lib/cublas/wrappers.jl
+++ b/lib/cublas/wrappers.jl
@@ -400,8 +400,10 @@ for (fname, fname_64, elty) in ((:cublasSrotm_v2, :cublasSrotm_v2_64, :Float32),
 end
 
 ## rotmg
-for (fname, fname_64, elty) in ((:cublasSrotmg_v2, :cublasSrotmg_v2_64, :Float32),
-                                (:cublasDrotmg_v2, :cublasSrotmg_v2_64, :Float64))
+for (fname, fname_64, elty) in (
+        (:cublasSrotmg_v2, :cublasSrotmg_v2_64, :Float32),
+        (:cublasDrotmg_v2, :cublasDrotmg_v2_64, :Float64),
+    )
     @eval begin
         function rotmg!(d1::$elty,
                         d2::$elty,
@@ -1120,7 +1122,7 @@ end
 ## (GE) general matrix-matrix multiplication
 for (fname, fname_64, elty) in ((:cublasDgemm_v2, :cublasDgemm_v2_64, :Float64),
                                 (:cublasSgemm_v2, :cublasSgemm_v2_64, :Float32),
-                                (:cublasHgemm, :cublasHgemm_64, :Float16),
+        (:cublasHgemm, :cublasHgemm_64, :Float16),
                                 (:cublasZgemm_v2, :cublasZgemm_v2_64, :ComplexF64),
                                 (:cublasCgemm_v2, :cublasCgemm_v2_64, :ComplexF32))
     @eval begin
@@ -1531,7 +1533,7 @@ end
 ## (GE) general matrix-matrix multiplication batched
 for (fname, fname_64, elty) in ((:cublasDgemmBatched, :cublasDgemmBatched_64, :Float64),
                                 (:cublasSgemmBatched, :cublasSgemmBatched_64, :Float32),
-                                (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
+        (:cublasHgemmBatched, :cublasHgemmBatched_64, :Float16),
                                 (:cublasZgemmBatched, :cublasZgemmBatched_64, :ComplexF64),
                                 (:cublasCgemmBatched, :cublasCgemmBatched_64, :ComplexF32))
     @eval begin
@@ -1598,7 +1600,7 @@ end
 ## (GE) general matrix-matrix multiplication strided batched
 for (fname, fname_64, elty) in ((:cublasDgemmStridedBatched, :cublasDgemmStridedBatched_64, :Float64),
                                 (:cublasSgemmStridedBatched, :cublasSgemmStridedBatched_64, :Float32),
-                                (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
+        (:cublasHgemmStridedBatched, :cublasHgemmStridedBatched_64, :Float16),
                                 (:cublasZgemmStridedBatched, :cublasZgemmStridedBatched_64, :ComplexF64),
                                 (:cublasCgemmStridedBatched, :cublasCgemmStridedBatched_64, :ComplexF32))
     @eval begin
@@ -1950,10 +1952,12 @@ end
 
 ## (TR) Triangular matrix and vector multiplication and solution
 for (mmname, mmname_64, elty) in
-        ((:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
-         (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
-         (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
-         (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32))
+    (
+        (:cublasDtrmm_v2, :cublasDtrmm_v2_64, :Float64),
+        (:cublasStrmm_v2, :cublasStrmm_v2_64, :Float32),
+        (:cublasZtrmm_v2, :cublasZtrmm_v2_64, :ComplexF64),
+        (:cublasCtrmm_v2, :cublasCtrmm_v2_64, :ComplexF32),
+    )
     @eval begin
         # Note: CUBLAS differs from BLAS API for trmm
         #   BLAS: inplace modification of B
@@ -1987,10 +1991,12 @@ for (mmname, mmname_64, elty) in
 end
 
 for (smname, smname_64, elty) in
-        ((:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
-         (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
-         (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
-         (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32))
+    (
+        (:cublasDtrsm_v2, :cublasDtrsm_v2_64, :Float64),
+        (:cublasStrsm_v2, :cublasStrsm_v2_64, :Float32),
+        (:cublasZtrsm_v2, :cublasZtrsm_v2_64, :ComplexF64),
+        (:cublasCtrsm_v2, :cublasCtrsm_v2_64, :ComplexF32),
+    )
     @eval begin
         function trsm!(side::Char,
                        uplo::Char,

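To make the pattern in the diff concrete: each loop emits one method per (LP64 symbol, ILP64 symbol, element type) triple via @eval, with the `_64` name bound to the ILP64 entry point. Below is a minimal self-contained sketch of that wrapper-generation idiom; the function names are stand-ins and the size-based selection is illustrative rather than the actual CUDA.jl dispatch rule (the real wrappers ccall into libcublas and may key off the CUBLAS version instead):

```julia
# Sketch only: symbols and the selection criterion are stand-ins.
for (fname, fname_64, elty) in ((:cublasSscal_v2, :cublasSscal_v2_64, :Float32),
                                (:cublasDscal_v2, :cublasDscal_v2_64, :Float64))
    @eval begin
        function scal_demo!(n::Integer, a::$elty, x::Vector{$elty})
            if n > typemax(Int32)
                # ILP64 entry points take 64-bit integer sizes/strides.
                println("would call ", $(string(fname_64)), " with Int64 arguments")
            else
                # The classic API takes 32-bit integers.
                println("would call ", $(string(fname)), " with Int32 arguments")
            end
            x .*= a
            return x
        end
    end
end

scal_demo!(4, 2.0f0, ones(Float32, 4))  # prints "would call cublasSscal_v2 ..."
```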

@github-actions (bot) left a comment


CUDA.jl Benchmarks

Benchmark suite Current: ace28c3 Previous: c05359d Ratio
latency/precompile 42966135947 ns 42922336650.5 ns 1.00
latency/ttfp 7012136452 ns 7015168424 ns 1.00
latency/import 3573568156 ns 3571269514 ns 1.00
integration/volumerhs 9625284.5 ns 9608723 ns 1.00
integration/byval/slices=1 147213 ns 146920.5 ns 1.00
integration/byval/slices=3 426506 ns 425845 ns 1.00
integration/byval/reference 145291 ns 145020 ns 1.00
integration/byval/slices=2 286867 ns 286380 ns 1.00
integration/cudadevrt 103634 ns 103554 ns 1.00
kernel/indexing 14158.5 ns 14235 ns 0.99
kernel/indexing_checked 15011 ns 14711 ns 1.02
kernel/occupancy 670.632911392405 ns 672.5506329113924 ns 1.00
kernel/launch 2238.1111111111113 ns 2270.3333333333335 ns 0.99
kernel/rand 15214 ns 14669 ns 1.04
array/reverse/1d 19943 ns 19682 ns 1.01
array/reverse/2d 24868.5 ns 23613.5 ns 1.05
array/reverse/1d_inplace 10397 ns 10461 ns 0.99
array/reverse/2d_inplace 12028 ns 13212 ns 0.91
array/copy 20985 ns 20972 ns 1.00
array/iteration/findall/int 157586.5 ns 157808 ns 1.00
array/iteration/findall/bool 140117 ns 139837 ns 1.00
array/iteration/findfirst/int 165035.5 ns 164937 ns 1.00
array/iteration/findfirst/bool 158235 ns 165868 ns 0.95
array/iteration/scalar 72865 ns 73041 ns 1.00
array/iteration/logical 215963.5 ns 214850 ns 1.01
array/iteration/findmin/1d 46181 ns 46704 ns 0.99
array/iteration/findmin/2d 96327.5 ns 96962.5 ns 0.99
array/reductions/reduce/Int64/1d 43700.5 ns 46033 ns 0.95
array/reductions/reduce/Int64/dims=1 49010 ns 55193 ns 0.89
array/reductions/reduce/Int64/dims=2 62902.5 ns 62917 ns 1.00
array/reductions/reduce/Int64/dims=1L 89016 ns 88869 ns 1.00
array/reductions/reduce/Int64/dims=2L 88505 ns 87079 ns 1.02
array/reductions/reduce/Float32/1d 34730 ns 34606 ns 1.00
array/reductions/reduce/Float32/dims=1 41845 ns 43875 ns 0.95
array/reductions/reduce/Float32/dims=2 59974 ns 59705 ns 1.00
array/reductions/reduce/Float32/dims=1L 52527 ns 52260 ns 1.01
array/reductions/reduce/Float32/dims=2L 70389 ns 70051.5 ns 1.00
array/reductions/mapreduce/Int64/1d 43580 ns 42671.5 ns 1.02
array/reductions/mapreduce/Int64/dims=1 48210.5 ns 45980 ns 1.05
array/reductions/mapreduce/Int64/dims=2 62641 ns 62143.5 ns 1.01
array/reductions/mapreduce/Int64/dims=1L 89035 ns 88812 ns 1.00
array/reductions/mapreduce/Int64/dims=2L 87202 ns 86818 ns 1.00
array/reductions/mapreduce/Float32/1d 34464 ns 34742 ns 0.99
array/reductions/mapreduce/Float32/dims=1 42028 ns 43090.5 ns 0.98
array/reductions/mapreduce/Float32/dims=2 60389 ns 60061 ns 1.01
array/reductions/mapreduce/Float32/dims=1L 52805 ns 52528 ns 1.01
array/reductions/mapreduce/Float32/dims=2L 70688 ns 70191 ns 1.01
array/broadcast 20331 ns 20155 ns 1.01
array/copyto!/gpu_to_gpu 12820 ns 11294 ns 1.14
array/copyto!/cpu_to_gpu 215251 ns 216503 ns 0.99
array/copyto!/gpu_to_cpu 283637 ns 284237 ns 1.00
array/accumulate/Int64/1d 124894.5 ns 125529 ns 0.99
array/accumulate/Int64/dims=1 83631 ns 84037 ns 1.00
array/accumulate/Int64/dims=2 158233 ns 159166 ns 0.99
array/accumulate/Int64/dims=1L 1719529 ns 1720376 ns 1.00
array/accumulate/Int64/dims=2L 967882 ns 968348 ns 1.00
array/accumulate/Float32/1d 109433 ns 109984 ns 0.99
array/accumulate/Float32/dims=1 80815 ns 81082 ns 1.00
array/accumulate/Float32/dims=2 147970 ns 148760 ns 0.99
array/accumulate/Float32/dims=1L 1618276 ns 1629307.5 ns 0.99
array/accumulate/Float32/dims=2L 698936 ns 701479 ns 1.00
array/construct 1305.6 ns 1287.2 ns 1.01
array/random/randn/Float32 44977 ns 44176 ns 1.02
array/random/randn!/Float32 25089 ns 24930 ns 1.01
array/random/rand!/Int64 27518 ns 27547 ns 1.00
array/random/rand!/Float32 8792.666666666666 ns 8724.666666666666 ns 1.01
array/random/rand/Int64 30018 ns 30114 ns 1.00
array/random/rand/Float32 12998 ns 13059 ns 1.00
array/permutedims/4d 60364.5 ns 60761 ns 0.99
array/permutedims/2d 54202 ns 54037 ns 1.00
array/permutedims/3d 55093 ns 54954 ns 1.00
array/sorting/1d 2755872.5 ns 2756544 ns 1.00
array/sorting/by 3354726 ns 3343249 ns 1.00
array/sorting/2d 1084530 ns 1080799 ns 1.00
cuda/synchronization/stream/auto 1026.3 ns 1040.3 ns 0.99
cuda/synchronization/stream/nonblocking 8291.400000000001 ns 7220 ns 1.15
cuda/synchronization/stream/blocking 805.3258426966293 ns 802.3333333333334 ns 1.00
cuda/synchronization/context/auto 1197.8 ns 1203.5 ns 1.00
cuda/synchronization/context/nonblocking 7976.1 ns 7276.700000000001 ns 1.10
cuda/synchronization/context/blocking 932.25 ns 900.4347826086956 ns 1.04

This comment was automatically generated by a workflow using github-action-benchmark.
