Skip to content

Commit a39e1e7

Browse files
committed
fixup! use argsize instead of sizeof
1 parent 36cca7f commit a39e1e7

File tree

1 file changed

+17
-17
lines changed

1 file changed

+17
-17
lines changed

lib/cudadrv/memory.jl

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -392,7 +392,7 @@ Initialize device memory by copying `val` for `len` times.
392392
memset
393393

394394
for T in [UInt8, UInt16, UInt32]
395-
bits = 8*sizeof(T)
395+
bits = 8*argsize(T)
396396
fn = Symbol("cuMemsetD$(bits)Async")
397397
@eval function memset(ptr::CuPtr{$T}, value::$T, len::Integer; stream::CuStream=stream())
398398
$(getproperty(CUDA, fn))(ptr, value, len, stream)
@@ -411,7 +411,7 @@ for (fn, srcPtrTy, dstPtrTy) in (("cuMemcpyDtoHAsync_v2", :CuPtr, :Ptr),
411411
@eval function Base.unsafe_copyto!(dst::$dstPtrTy{T}, src::$srcPtrTy{T}, N::Integer;
412412
stream::CuStream=stream(),
413413
async::Bool=false) where T
414-
$(getproperty(CUDA, Symbol(fn)))(dst, src, N*sizeof(T), stream)
414+
$(getproperty(CUDA, Symbol(fn)))(dst, src, N*argsize(T), stream)
415415
async || synchronize(stream)
416416
return dst
417417
end
@@ -423,11 +423,11 @@ function Base.unsafe_copyto!(dst::CuPtr{T}, src::CuPtr{T}, N::Integer;
423423
dst_dev = device(dst)
424424
src_dev = device(src)
425425
if dst_dev == src_dev
426-
cuMemcpyDtoDAsync_v2(dst, src, N*sizeof(T), stream)
426+
cuMemcpyDtoDAsync_v2(dst, src, N*argsize(T), stream)
427427
else
428428
cuMemcpyPeerAsync(dst, context(dst_dev),
429429
src, context(src_dev),
430-
N*sizeof(T), stream)
430+
N*argsize(T), stream)
431431
end
432432
async || synchronize(stream)
433433
return dst
@@ -436,24 +436,24 @@ end
436436
function Base.unsafe_copyto!(dst::CuArrayPtr{T}, doffs::Integer, src::Ptr{T}, N::Integer;
437437
stream::CuStream=stream(),
438438
async::Bool=false) where T
439-
cuMemcpyHtoAAsync_v2(dst, doffs, src, N*sizeof(T), stream)
439+
cuMemcpyHtoAAsync_v2(dst, doffs, src, N*argsize(T), stream)
440440
async || synchronize(stream)
441441
return dst
442442
end
443443

444444
function Base.unsafe_copyto!(dst::Ptr{T}, src::CuArrayPtr{T}, soffs::Integer, N::Integer;
445445
stream::CuStream=stream(),
446446
async::Bool=false) where T
447-
cuMemcpyAtoHAsync_v2(dst, src, soffs, N*sizeof(T), stream)
447+
cuMemcpyAtoHAsync_v2(dst, src, soffs, N*argsize(T), stream)
448448
async || synchronize(stream)
449449
return dst
450450
end
451451

452452
Base.unsafe_copyto!(dst::CuArrayPtr{T}, doffs::Integer, src::CuPtr{T}, N::Integer) where {T} =
453-
cuMemcpyDtoA_v2(dst, doffs, src, N*sizeof(T))
453+
cuMemcpyDtoA_v2(dst, doffs, src, N*argsize(T))
454454

455455
Base.unsafe_copyto!(dst::CuPtr{T}, src::CuArrayPtr{T}, soffs::Integer, N::Integer) where {T} =
456-
cuMemcpyAtoD_v2(dst, src, soffs, N*sizeof(T))
456+
cuMemcpyAtoD_v2(dst, src, soffs, N*argsize(T))
457457

458458
Base.unsafe_copyto!(dst::CuArrayPtr, src, N::Integer; kwargs...) =
459459
Base.unsafe_copyto!(dst, 0, src, N; kwargs...)
@@ -529,15 +529,15 @@ function unsafe_copy2d!(dst::Union{Ptr{T},CuPtr{T},CuArrayPtr{T}}, dstTyp::Type{
529529

530530
params_ref = Ref(CUDA_MEMCPY2D(
531531
# source
532-
(srcPos.x-1)*sizeof(T), srcPos.y-1,
532+
(srcPos.x-1)*argsize(T), srcPos.y-1,
533533
srcMemoryType, srcHost, srcDevice, srcArray,
534534
srcPitch,
535535
# destination
536-
(dstPos.x-1)*sizeof(T), dstPos.y-1,
536+
(dstPos.x-1)*argsize(T), dstPos.y-1,
537537
dstMemoryType, dstHost, dstDevice, dstArray,
538538
dstPitch,
539539
# extent
540-
width*sizeof(T), height
540+
width*argsize(T), height
541541
))
542542
cuMemcpy2DAsync_v2(params_ref, stream)
543543
async || synchronize(stream)
@@ -569,8 +569,8 @@ function unsafe_copy3d!(dst::Union{Ptr{T},CuPtr{T},CuArrayPtr{T}}, dstTyp::Type{
569569
# when using the stream-ordered memory allocator
570570
# NOTE: we apply the workaround unconditionally, since we want to keep this call cheap.
571571
if v"11.2" <= driver_version() <= v"11.3" #&& pools[device()].stream_ordered
572-
srcOffset = (srcPos.x-1)*sizeof(T) + srcPitch*((srcPos.y-1) + srcHeight*(srcPos.z-1))
573-
dstOffset = (dstPos.x-1)*sizeof(T) + dstPitch*((dstPos.y-1) + dstHeight*(dstPos.z-1))
572+
srcOffset = (srcPos.x-1)*argsize(T) + srcPitch*((srcPos.y-1) + srcHeight*(srcPos.z-1))
573+
dstOffset = (dstPos.x-1)*argsize(T) + dstPitch*((dstPos.y-1) + dstHeight*(dstPos.z-1))
574574
else
575575
srcOffset = 0
576576
dstOffset = 0
@@ -622,23 +622,23 @@ function unsafe_copy3d!(dst::Union{Ptr{T},CuPtr{T},CuArrayPtr{T}}, dstTyp::Type{
622622

623623
params_ref = Ref(CUDA_MEMCPY3D(
624624
# source
625-
srcOffset==0 ? (srcPos.x-1)*sizeof(T) : 0,
625+
srcOffset==0 ? (srcPos.x-1)*argsize(T) : 0,
626626
srcOffset==0 ? srcPos.y-1 : 0,
627627
srcOffset==0 ? srcPos.z-1 : 0,
628628
0, # LOD
629629
srcMemoryType, srcHost, srcDevice, srcArray,
630630
C_NULL, # reserved
631631
srcPitch, srcHeight,
632632
# destination
633-
dstOffset==0 ? (dstPos.x-1)*sizeof(T) : 0,
633+
dstOffset==0 ? (dstPos.x-1)*argsize(T) : 0,
634634
dstOffset==0 ? dstPos.y-1 : 0,
635635
dstOffset==0 ? dstPos.z-1 : 0,
636636
0, # LOD
637637
dstMemoryType, dstHost, dstDevice, dstArray,
638638
C_NULL, # reserved
639639
dstPitch, dstHeight,
640640
# extent
641-
width*sizeof(T), height, depth
641+
width*argsize(T), height, depth
642642
))
643643
cuMemcpy3DAsync_v2(params_ref, stream)
644644
async || synchronize(stream)
@@ -698,7 +698,7 @@ function pin(ref::Base.RefValue{T}) where T
698698
ctx = context()
699699
ptr = Base.unsafe_convert(Ptr{T}, ref)
700700

701-
__pin(ptr, sizeof(T))
701+
__pin(ptr, argsize(T))
702702
finalizer(ref) do _
703703
__unpin(ptr, ctx)
704704
end

0 commit comments

Comments
 (0)