@@ -395,53 +395,52 @@ end
395
395
# Linear upsampling along the first (width) dimension, CPU variant.
#
# Every global index `(c, n)` selects one column `x[:, c, n]` / `y[:, c, n]`; that
# column is then processed sequentially over all output positions.
#
# Arguments:
# - `y`: 3-dimensional output array, overwritten in place.
# - `x`: 3-dimensional input array, only read.
# - `rwidth`: ratio forwarded to `source_idx_and_λ` for the width dimension.
# - `align`: `Val` flag forwarded unchanged to `source_idx_and_λ`.
@kernel function _upsample_linear_kernel!(::CPU, y::T, x::T, rwidth, align::Val{A}) where {
    T <: AbstractArray{<:Any, 3}, A,
}
    # Only the width extents are needed here; dimensions 2 and 3 are covered by the
    # global index below.
    @uniform in_width = size(x, 1)
    @uniform out_width = size(y, 1)
    c, n = @index(Global, NTuple)
    yv, xv = @view(y[:, c, n]), @view(x[:, c, n])
    @inbounds for i in 1:out_width
        # `source_idx_and_λ` takes a zero-based output position, hence `i - 1`.
        iw0, iw1, w0λ, w1λ = source_idx_and_λ(rwidth, i - 1, align, in_width)
        yv[i] = w0λ * xv[iw0] + w1λ * xv[iw1]
    end
end
407
407
408
408
# Gradient of linear upsampling along the first (width) dimension, CPU variant.
#
# Every global index `(c, n)` selects one column `Δ[:, c, n]` / `dx[:, c, n]`. Each
# element of the `Δ` column is scattered into the two `dx` positions returned by
# `source_idx_and_λ`, scaled by the matching weights.
#
# Arguments:
# - `dx`: 3-dimensional array that is accumulated into with `+=` (never overwritten
#   here, so presumably the caller zero-initialises it; TODO confirm).
# - `Δ`: 3-dimensional array that is only read.
# - `rwidth`: ratio forwarded to `source_idx_and_λ` for the width dimension.
# - `align`: `Val` flag forwarded unchanged to `source_idx_and_λ`.
@kernel function _∇upsample_linear_kernel!(::CPU, dx::T1, Δ::T2, rwidth, align::Val{A}) where {
    T1 <: AbstractArray{<:Any, 3}, T2 <: AbstractArray{<:Any, 3}, A,
}
    # NOTE: `in_width` is the width of `Δ` (the array being iterated) and `out_width`
    # is the width of `dx` (the array being indexed through `source_idx_and_λ`).
    @uniform in_width = size(Δ, 1)
    @uniform out_width = size(dx, 1)
    c, n = @index(Global, NTuple)
    Δv, dxv = @view(Δ[:, c, n]), @view(dx[:, c, n])
    @inbounds for i in 1:in_width
        # `source_idx_and_λ` takes a zero-based position, hence `i - 1`.
        ow0, ow1, w0λ, w1λ = source_idx_and_λ(rwidth, i - 1, align, out_width)
        val = Δv[i]
        dxv[ow0] += w0λ * val
        dxv[ow1] += w1λ * val
    end
end
422
422
423
423
# Linear (GPU): parallelization along width dimension.
424
- # TODO replace AbstractArray -> AnyGPUArray once device arrays subtype it.
425
424
426
425
# Linear upsampling along the first (width) dimension for `GPU` backends.
#
# The global index `i` is one position along the first dimension of `y`. Its two
# source positions and weights are computed once and then reused for every
# `(c, n)` pair of the remaining two dimensions.
#
# Arguments:
# - `y`: 3-dimensional output array, overwritten in place.
# - `x`: 3-dimensional input array, only read.
# - `rwidth`: ratio forwarded to `source_idx_and_λ` for the width dimension.
# - `align`: `Val` flag forwarded unchanged to `source_idx_and_λ`.
@kernel function _upsample_linear_kernel!(::B, y::T, x::T, rwidth, align::Val{A}) where {
    B <: GPU, T <: AbstractArray{<:Any, 3}, A,
}
    @uniform in_width, channels, batch = size(x)
    i = @index(Global)
    # `source_idx_and_λ` takes a zero-based output position, hence `i - 1`.
    src_lo, src_hi, λ_lo, λ_hi = source_idx_and_λ(rwidth, i - 1, align, in_width)
    @inbounds for n in 1:batch
        for c in 1:channels
            y[i, c, n] = λ_lo * x[src_lo, c, n] + λ_hi * x[src_hi, c, n]
        end
    end
end
436
435
437
436
@kernel function _∇upsample_linear_kernel! (:: B , dx:: T , Δ:: T , rwidth, align:: Val{A} ) where {
438
437
B <: GPU , T <: AbstractArray{<:Any, 3} , A,
439
438
}
440
- @uniform in_width:: UInt32 , channels:: UInt32 , batch:: UInt32 = size (Δ)
441
- @uniform out_width:: UInt32 = size (dx, 1 )
442
- i:: UInt32 = @index (Global)
443
- ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, out_width)
444
- @inbounds for n in UnitRange {UInt32} ( one (UInt32), batch) , c in UnitRange {UInt32} ( one (UInt32), channels)
439
+ @uniform in_width, channels, batch = size (Δ)
440
+ @uniform out_width = size (dx, 1 )
441
+ i = @index (Global)
442
+ ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, out_width)
443
+ @inbounds for n in 1 : batch, c in 1 : channels
445
444
val = Δ[i, c, n]
446
445
@atomic dx[ow0, c, n] += w0λ * val
447
446
@atomic dx[ow1, c, n] += w1λ * val
@@ -453,14 +452,14 @@ end
453
452
@kernel function _upsample_linear_kernel! (:: CPU , y:: T , x:: T , rwidth, rheight, align:: Val{A} ) where {
454
453
T <: AbstractArray{<:Any, 4} , A,
455
454
}
456
- @uniform in_width:: UInt32 , in_height:: UInt32 , channels:: UInt32 , batch:: UInt32 = size (x)
457
- @uniform out_width:: UInt32 , out_height:: UInt32 = size (y)[1 : 2 ]
458
- c:: UInt32 , n:: UInt32 = @index (Global, NTuple)
455
+ @uniform in_width, in_height, channels, batch = size (x)
456
+ @uniform out_width, out_height = size (y)[1 : 2 ]
457
+ c, n = @index (Global, NTuple)
459
458
yv, xv = @view (y[:, :, c, n]), @view (x[:, :, c, n])
460
- for j in UnitRange {UInt32} ( one (UInt32), out_height)
461
- ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, in_height)
462
- for i in UnitRange {UInt32} ( one (UInt32), out_width)
463
- iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, in_width)
459
+ for j in 1 : out_height
460
+ ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, in_height)
461
+ for i in 1 : out_width
462
+ iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, in_width)
464
463
@inbounds yv[i, j] =
465
464
h0λ * (w0λ * xv[iw0, ih0] + w1λ * xv[iw1, ih0]) +
466
465
h1λ * (w0λ * xv[iw0, ih1] + w1λ * xv[iw1, ih1])
@@ -471,14 +470,14 @@ end
471
470
@kernel function _∇upsample_linear_kernel! (:: CPU , dx:: T1 , Δ:: T2 , rwidth, rheight, align:: Val{A} ) where {
472
471
T1 <: AbstractArray{<:Any, 4} , T2 <: AbstractArray{<:Any, 4} , A,
473
472
}
474
- @uniform in_width:: UInt32 , in_height:: UInt32 , channels:: UInt32 , batch:: UInt32 = size (Δ)
475
- @uniform out_width:: UInt32 , out_height:: UInt32 = size (dx)[1 : 2 ]
476
- c:: UInt32 , n:: UInt32 = @index (Global, NTuple)
473
+ @uniform in_width, in_height, channels, batch = size (Δ)
474
+ @uniform out_width, out_height = size (dx)[1 : 2 ]
475
+ c, n = @index (Global, NTuple)
477
476
Δv, dxv = @view (Δ[:, :, c, n]), @view (dx[:, :, c, n])
478
- for j in UnitRange {UInt32} ( one (UInt32), in_height)
479
- oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, out_height)
480
- @inbounds for i in UnitRange {UInt32} ( one (UInt32), in_width)
481
- ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, out_width)
477
+ for j in 1 : in_height
478
+ oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, out_height)
479
+ @inbounds for i in 1 : in_width
480
+ ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, out_width)
482
481
val = Δv[i, j]
483
482
dxv[ow0, oh0] += w0λ * h0λ * val
484
483
dxv[ow1, oh0] += w1λ * h0λ * val
@@ -493,11 +492,11 @@ end
493
492
@kernel function _upsample_linear_kernel! (:: B , y:: T , x:: T , rwidth, rheight, align:: Val{A} ) where {
494
493
B <: GPU , T <: AbstractArray{<:Any, 4} , A,
495
494
}
496
- @uniform in_width:: UInt32 , in_height:: UInt32 , channels:: UInt32 , batch:: UInt32 = size (x)
497
- i:: UInt32 , j:: UInt32 = @index (Global, NTuple)
498
- iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, in_width)
499
- ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, in_height)
500
- @inbounds for n in UnitRange {UInt32} ( one (UInt32), batch) , c in UnitRange {UInt32} ( one (UInt32), channels)
495
+ @uniform in_width, in_height, channels, batch = size (x)
496
+ i, j = @index (Global, NTuple)
497
+ iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, in_width)
498
+ ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, in_height)
499
+ @inbounds for n in 1 : batch, c in 1 : channels
501
500
y[i, j, c, n] =
502
501
h0λ * (w0λ * x[iw0, ih0, c, n] + w1λ * x[iw1, ih0, c, n]) +
503
502
h1λ * (w0λ * x[iw0, ih1, c, n] + w1λ * x[iw1, ih1, c, n])
@@ -507,12 +506,12 @@ end
507
506
@kernel function _∇upsample_linear_kernel! (:: B , dx:: T , Δ:: T , rwidth, rheight, align:: Val{A} ) where {
508
507
B <: GPU , T <: AbstractArray{<:Any, 4} , A,
509
508
}
510
- @uniform in_width:: UInt32 , in_height:: UInt32 , channels:: UInt32 , batch:: UInt32 = size (Δ)
511
- @uniform out_width:: UInt32 , out_height:: UInt32 = size (dx)[1 : 2 ]
512
- i:: UInt32 , j:: UInt32 = @index (Global, NTuple)
513
- ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, out_width)
514
- oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, out_height)
515
- @inbounds for n in UnitRange {UInt32} ( one (UInt32), batch) , c in UnitRange {UInt32} ( one (UInt32), channels)
509
+ @uniform in_width, in_height, channels, batch = size (Δ)
510
+ @uniform out_width, out_height = size (dx)[1 : 2 ]
511
+ i, j = @index (Global, NTuple)
512
+ ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, out_width)
513
+ oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, out_height)
514
+ @inbounds for n in 1 : batch, c in 1 : channels
516
515
val = Δ[i, j, c, n]
517
516
@atomic dx[ow0, oh0, c, n] += w0λ * h0λ * val
518
517
@atomic dx[ow1, oh0, c, n] += w1λ * h0λ * val
@@ -526,17 +525,17 @@ end
526
525
@kernel function _upsample_linear_kernel! (:: CPU , y:: T , x:: T , rwidth, rheight, rdepth, align:: Val{A} ) where {
527
526
T <: AbstractArray{<:Any, 5} , A,
528
527
}
529
- @uniform in_width:: UInt32 , in_height:: UInt32 , in_depth:: UInt32 = size (x)[1 : 3 ]
530
- @uniform channels:: UInt32 , batch:: UInt32 = size (x, 4 ), size (x, 5 )
531
- @uniform out_width:: UInt32 , out_height:: UInt32 , out_depth:: UInt32 = size (y)[1 : 3 ]
532
- c:: UInt32 , n:: UInt32 = @index (Global, NTuple)
528
+ @uniform in_width, in_height, in_depth = size (x)[1 : 3 ]
529
+ @uniform channels, batch = size (x, 4 ), size (x, 5 )
530
+ @uniform out_width, out_height, out_depth = size (y)[1 : 3 ]
531
+ c, n = @index (Global, NTuple)
533
532
yv, xv = @view (y[:, :, :, c, n]), @view (x[:, :, :, c, n])
534
- for k in UnitRange {UInt32} ( one (UInt32), out_depth)
535
- id0, id1, d0λ, d1λ = source_idx_and_λ (rdepth, k - one (UInt32) , align, in_depth)
536
- for j in UnitRange {UInt32} ( one (UInt32), out_height)
537
- ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, in_height)
538
- for i in UnitRange {UInt32} ( one (UInt32), out_width)
539
- iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, in_width)
533
+ for k in 1 : out_depth
534
+ id0, id1, d0λ, d1λ = source_idx_and_λ (rdepth, k - 1 , align, in_depth)
535
+ for j in 1 : out_height
536
+ ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, in_height)
537
+ for i in 1 : out_width
538
+ iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, in_width)
540
539
@inbounds yv[i, j, k] =
541
540
d0λ * (
542
541
h0λ * (w0λ * xv[iw0, ih0, id0] + w1λ * xv[iw1, ih0, id0]) +
@@ -552,17 +551,17 @@ end
552
551
@kernel function _∇upsample_linear_kernel! (:: CPU , dx:: T1 , Δ:: T2 , rwidth, rheight, rdepth, align:: Val{A} ) where {
553
552
T1 <: AbstractArray{<:Any, 5} , T2 <: AbstractArray{<:Any, 5} , A,
554
553
}
555
- @uniform in_width:: UInt32 , in_height:: UInt32 , in_depth:: UInt32 = size (Δ)[1 : 3 ]
556
- @uniform channels:: UInt32 , batch:: UInt32 = size (Δ, 4 ), size (Δ, 5 )
557
- @uniform out_width:: UInt32 , out_height:: UInt32 , out_depth:: UInt32 = size (dx)[1 : 3 ]
558
- c:: UInt32 , n:: UInt32 = @index (Global, NTuple)
554
+ @uniform in_width, in_height, in_depth = size (Δ)[1 : 3 ]
555
+ @uniform channels, batch = size (Δ, 4 ), size (Δ, 5 )
556
+ @uniform out_width, out_height, out_depth = size (dx)[1 : 3 ]
557
+ c, n = @index (Global, NTuple)
559
558
Δv, dxv = @view (Δ[:, :, :, c, n]), @view (dx[:, :, :, c, n])
560
- for k in UnitRange {UInt32} ( one (UInt32), in_depth)
561
- od0, od1, d0λ, d1λ = source_idx_and_λ (rdepth, k - one (UInt32) , align, out_depth)
562
- for j in UnitRange {UInt32} ( one (UInt32), in_height)
563
- oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, out_height)
564
- @inbounds for i in UnitRange {UInt32} ( one (UInt32), in_width)
565
- ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, out_width)
559
+ for k in 1 : in_depth
560
+ od0, od1, d0λ, d1λ = source_idx_and_λ (rdepth, k - 1 , align, out_depth)
561
+ for j in 1 : in_height
562
+ oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, out_height)
563
+ @inbounds for i in 1 : in_width
564
+ ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, out_width)
566
565
val = Δv[i, j, k]
567
566
dxv[ow0, oh0, od0] += w0λ * h0λ * d0λ * val
568
567
dxv[ow1, oh0, od0] += w1λ * h0λ * d0λ * val
@@ -583,13 +582,13 @@ end
583
582
@kernel function _upsample_linear_kernel! (:: B , y:: T , x:: T , rwidth, rheight, rdepth, align:: Val{A} ) where {
584
583
B <: GPU , T <: AbstractArray{<:Any, 5} , A,
585
584
}
586
- @uniform in_width:: UInt32 , in_height:: UInt32 , in_depth:: UInt32 = size (x)[1 : 3 ]
587
- @uniform channels:: UInt32 , batch:: UInt32 = size (x, 4 ), size (x, 5 )
588
- i:: UInt32 , j:: UInt32 , k:: UInt32 = @index (Global, NTuple)
589
- iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, in_width)
590
- ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, in_height)
591
- id0, id1, d0λ, d1λ = source_idx_and_λ (rdepth, k - one (UInt32) , align, in_depth)
592
- @inbounds for n in UnitRange {UInt32} ( one (UInt32), batch) , c in UnitRange {UInt32} ( one (UInt32), channels)
585
+ @uniform in_width, in_height, in_depth = size (x)[1 : 3 ]
586
+ @uniform channels, batch = size (x, 4 ), size (x, 5 )
587
+ i, j, k = @index (Global, NTuple)
588
+ iw0, iw1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, in_width)
589
+ ih0, ih1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, in_height)
590
+ id0, id1, d0λ, d1λ = source_idx_and_λ (rdepth, k - 1 , align, in_depth)
591
+ @inbounds for n in 1 : batch, c in 1 : channels
593
592
y[i, j, k, c, n] =
594
593
d0λ * (
595
594
h0λ * (w0λ * x[iw0, ih0, id0, c, n] + w1λ * x[iw1, ih0, id0, c, n]) +
@@ -603,14 +602,14 @@ end
603
602
@kernel function _∇upsample_linear_kernel! (:: B , dx:: T , Δ:: T , rwidth, rheight, rdepth, align:: Val{A} ) where {
604
603
B <: GPU , T <: AbstractArray{<:Any, 5} , A,
605
604
}
606
- @uniform in_width:: UInt32 , in_height:: UInt32 , in_depth:: UInt32 = size (Δ)[1 : 3 ]
607
- @uniform channels:: UInt32 , batch:: UInt32 = size (Δ, 4 ), size (Δ, 5 )
608
- @uniform out_width:: UInt32 , out_height:: UInt32 , out_depth:: UInt32 = size (dx)[1 : 3 ]
609
- i:: UInt32 , j:: UInt32 , k:: UInt32 = @index (Global, NTuple)
610
- ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - one (UInt32) , align, out_width)
611
- oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - one (UInt32) , align, out_height)
612
- od0, od1, d0λ, d1λ = source_idx_and_λ (rdepth, k - one (UInt32) , align, out_depth)
613
- @inbounds for n in UnitRange {UInt32} ( one (UInt32), batch) , c in UnitRange {UInt32} ( one (UInt32), channels)
605
+ @uniform in_width, in_height, in_depth = size (Δ)[1 : 3 ]
606
+ @uniform channels, batch = size (Δ, 4 ), size (Δ, 5 )
607
+ @uniform out_width, out_height, out_depth = size (dx)[1 : 3 ]
608
+ i, j, k = @index (Global, NTuple)
609
+ ow0, ow1, w0λ, w1λ = source_idx_and_λ (rwidth, i - 1 , align, out_width)
610
+ oh0, oh1, h0λ, h1λ = source_idx_and_λ (rheight, j - 1 , align, out_height)
611
+ od0, od1, d0λ, d1λ = source_idx_and_λ (rdepth, k - 1 , align, out_depth)
612
+ @inbounds for n in 1 : batch, c in 1 : channels
614
613
val = Δ[i, j, k, c, n]
615
614
@atomic dx[ow0, oh0, od0, c, n] += w0λ * h0λ * d0λ * val
616
615
@atomic dx[ow1, oh0, od0, c, n] += w1λ * h0λ * d0λ * val
@@ -625,17 +624,21 @@ end
625
624
end
626
625
627
626
"""
    source_idx_and_λ(ratio, out_idx, Val(align), in_width)

Return `(i0, i1, λ0, λ1)`: the two one-based source indices and the interpolation
weights for the zero-based position `out_idx`.

The fractional source position is `ratio * out_idx` when `align` is `true`, and
`max(0, ratio * (out_idx + 0.5) - 0.5)` otherwise. `i0` is the floor of that position
(plus one), `i1` is `i0 + 1` unless `i0` is already the last index `in_width`, in which
case `i1 == i0`. `λ1` is the fractional part of the position and `λ0 = 1 - λ1`.

# Arguments
- `ratio::T`: scale between the two index spaces; `T` also determines the weight type.
- `out_idx::Integer`: zero-based position to map.
- `::Val{align}`: compile-time flag selecting the coordinate mapping.
- `in_width::Integer`: number of valid source indices.
"""
@inline function source_idx_and_λ(
    ratio::T, out_idx::Integer, ::Val{align}, in_width::Integer,
) where {T, align}
    real_index = align ?
        ratio * out_idx :
        max(zero(T), ratio * (out_idx + T(0.5)) - T(0.5))

    iw0 = if T <: Rational
        floor(Int, real_index) # Not GPU-friendly, but allows for Rational support.
    else
        # Non-throwing conversion; `floor` has already produced an integral value.
        unsafe_trunc(Int, floor(real_index))
    end
    # Written as `iw0 + 1 < in_width` rather than `iw0 < in_width - 1` so that an
    # unsigned `in_width` cannot wrap around on subtraction.
    offset = ifelse(iw0 + 1 < in_width, 1, 0)
    iw1 = iw0 + offset + 1

    w1lambda = real_index - iw0
    w0lambda = one(T) - w1lambda
    return iw0 + 1, iw1, w0lambda, w1lambda
end
0 commit comments