Skip to content

uniform memory and @synchronize inside an if statement #132

@jkozdon

Description

@jkozdon

Not really a bug in KA, but @vchuravy asked me to post

When uniform memory is used together with a `@synchronize` inside an `if` statement, one needs to be careful because of the way the implicit thread loops arise on the CPU.

using KernelAbstractions
using StaticArrays
using Test

# Good: no @synchronize in if statement
#
# Copies column `glo_id` of B into column `glo_id` of A, staging the column
# first through a scratch MArray declared in the @uniform block (`l_B`) and
# then through @localmem (`s_B`).
#
# Arguments:
#   ::Val{N} - number of rows copied per column, supplied as a type parameter
#   A        - destination; written at A[n, glo_id] for n = 1:N
#   B        - source; read at B[n, glo_id] for n = 1:N
#
# The "implicit thread loop?" markers are the author's guess (hence the
# question marks) at where the CPU back end begins and ends its loop over
# work items; they are annotations only and do not affect the code.
@kernel function no_if_uniform_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    # Scratch vector of length N declared inside @uniform.
    # NOTE(review): presumably a single instance is shared by all work items
    # of a group on the CPU -- confirm against the KernelAbstractions docs.
    l_B = MArray{Tuple{N}, FT}(undef)
    grp_size = @uniform groupsize()[1]
  end

  ##############################
  # Start implicit thread loop?

  glo_id = @index(Global)
  loc_id = @index(Local)
  # One column of length N per local index.
  s_B = @localmem FT (N, grp_size)

  # store value of B in uniform memory
  for n = 1:N
    l_B[n] = B[n, glo_id]
  end

  # Dump value from uniform to shared
  # (l_B is written and read back here with no @synchronize and no `if`
  # in between the two loops)
  for n = 1:N
    s_B[n, loc_id] = l_B[n]
  end

  # End implicit thread loop?
  ##############################
  @synchronize
  ##############################
  # Start implicit thread loop?

  # Dump value from shared to global memory
  for n = 1:N
    A[n, glo_id] = s_B[n, loc_id]
  end
  # End implicit thread loop?
  ##############################
end

# Bad: @synchronize in if statement with uniform memory usage
#
# Same copy as the "no if" variant (column `glo_id` of B -> `l_B` -> `s_B` ->
# column `glo_id` of A), except that the read of `l_B`, the @synchronize and
# the final store sit inside an `if true` block.
#
# Arguments:
#   ::Val{N} - number of rows copied per column, supplied as a type parameter
#   A        - destination; written at A[n, glo_id] for n = 1:N
#   B        - source; read at B[n, glo_id] for n = 1:N
#
# This is the failing reproduction: in the reported run the `A == B` test for
# this kernel fails, with neighbouring columns of A printing identical values
# and the last column of A matching the last column of B.
# NOTE(review): presumably the `if` splits the implicit CPU thread loop, so
# every work item overwrites `l_B` before any of them reads it back and only
# the last writer's values survive -- confirm against the CPU lowering.
@kernel function with_if_uniform_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    # Scratch vector of length N declared inside @uniform.
    l_B = MArray{Tuple{N}, FT}(undef)
    grp_size = @uniform groupsize()[1]
  end

  ##############################
  # Start implicit thread loop?

  glo_id = @index(Global)
  loc_id = @index(Local)
  # One column of length N per local index.
  s_B = @localmem FT (N, grp_size)

  # store value of B in uniform memory
  for n = 1:N
    l_B[n] = B[n, glo_id]
  end

  # End implicit thread loop?
  ##############################

  # Always-true branch: its only purpose is to place @synchronize inside an
  # `if` statement.
  if true
    ##############################
    # Start implicit thread loop
    #

    # Dump value from uniform to shared
    # (unlike the "no if" variant, this read of l_B is separated from the
    # write above by the start of the `if` block)
    for n = 1:N
      s_B[n, loc_id] = l_B[n]
    end

    # End implicit thread loop?
    ##############################
    @synchronize
    ##############################
    # Start implicit thread loop?

    # Dump value from shared to global memory
    for n = 1:N
      A[n, glo_id] = s_B[n, loc_id]
    end
    # End implicit thread loop?
    ##############################
  end
end

# Good: @synchronize in if statement with private memory usage
#
# Same structure as the failing uniform variant (the @synchronize sits inside
# an `if true` block), but the per-column scratch storage is allocated with
# @private (`p_B`) instead of as an MArray inside the @uniform block.
#
# Arguments:
#   ::Val{N} - number of rows copied per column, supplied as a type parameter
#   A        - destination; written at A[n, glo_id] for n = 1:N
#   B        - source; read at B[n, glo_id] for n = 1:N
#
# In the reported run the `A == B` test for this kernel passes.
@kernel function with_if_private_copy!(::Val{N}, A, B) where {N}
  @uniform begin
    FT = eltype(B)
    grp_size = @uniform groupsize()[1]
  end

  # Scratch storage of length N allocated with @private.
  # NOTE(review): presumably each work item gets its own copy that survives
  # across @synchronize -- confirm against the KernelAbstractions docs.
  p_B = @private FT (N,)

  ##############################
  # Start implicit thread loop?

  glo_id = @index(Global)
  loc_id = @index(Local)
  # One column of length N per local index.
  s_B = @localmem FT (N, grp_size)

  # store value of B in private memory
  for n = 1:N
    p_B[n] = B[n, glo_id]
  end

  # End implicit thread loop?
  ##############################

  # Always-true branch: its only purpose is to place @synchronize inside an
  # `if` statement.
  if true
    ##############################
    # Start implicit thread loop?

    # Dump value from private to shared
    for n = 1:N
      s_B[n, loc_id] = p_B[n]
    end

    # End implicit thread loop?
    ##############################
    @synchronize
    ##############################
    # Start implicit thread loop?

    # Dump value from shared to global memory
    for n = 1:N
      A[n, glo_id] = s_B[n, loc_id]
    end
    # End implicit thread loop?
    ##############################
  end
end

# Round-trip check for no_if_uniform_copy!: after the kernel has finished,
# every entry of A must equal the corresponding entry of B.
@testset "no if uniform copy" begin
  N, M = 10, 1024  # rows per column, number of columns (used as ndrange)
  B = rand(N, M)
  A = similar(B)
  # Instantiate for the CPU back end; the second argument (8) is the
  # workgroup size in the KernelAbstractions launch API.
  kernel! = no_if_uniform_copy!(CPU(), 8)
  # Launch over M work items and block until the returned event completes.
  wait(kernel!(Val(N), A, B; ndrange = M))
  @test A == B
end

# Round-trip check for with_if_private_copy!: after the kernel has finished,
# every entry of A must equal the corresponding entry of B.
@testset "with if private copy" begin
  N, M = 10, 1024  # rows per column, number of columns (used as ndrange)
  B = rand(N, M)
  A = similar(B)
  # Instantiate for the CPU back end; the second argument (8) is the
  # workgroup size in the KernelAbstractions launch API.
  kernel! = with_if_private_copy!(CPU(), 8)
  # Launch over M work items and block until the returned event completes.
  wait(kernel!(Val(N), A, B; ndrange = M))
  @test A == B
end

# Round-trip check for with_if_uniform_copy!. This is the test that fails in
# the reported run; the assertion is kept as `A == B` so it matches the
# failure message printed in the transcript.
@testset "with if uniform copy" begin
  N, M = 10, 1024  # rows per column, number of columns (used as ndrange)
  B = rand(N, M)
  A = similar(B)
  # Instantiate for the CPU back end; the second argument (8) is the
  # workgroup size in the KernelAbstractions launch API.
  kernel! = with_if_uniform_copy!(CPU(), 8)
  # Launch over M work items and block until the returned event completes.
  wait(kernel!(Val(N), A, B; ndrange = M))
  @test A == B
end

output:

julia> include("buggy.jl")
Test Summary:      | Pass  Total
no if uniform copy |    1      1
Test Summary:        | Pass  Total
with if private copy |    1      1
with if uniform copy: Test Failed at /Users/jekozdon/scratch/2019_09_17/buggy.jl:167
  Expression: A == B
   Evaluated: [0.909792046331644 0.909792046331644 … 0.49137323913605546 0.49137323913605546; 0.14386949928286263 0.14386949928286263 … 0.7250834679876621 0.7250834679876621; … ; 0.9275922324520269 0.9275922324520269 … 0.5301867826757798 0.5301867826757798; 0.7105600705440542 0.7105600705440542 … 0.782530472812315 0.782530472812315] == [0.41175116387410693 0.1267238684429859 … 0.9288230234713291 0.49137323913605546; 0.9951976250072363 0.6354672711865443 … 0.0058710270867841086 0.7250834679876621; … ; 0.709797268000828 0.6061527988039019 … 0.188834315207701 0.5301867826757798; 0.9941190027847424 0.0318726131609639 … 0.037656338749129104 0.782530472812315]
Stacktrace:
 [1] top-level scope at /Users/jekozdon/scratch/2019_09_17/buggy.jl:167
 [2] top-level scope at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.4/Test/src/Test.jl:1113
 [3] top-level scope at /Users/jekozdon/scratch/2019_09_17/buggy.jl:161
Test Summary:        | Fail  Total
with if uniform copy |    1      1
ERROR: LoadError: Some tests did not pass: 0 passed, 1 failed, 0 errored, 0 broken.
in expression starting at /Users/jekozdon/scratch/2019_09_17/buggy.jl:160

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions