Memcopy with static NDRange
This example simply copies memory from B to A. In contrast to the previous examples, it uses a fully static kernel configuration, specializing the kernel on the iteration range itself.
using KernelAbstractions
using CUDA
using Test
# Element-wise copy kernel: each work-item copies one element of `B` into `A`.
# `B` is wrapped in `@Const`, so the kernel only reads from it.
@kernel function copy_kernel!(A, @Const(B))
    # Linear global index of this work-item (the default kind for `@index(Global)`).
    idx = @index(Global, Linear)
    # Bounds checks are skipped; the launch range must not exceed the arrays.
    @inbounds A[idx] = B[idx]
end
"""
    mycopy_static!(A::Array, B::Array)

Copy the contents of `B` into `A` on the CPU using `copy_kernel!`, instantiated with a
static iteration range equal to `size(A)`.

Both arrays must have the same size. Returns the event produced by the kernel launch;
call `wait` on it before reading `A`.
"""
function mycopy_static!(A::Array, B::Array)
    @assert size(A) == size(B)
    dims = size(A)
    # The iteration range is baked into the kernel instance together with the
    # workgroup size (32): if size(A) varies this will cause recompilation.
    static_copy! = copy_kernel!(CPU(), 32, dims)
    return static_copy!(A, B; ndrange=dims)
end
# CPU check: copy a matrix of ones into a matrix of zeros and compare.
A = zeros(Float64, 128, 128)
B = ones(Float64, 128, 128)
event = mycopy_static!(A, B)
# The launch hands back an event; block on it before inspecting `A`.
wait(event)
@test A == B
# GPU variant: defined and exercised only when `has_cuda_gpu()` returns true.
if has_cuda_gpu()
    # Copy `B` into `A` on the GPU with a statically sized kernel instance.
    # Returns the launch event; `wait` on it before reading `A`.
    function mycopy_static!(A::CuArray, B::CuArray)
        @assert size(A) == size(B)
        dims = size(A)
        # if size(A) varies this will cause recompilation
        static_copy! = copy_kernel!(CUDADevice(), 32, dims)
        return static_copy!(A, B; ndrange=dims)
    end

    # Destination starts uninitialised; the source is filled with ones.
    A = CuArray{Float32}(undef, 1024)
    B = CUDA.ones(Float32, 1024)
    event = mycopy_static!(A, B)
    # Block until the kernel has finished before comparing.
    wait(event)
    @test A == B
end