Adding the following test to the example fails on a Raspberry Pi: the results are wrong by seemingly random amounts on every run, even with the same binary.
#[cfg(test)]
mod tests{
use super::*;
use khal::re_exports::wgpu;
/// Runs one round of the element-wise addition check against `backend`.
///
/// Builds two inputs of `MAX_IDX` elements (`a[i] = i`, `b[i] = 10 * i`), passes them to
/// `compute_sum`, and asserts that every output element equals `a[i] + b[i]`.
///
/// `loop_num` is only interpolated into the failure messages so a failing repetition can be
/// identified.
///
/// # Panics
///
/// Panics if `compute_sum` returns an error, if any of the lengths disagree, or if any element
/// of the inputs or the result differs from its expected value.
async fn test_add(backend: &GpuBackend, loop_num: usize) {
    const MAX_IDX: usize = 1000000;
    // Every value involved (inputs and sums) is an integer below 2^24, so it is exactly
    // representable as f32 and the absolute-epsilon checks below act as exact comparisons.
    let a = (0..MAX_IDX).map(|i| i as f32).collect::<Vec<_>>();
    let b = (0..MAX_IDX).map(|i| i as f32 * 10.0).collect::<Vec<_>>();
    let result = compute_sum(&backend, &a, &b).await.unwrap();
    assert_eq!(result.len(), a.len());
    assert_eq!(a.len(), b.len());
    assert_eq!(b.len(), MAX_IDX);
    for (i, (a, b)) in a.iter().zip(b.iter()).enumerate() {
        // Sanity-check that the host-side inputs still hold what was generated above.
        assert!((a - i as f32).abs() < f32::EPSILON, "a[i] and i should be equal within epsilon. i = {i}, a = {a} (loop_num:{loop_num})");
        assert!((b - (i as f32 * 10.0)).abs() < f32::EPSILON, "b[i] and 10*i should be equal within epsilon. i = {i}, b = {b} (loop_num:{loop_num})");
        let r = result[i];
        let expected = a + b;
        // The signed difference is kept for the message; the check itself must use the
        // magnitude, otherwise a result that is too small (negative error) would pass.
        let error = r - expected;
        assert!(error.abs() < f32::EPSILON, "result[i] should be equal to a[i] + b[i] within epsilon. \
            i = {i}, a = {a}, b = {b}, expected = {expected}, r = {r}, error = {error} (loop_num:{loop_num})");
    }
}
/// Creates one WebGPU backend and runs `test_add` against it ten times in sequence.
///
/// Four limits are overridden; all others stay at `wgpu::Limits::default()`, and the
/// requested feature set is `wgpu::Features::default()`.
///
/// # Panics
///
/// Panics with "expect a GPU" if the backend cannot be created, and propagates any
/// panic raised by `test_add`.
#[async_std::test]
async fn test_gpu_pi() {
    let limits = wgpu::Limits {
        max_color_attachments: 4,
        max_inter_stage_shader_variables: 15,
        max_texture_dimension_2d: 4096,
        max_texture_dimension_1d: 4096,
        ..wgpu::Limits::default()
    };
    let web_gpu = WebGpu::new(wgpu::Features::default(), limits)
        .await
        .expect("expect a GPU");
    let backend = GpuBackend::WebGpu(web_gpu);
    // Repetitions share the same backend and run strictly one after another.
    for loop_num in 0..10 {
        test_add(&backend, loop_num).await;
    }
}
}
I tried to collect some data because I thought I saw some patterns but it's nothing I can use.
const MAX_IDX: usize = 1000400;
run 1
result[i] should be equal to a[i] + b[i] within epsilon. i = 3904, a = 3904, b = 39040, expected = 42944, r = 81280, error = 38336 (loop_num:2)
run 2
result[i] should be equal to a[i] + b[i] within epsilon. i = 0, a = 0, b = 0, expected = 0, r = 1000384, error = 1000384 (loop_num:1)
run 3
result[i] should be equal to a[i] + b[i] within epsilon. i = 0, a = 0, b = 0, expected = 0, r = 1000384, error = 1000384 (loop_num:1)
run 4 (resuming after run 7 of MAX_IDX = 1000401)
result[i] should be equal to a[i] + b[i] within epsilon. i = 0, a = 0, b = 0, expected = 0, r = 1000384, error = 1000384 (loop_num:1)
run 5
result[i] should be equal to a[i] + b[i] within epsilon. i = 7942, a = 7942, b = 79420, expected = 87362, r = 166142, error = 78780 (loop_num:2)
const MAX_IDX: usize = 1000401;
run 1
result[i] should be equal to a[i] + b[i] within epsilon. i = 4803, a = 4803, b = 48030, expected = 52833, r = 100159, error = 47326 (loop_num:2)
run 2
result[i] should be equal to a[i] + b[i] within epsilon. i = 4099, a = 4099, b = 40990, expected = 45089, r = 85439, error = 40350 (loop_num:2)
run 3
result[i] should be equal to a[i] + b[i] within epsilon. i = 0, a = 0, b = 0, expected = 0, r = 1000384, error = 1000384 (loop_num:1)
run 4
result[i] should be equal to a[i] + b[i] within epsilon. i = 4547, a = 4547, b = 45470, expected = 50017, r = 94847, error = 44830 (loop_num:2)
run 5
result[i] should be equal to a[i] + b[i] within epsilon. i = 4419, a = 4419, b = 44190, expected = 48609, r = 92159, error = 43550 (loop_num:2)
run 6
result[i] should be equal to a[i] + b[i] within epsilon. i = 0, a = 0, b = 0, expected = 0, r = 1000384, error = 1000384 (loop_num:1)
run 7
result[i] should be equal to a[i] + b[i] within epsilon. i = 8070, a = 8070, b = 80700, expected = 88770, r = 168830, error = 80060 (loop_num:2)
const MAX_IDX: usize = 1000;
run 1
result[i] should be equal to a[i] + b[i] within epsilon. i = 768, a = 768, b = 7680, expected = 8448, r = 14784, error = 6336 (loop_num:1)
run 2
result[i] should be equal to a[i] + b[i] within epsilon. i = 944, a = 944, b = 9440, expected = 10384, r = 18480, error = 8096 (loop_num:1)
run 3
result[i] should be equal to a[i] + b[i] within epsilon. i = 64, a = 64, b = 640, expected = 704, r = 1344, error = 640 (loop_num:8)
run 4
result[i] should be equal to a[i] + b[i] within epsilon. i = 944, a = 944, b = 9440, expected = 10384, r = 19824, error = 9440 (loop_num:7)
This is my first time trying GPU programming, and I'm not going to learn everything in time to help with what could very possibly be a Raspberry Pi driver bug. I'll help however I can, but ultimately I don't expect this to be solved.
Adding the following to the example fails on a raspberry, and by seemingly random amounts every time, even on the same binary.
(The `wgpu::Limits` are weird because the Pi GPU is weird. Missing these is an instant panic.)
I tried to collect some data because I thought I saw some patterns, but it's nothing I can use.
This is my first time trying GPU programming, I'm not gonna learn everything in time to help with what could very possibly be a Raspberry Pi driver bug. I'll help however I can, but ultimately I don't expect this to be solved.