@@ -17,7 +17,7 @@ let cpuMxM opAdd opMult zero (m1 : array<array<_>>) (m2: array<array<_>>) =
1717 for j in 0 .. m1.Length - 1 do
1818 for k in 0 .. m1.Length - 1 do
1919 res.[ i* m1.Length + j] <- opAdd res.[ i * m1.Length + j] ( opMult m1.[ i].[ k] m2.[ k].[ j])
20- res
20+ res, 0.0
2121
2222let cpuParallelMxM opAdd opMult zero ( m1 : array < array < _ >>) ( m2 : array < array < _ >>) =
2323 let res = Array.init ( m1.Length * m1.Length) ( fun _ -> zero)
@@ -27,16 +27,17 @@ let cpuParallelMxM opAdd opMult zero (m1 : array<array<_>>) (m2: array<array<_>>
2727 for k in 0 .. m1.Length - 1 do
2828 res.[ i* m1.Length + j] <- opAdd res.[ i * m1.Length + j] ( opMult row.[ k] m2.[ k].[ j])
2929 )
30- res
30+ res, 0.0
3131
/// Recomputes the product of `m1` and `m2` with `cpuMxM` and compares it,
/// element by element, against the flat array `m3`. One line is printed for
/// every position where the two values differ; nothing is returned.
/// The second component of the tuple returned by `cpuMxM` is ignored.
let check opAdd opMult zero (m1 : array<array<_>>) (m2 : array<array<_>>) (m3 : array<_>) =
    let expected, _ = cpuMxM opAdd opMult zero m1 m2
    // The element index is not needed for the report, so iterate pairwise.
    (expected, m3)
    ||> Array.iter2 (fun r1 r2 ->
        if r1 <> r2 then
            printfn $" Expected {r1}, got {r2}")
3535
3636
/// Calls `getRandomMatrix n` with a generator that ignores its argument and
/// returns `rand.Next(-10, 10)`.
let getRandomIntMatrix n =
    getRandomMatrix n (fun _ -> rand.Next(-10, 10))
/// Calls `getRandomMatrix n` with a generator that ignores its argument and
/// returns `rand.Next()` converted with `byte`.
let getRandomByteMatrix n =
    getRandomMatrix n (fun _ -> byte (rand.Next()))
/// Calls `getRandomMatrix n` with a generator that ignores its argument and
/// returns `rand.NextSingle()`.
let getRandomFloat32Matrix n =
    getRandomMatrix n (fun _ -> rand.NextSingle())
/// Calls `getRandomMatrix n` with a generator that ignores its argument and
/// returns `rand.NextDouble()`.
let getRandomFloat64Matrix n =
    getRandomMatrix n (fun _ -> rand.NextDouble())
/// Calls `getRandomMatrix n` with a generator that ignores its argument,
/// draws `rand.Next(-10, 10)`, and yields `Some x` when the draw is divisible
/// by 3 and `None` otherwise.
let getRandomOptionIntMatrix n =
    let generate _ =
        match rand.Next(-10, 10) with
        | x when x % 3 = 0 -> Some x
        | _ -> None
    getRandomMatrix n generate
4142
4243let multiplyKernel4 ( clContext : ClContext ) ( localWorkSize : uint ) ( threadTileSize : uint ) opAdd opMult zero =
@@ -270,7 +271,7 @@ let multiplyKernel0 (clContext: ClContext) (localWorkSize: uint) opAdd opMult ze
270271 commandQueue.Post( Msg.CreateRunMsg<_, _> kernel)
271272 m3
272273
273- let applyMultiplyGPU < 'a , 'b , 'e , 'f > ( kernel : Kernels ) ( clContext : ClContext ) localWorkSize workPerThread ( opAdd : Quotations.Expr < 'a -> 'b -> 'a >) ( opMult : Quotations.Expr < 'e -> 'f -> 'b >) ( zero : Quotations.Expr < 'a >) =
274+ let applyMultiplyGPU < 'a , 'b , 'e , 'f > ( kernel : Kernels ) ( clContext : ClContext ) ( numToRun : uint ) localWorkSize workPerThread ( opAdd : Quotations.Expr < 'a -> 'b -> 'a >) ( opMult : Quotations.Expr < 'e -> 'f -> 'b >) ( zero : Quotations.Expr < 'a >) =
274275 let kernel =
275276 match kernel with
276277 | Kernels.K0 -> multiplyKernel0 clContext localWorkSize opAdd opMult zero
@@ -280,34 +281,38 @@ let applyMultiplyGPU<'a,'b,'e,'f> (kernel:Kernels) (clContext: ClContext) localW
280281 | Kernels.K4 -> multiplyKernel4 clContext localWorkSize workPerThread opAdd opMult zero
281282 | x -> failwithf $" Unexpected kernel {x}."
282283 let queue = clContext.QueueProvider.CreateQueue()
284+ //queue.Error.Add(fun x -> printfn "%A" x)
285+ let numToRun = int numToRun
283286
284287 fun ( m1 : 'e [][]) ( m2 : 'f [][]) ->
285-
286- let m1_gpu =
287- clContext.CreateClArray<_>( Array.concat m1, HostAccessMode.NotAccessible)
288-
289- let m2_gpu =
290- clContext.CreateClArray<_>( Array.concat m2, HostAccessMode.NotAccessible)
291-
292- let m3_gpu =
293- clContext.CreateClArray(
294- m1.Length * m1.Length,
295- HostAccessMode.NotAccessible,
296- deviceAccessMode= DeviceAccessMode.WriteOnly,
297- allocationMode = AllocationMode.Default
298- )
299-
300- let x = kernel queue m1_ gpu m2_ gpu m3_ gpu m1.Length
301-
288+ let start = System.DateTime.Now
302289 let result : 'a [] = Array.zeroCreate( m1.Length * m1.Length)
290+ for i in 0 .. numToRun - 1 do
291+ let m1_gpu =
292+ clContext.CreateClArray<_>( Array.concat m1, HostAccessMode.NotAccessible)
293+
294+ let m2_gpu =
295+ clContext.CreateClArray<_>( Array.concat m2, HostAccessMode.NotAccessible)
296+
297+ let m3_gpu =
298+ clContext.CreateClArray(
299+ m1.Length * m1.Length,
300+ HostAccessMode.NotAccessible,
301+ deviceAccessMode= DeviceAccessMode.WriteOnly,
302+ allocationMode = AllocationMode.Default
303+ )
304+
305+ let x = kernel queue m1_ gpu m2_ gpu m3_ gpu m1.Length
306+
307+ let result = queue.PostAndReply( fun ch -> Msg.CreateToHostMsg( m3_ gpu, result, ch))
308+
309+ queue.Post( Msg.CreateFreeMsg m1_ gpu)
310+
311+ queue.Post( Msg.CreateFreeMsg m2_ gpu)
312+
313+ queue.Post( Msg.CreateFreeMsg m3_ gpu)
314+
315+ let totalTime = ( System.DateTime.Now - start) .TotalMilliseconds
303316
304- let result = queue.PostAndReply( fun ch -> Msg.CreateToHostMsg( m3_ gpu, result, ch))
305-
306- queue.Post( Msg.CreateFreeMsg m1_ gpu)
307-
308- queue.Post( Msg.CreateFreeMsg m2_ gpu)
309-
310- queue.Post( Msg.CreateFreeMsg m3_ gpu)
311-
312- result
317+ result, ( totalTime / ( float numToRun))
313318
0 commit comments