1+ module ImageProcessing.Matrices
2+
3+ open Brahma.FSharp
4+
5+ let rand = new System.Random()
6+
7+ let getRandomMatrix ( n : uint ) init =
8+
9+ [|
10+ for i in 0 .. int n - 1 -> Array.init ( int n) init
11+ |]
12+
13+ let check opAdd opMult zero ( m1 : array < array < _ >>) ( m2 : array < array < _ >>) ( m3 : array < _ >) =
14+ let res = Array.init ( m1.Length * m1.Length) ( fun _ -> zero)
15+ for i in 0 .. m1.Length - 1 do
16+ for j in 0 .. m1.Length - 1 do
17+ for k in 0 .. m1.Length - 1 do
18+ res.[ i* m1.Length + j] <- opAdd res.[ i * m1.Length + j] ( opMult m1.[ i].[ k] m2.[ k].[ j])
19+
20+ Array.iteri2 ( fun i r1 r2 -> if r1 <> r2 then printfn $" Expected {r1}, got {r2}" ) res m3
21+
22+
23+ let getRandomIntMatrix n = getRandomMatrix n ( fun i -> rand.Next(- 10 , 10 ))
24+ let getRandomFloatMatrix n = getRandomMatrix n ( fun i -> rand.NextDouble())
25+
26+ let multiplyKernel2 ( clContext : ClContext ) ( localWorkSize : uint ) opAdd opMult zero =
27+ let localWorkSize = int localWorkSize
28+ let size = FSharp.Quotations.Evaluator.QuotationEvaluator.Evaluate <@ localWorkSize * localWorkSize@>
29+ let kernel =
30+ <@
31+ fun ( r : Range2D ) ( m1 : ClArray < _ >) ( m2 : ClArray < _ >) ( m3 : ClArray < _ >) n ->
32+ let localRow = r.LocalID0
33+ let localCol = r.LocalID1
34+ let globalRow = r.GlobalID0
35+ let globalCol = r.GlobalID1
36+
37+ let m1Submatrix = localArray size
38+ let m2Submatrix = localArray size
39+ let mutable res = zero
40+
41+ for t in 0 .. ( n / localWorkSize) - 1 do
42+ let tiledRow = localWorkSize * t + localRow
43+ let tiledCol = localWorkSize * t + localCol
44+ m1Submatrix[ localRow * localWorkSize + localCol] <- m1[ globalRow * n + tiledCol]
45+ m2Submatrix[ localRow * localWorkSize + localCol] <- m2[ tiledRow * n + globalCol]
46+
47+ barrierLocal()
48+
49+ for k in 0 .. localWorkSize - 1 do
50+ res <- (% opAdd) res ((% opMult) m1Submatrix.[ localRow * localWorkSize + k] m2Submatrix.[ localWorkSize * k + localCol])
51+ barrierLocal()
52+
53+ m3.[ globalRow * n + globalCol] <- res
54+ @>
55+
56+ let kernel = clContext.Compile kernel
57+
58+ fun ( commandQueue : MailboxProcessor < _ >) ( m1 : ClArray < _ >) ( m2 : ClArray < _ >) ( m3 : ClArray < _ >) n ->
59+
60+ let ndRange =
61+ Range2D(
62+ n,
63+ n,
64+ localWorkSize,
65+ localWorkSize
66+ )
67+
68+ let kernel = kernel.GetKernel()
69+ commandQueue.Post( Msg.MsgSetArguments( fun () -> kernel.KernelFunc ndRange m1 m2 m3 n))
70+ commandQueue.Post( Msg.CreateRunMsg<_, _> kernel)
71+ m3
72+
73+ let multiplyKernel1 ( clContext : ClContext ) ( localWorkSize : uint ) opAdd opMult zero =
74+ let kernel =
75+ <@
76+ fun ( r : Range2D ) ( m1 : ClArray < _ >) ( m2 : ClArray < _ >) ( m3 : ClArray < _ >) n ->
77+ let i = r.GlobalID0
78+ let j = r.GlobalID1
79+
80+ let mutable res = zero
81+ for k in 0 .. n - 1 do
82+ res <- (% opAdd) res ((% opMult) m1.[ i * n + k] m2.[ n * k + j])
83+ m3.[ i * n + j] <- res
84+ @>
85+
86+ let kernel = clContext.Compile kernel
87+ let localWorkSize = int localWorkSize
88+ fun ( commandQueue : MailboxProcessor < _ >) ( m1 : ClArray < _ >) ( m2 : ClArray < _ >) ( m3 : ClArray < _ >) n ->
89+
90+ let ndRange =
91+ Range2D(
92+ n,
93+ n,
94+ localWorkSize,
95+ localWorkSize
96+ )
97+
98+ let kernel = kernel.GetKernel()
99+ commandQueue.Post( Msg.MsgSetArguments( fun () -> kernel.KernelFunc ndRange m1 m2 m3 n))
100+ commandQueue.Post( Msg.CreateRunMsg<_, _> kernel)
101+ m3
102+
103+ let applyMultiplyGPU < 'a , 'b , 'e , 'f > ( clContext : ClContext ) localWorkSize ( opAdd : Quotations.Expr < 'a -> 'b -> 'a >) ( opMult : Quotations.Expr < 'e -> 'f -> 'b >) ( zero : 'a ) =
104+ //let kernel = multiplyKernel1 clContext localWorkSize opAdd opMult zero
105+ let kernel = multiplyKernel2 clContext localWorkSize opAdd opMult zero
106+ let queue = clContext.QueueProvider.CreateQueue()
107+
108+ fun ( m1 : 'e [][]) ( m2 : 'f [][]) ->
109+ printfn " !!!1!!!"
110+
111+ let m1_gpu =
112+ clContext.CreateClArray<_>( Array.concat m1, HostAccessMode.NotAccessible)
113+ printfn " !!!2!!!"
114+ let m2_gpu =
115+ clContext.CreateClArray<_>( Array.concat m2, HostAccessMode.NotAccessible)
116+ printfn " !!!3!!!"
117+
118+ let m3_gpu =
119+ clContext.CreateClArray(
120+ m1.Length * m1.Length,
121+ HostAccessMode.NotAccessible,
122+ allocationMode = AllocationMode.Default
123+ )
124+ printfn " !!!4!!!"
125+ let x = kernel queue m1_ gpu m2_ gpu m3_ gpu m1.Length
126+ printfn " !!!5!!!"
127+ let result : 'a [] = Array.zeroCreate( m1.Length * m1.Length)
128+ printfn " !!!6!!!"
129+ let result = queue.PostAndReply( fun ch -> Msg.CreateToHostMsg( m3_ gpu, result, ch))
130+ printfn " !!!7!!!"
131+ queue.Post( Msg.CreateFreeMsg m1_ gpu)
132+ printfn " !!!8!!!"
133+ queue.Post( Msg.CreateFreeMsg m2_ gpu)
134+ printfn " !!!9!!!"
135+ queue.Post( Msg.CreateFreeMsg m3_ gpu)
136+ printfn " !!!10!!!"
137+ result
0 commit comments