Skip to content

Commit 7761ebd

Browse files
committed
Fix loading of tiles
1 parent 0413591 commit 7761ebd

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
module ImageProcessing.Matrices
2+
3+
open Brahma.FSharp
4+
5+
let rand = new System.Random()
6+
7+
let getRandomMatrix (n: uint) init =
8+
9+
[|
10+
for i in 0 .. int n - 1 -> Array.init (int n) init
11+
|]
12+
13+
let check opAdd opMult zero (m1 : array<array<_>>) (m2: array<array<_>>) (m3:array<_>) =
14+
let res = Array.init (m1.Length * m1.Length) (fun _ -> zero)
15+
for i in 0..m1.Length - 1 do
16+
for j in 0..m1.Length - 1 do
17+
for k in 0..m1.Length - 1 do
18+
res.[i*m1.Length + j] <- opAdd res.[i * m1.Length + j] (opMult m1.[i].[k] m2.[k].[j])
19+
20+
Array.iteri2 (fun i r1 r2 -> if r1 <> r2 then printfn $"Expected {r1}, got {r2}") res m3
21+
22+
23+
let getRandomIntMatrix n= getRandomMatrix n (fun i -> rand.Next(-10,10))
24+
let getRandomFloatMatrix n= getRandomMatrix n (fun i -> rand.NextDouble())
25+
26+
let multiplyKernel2 (clContext: ClContext) (localWorkSize:uint) opAdd opMult zero =
27+
let localWorkSize = int localWorkSize
28+
let size = FSharp.Quotations.Evaluator.QuotationEvaluator.Evaluate <@localWorkSize * localWorkSize@>
29+
let kernel =
30+
<@
31+
fun (r: Range2D) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
32+
let localRow = r.LocalID0
33+
let localCol = r.LocalID1
34+
let globalRow = r.GlobalID0
35+
let globalCol = r.GlobalID1
36+
37+
let m1Submatrix = localArray size
38+
let m2Submatrix = localArray size
39+
let mutable res = zero
40+
41+
for t in 0 .. (n / localWorkSize) - 1 do
42+
let tiledRow = localWorkSize * t + localRow
43+
let tiledCol = localWorkSize * t + localCol
44+
m1Submatrix[localRow * localWorkSize + localCol] <- m1[globalRow * n + tiledCol]
45+
m2Submatrix[localRow * localWorkSize + localCol] <- m2[tiledRow * n + globalCol]
46+
47+
barrierLocal()
48+
49+
for k in 0 .. localWorkSize - 1 do
50+
res <- (%opAdd) res ((%opMult) m1Submatrix.[localRow * localWorkSize + k] m2Submatrix.[localWorkSize * k + localCol])
51+
barrierLocal()
52+
53+
m3.[globalRow * n + globalCol] <- res
54+
@>
55+
56+
let kernel = clContext.Compile kernel
57+
58+
fun (commandQueue: MailboxProcessor<_>) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
59+
60+
let ndRange =
61+
Range2D(
62+
n,
63+
n,
64+
localWorkSize,
65+
localWorkSize
66+
)
67+
68+
let kernel = kernel.GetKernel()
69+
commandQueue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange m1 m2 m3 n))
70+
commandQueue.Post(Msg.CreateRunMsg<_, _> kernel)
71+
m3
72+
73+
let multiplyKernel1 (clContext: ClContext) (localWorkSize: uint) opAdd opMult zero =
74+
let kernel =
75+
<@
76+
fun (r: Range2D) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
77+
let i = r.GlobalID0
78+
let j = r.GlobalID1
79+
80+
let mutable res = zero
81+
for k in 0 .. n - 1 do
82+
res <- (%opAdd) res ((%opMult) m1.[i * n + k] m2.[n * k + j])
83+
m3.[i * n + j] <- res
84+
@>
85+
86+
let kernel = clContext.Compile kernel
87+
let localWorkSize = int localWorkSize
88+
fun (commandQueue: MailboxProcessor<_>) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
89+
90+
let ndRange =
91+
Range2D(
92+
n,
93+
n,
94+
localWorkSize,
95+
localWorkSize
96+
)
97+
98+
let kernel = kernel.GetKernel()
99+
commandQueue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange m1 m2 m3 n))
100+
commandQueue.Post(Msg.CreateRunMsg<_, _> kernel)
101+
m3
102+
103+
let applyMultiplyGPU<'a,'b,'e,'f> (clContext: ClContext) localWorkSize (opAdd:Quotations.Expr<'a -> 'b -> 'a>) (opMult:Quotations.Expr<'e -> 'f -> 'b>) (zero:'a) =
104+
//let kernel = multiplyKernel1 clContext localWorkSize opAdd opMult zero
105+
let kernel = multiplyKernel2 clContext localWorkSize opAdd opMult zero
106+
let queue = clContext.QueueProvider.CreateQueue()
107+
108+
fun (m1: 'e[][]) (m2: 'f[][]) ->
109+
printfn "!!!1!!!"
110+
111+
let m1_gpu =
112+
clContext.CreateClArray<_>(Array.concat m1, HostAccessMode.NotAccessible)
113+
printfn "!!!2!!!"
114+
let m2_gpu =
115+
clContext.CreateClArray<_>(Array.concat m2, HostAccessMode.NotAccessible)
116+
printfn "!!!3!!!"
117+
118+
let m3_gpu =
119+
clContext.CreateClArray(
120+
m1.Length * m1.Length,
121+
HostAccessMode.NotAccessible,
122+
allocationMode = AllocationMode.Default
123+
)
124+
printfn "!!!4!!!"
125+
let x = kernel queue m1_gpu m2_gpu m3_gpu m1.Length
126+
printfn "!!!5!!!"
127+
let result : 'a[] = Array.zeroCreate(m1.Length * m1.Length)
128+
printfn "!!!6!!!"
129+
let result = queue.PostAndReply(fun ch -> Msg.CreateToHostMsg(m3_gpu, result, ch))
130+
printfn "!!!7!!!"
131+
queue.Post(Msg.CreateFreeMsg m1_gpu)
132+
printfn "!!!8!!!"
133+
queue.Post(Msg.CreateFreeMsg m2_gpu)
134+
printfn "!!!9!!!"
135+
queue.Post(Msg.CreateFreeMsg m3_gpu)
136+
printfn "!!!10!!!"
137+
result

0 commit comments

Comments
 (0)