Skip to content

Commit 9c0f7de

Browse files
committed
Naive version of streaming.
1 parent 943f9d0 commit 9c0f7de

File tree

3 files changed

+150
-24
lines changed

3 files changed

+150
-24
lines changed

src/ImageProcessing/ImageProcessing.fsproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
<None Include="App.config" />
2020
<Compile Include="ImageProcessing.fs" />
2121
<Compile Include="Streaming.fs" />
22+
<Compile Include="Matrices.fs" />
2223
<Compile Include="Main.fs" />
2324
</ItemGroup>
2425
<ItemGroup>

src/ImageProcessing/Main.fs

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,20 @@ type Platforms = CPU = 1 | NVidia = 2 | IntelGPU = 3 | AnyGPU = 4
99
[<CliPrefix(CliPrefix.DoubleDash)>]
1010
[<NoAppSettings>]
1111
type ImageProcessingArguments =
12-
| [<Mandatory>] Input of string
12+
| Input of string
1313
| Output of string
1414
| Platform of Platforms
15+
| WorkGroupSize of uint
16+
| MatrixSize of uint
1517
with
1618
interface IArgParserTemplate with
1719
member arg.Usage =
1820
match arg with
1921
| Input _ -> "Image to process."
2022
| Output _ -> "File to store result."
2123
| Platform _ -> "Where to run."
24+
| WorkGroupSize _ -> "Work group size."
25+
| MatrixSize _ -> "Number of columns (or rows). We use square matrices."
2226
module Main =
2327
//let pathToExamples = "/home/gsv/Projects/TestProj2020/src/ImgProcessing/Examples"
2428
//let inputFolder = System.IO.Path.Combine(pathToExamples, "input")
@@ -32,27 +36,20 @@ module Main =
3236
let main (argv: string array) =
3337
let parser = ArgumentParser.Create<ImageProcessingArguments>(programName = "ImageProcessing")
3438
let results = parser.ParseCommandLine argv
35-
let inputFile = results.GetResult(Input, defaultValue = "")
36-
let outputFile = results.GetResult(Output, defaultValue = "out.jpg")
39+
let input = results.GetResult(Input, defaultValue = "")
40+
let output = results.GetResult(Output, defaultValue = "out.jpg")
3741
let platform = results.GetResult(Platform, defaultValue = Platforms.CPU)
38-
42+
let workGroupSize = results.GetResult(WorkGroupSize, defaultValue = 64u)
43+
let matrixSize = results.GetResult(MatrixSize, defaultValue = 512u)
44+
3945
let filters = [
4046
ImageProcessing.gaussianBlurKernel
4147
ImageProcessing.gaussianBlurKernel
4248
ImageProcessing.edgesKernel
4349
]
4450

4551

46-
match platform with
47-
| Platforms.CPU ->
48-
let mutable image = ImageProcessing.loadAs2DArray inputFile
49-
printfn $"Device: CPU"
50-
let start = System.DateTime.Now
51-
for filter in filters do
52-
image <- ImageProcessing.applyFilter filter image
53-
printfn $"CPU processing time: {(System.DateTime.Now - start).TotalMilliseconds} ms"
54-
ImageProcessing.save2DByteArrayAsImage image outputFile
55-
| _ ->
52+
let applyFiltersOnGPU =
5653
let device =
5754
match platform with
5855
| Platforms.AnyGPU -> ClDevice.GetFirstAppropriateDevice()
@@ -66,30 +63,39 @@ module Main =
6663
printfn $"Device: %A{device.Name}"
6764

6865
let context = ClContext(device)
69-
let applyFiltersOnGPU = ImageProcessing.applyFiltersGPU context 64
70-
71-
66+
ImageProcessing.applyFiltersGPU context 64
67+
(*
68+
match platform with
69+
| Platforms.CPU ->
70+
let mutable image = ImageProcessing.loadAs2DArray input
71+
printfn $"Device: CPU"
7272
let start = System.DateTime.Now
73-
let grayscaleImage = ImageProcessing.loadAsImage inputFile
73+
for filter in filters do
74+
image <- ImageProcessing.applyFilter filter image
75+
printfn $"CPU processing time: {(System.DateTime.Now - start).TotalMilliseconds} ms"
76+
ImageProcessing.save2DByteArrayAsImage image output
77+
| _ ->
78+
let start = System.DateTime.Now
79+
let grayscaleImage = ImageProcessing.loadAsImage input
7480
printfn $"Image reading time: {(System.DateTime.Now - start).TotalMilliseconds} ms"
7581
7682
let start = System.DateTime.Now
7783
let result = applyFiltersOnGPU filters grayscaleImage
7884
printfn $"GPU processing time: {(System.DateTime.Now - start).TotalMilliseconds} ms"
79-
ImageProcessing.saveImage result outputFile
85+
printfn $"R: %A{result}"
86+
ImageProcessing.saveImage result output
87+
*)
8088

81-
(*
8289
let start = System.DateTime.Now
8390

84-
Streaming.processAllFiles inputFolder outputFolder [
85-
//applyFiltersOnNvGPU filters
86-
applyFiltersOnIntelGPU filters
91+
Streaming.processAllFiles input output [
92+
applyFiltersOnGPU filters
8793
]
8894

8995
printfn
9096
$"TotalTime = %f{(System.DateTime.Now
9197
- start)
9298
.TotalMilliseconds}"
93-
*)
99+
94100

95101
0

src/ImageProcessing/Matrices.fs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
module ImageProcessing.Matrices
2+
3+
open Brahma.FSharp
4+
5+
/// Module-wide pseudo-random generator shared by the random matrix helpers below.
let rand = System.Random()
6+
7+
/// Builds an n-by-n jagged array (an array of n rows, each of length n).
/// Every row is produced by `Array.init n init`, so `init` receives the
/// column index of the element it is asked to create.
let getRandomMatrix n init =
    let makeRow () = Array.init n init
    [| for _ in 1 .. n -> makeRow () |]
12+
13+
/// Builds an n-by-n jagged matrix whose elements come from `rand.Next()`.
let getRandomIntMatrix n =
    let nextInt (_: int) = rand.Next()
    getRandomMatrix n nextInt
14+
/// Builds an n-by-n jagged matrix whose elements come from `rand.NextDouble()`.
let getRandomFloatMatrix n =
    let nextFloat (_: int) = rand.NextDouble()
    getRandomMatrix n nextFloat
15+
16+
/// Tiled matrix multiplication kernel over a generic "add"/"multiply" pair.
///
/// Compiles a kernel that computes `m3 = m1 * m2` for square n-by-n matrices
/// stored as flat arrays, where element (row, col) lives at `row * n + col`
/// (the same layout `multiplyKernel1` uses). Each work-group first copies one
/// `localWorkSize`-by-`localWorkSize` tile of `m1` and of `m2` into local arrays,
/// then accumulates the partial products from those tiles.
///
/// Parameters:
///   clContext     - context used to compile the kernel.
///   localWorkSize - work-group edge length; also the tile edge length.
///   opAdd, opMult - quoted binary operations spliced into the kernel.
///   zero          - initial value of the accumulator.
///
/// Returns a function `commandQueue -> m1 -> m2 -> m3 -> n -> m3` that posts the
/// "set arguments" and "run" messages to the queue and returns the output buffer.
///
/// Precondition: the tile loop runs `n / localWorkSize` times (integer division),
/// so `n` must be a multiple of `localWorkSize`; otherwise the trailing partial
/// tile is not processed.
let multiplyKernel2 (clContext: ClContext) localWorkSize opAdd opMult zero =
    let kernel =
        <@
            fun (r: Range2D) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
                // Position of this work-item inside its tile.
                let row = r.LocalID0
                let col = r.LocalID1
                // Position of this work-item in the whole matrix. The global id is
                // used directly (as in multiplyKernel1): scaling it by localWorkSize
                // again would index past the end of the matrix for every work-group
                // except the first one.
                let globalRow = r.GlobalID0
                let globalCol = r.GlobalID1

                let m1Submatrix = localArray (localWorkSize * localWorkSize)
                let m2Submatrix = localArray (localWorkSize * localWorkSize)
                let mutable res = zero

                for t in 0 .. (n / localWorkSize) - 1 do
                    let tiledRow = localWorkSize * t + row
                    let tiledCol = localWorkSize * t + col
                    // Row-major loads: tile t of row `globalRow` of m1 and
                    // tile t of column `globalCol` of m2.
                    m1Submatrix.[row * localWorkSize + col] <- m1.[globalRow * n + tiledCol]
                    m2Submatrix.[row * localWorkSize + col] <- m2.[tiledRow * n + globalCol]

                    // Both tiles must be completely loaded before anyone reads them.
                    barrierLocal ()

                    for k in 0 .. localWorkSize - 1 do
                        res <- (%opAdd) res ((%opMult) m1Submatrix.[row * localWorkSize + k] m2Submatrix.[localWorkSize * k + col])

                    // Nobody may overwrite the tiles while others still read them.
                    barrierLocal ()

                m3.[globalRow * n + globalCol] <- res
        @>

    let kernel = clContext.Compile kernel

    fun (commandQueue: MailboxProcessor<_>) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->

        // One work-item per output element, grouped into square tiles.
        let ndRange =
            Range2D(
                n,
                n,
                localWorkSize,
                localWorkSize
            )

        let kernel = kernel.GetKernel()
        commandQueue.Post(Msg.MsgSetArguments(fun () -> kernel.KernelFunc ndRange m1 m2 m3 n))
        commandQueue.Post(Msg.CreateRunMsg<_, _> kernel)
        m3
61+
62+
/// Straightforward (untiled) matrix multiplication kernel over a generic
/// "add"/"multiply" pair.
///
/// Each work-item computes a single output element: it walks index `k` from
/// 0 to n - 1, combining `m1.[rowIdx * n + k]` with `m2.[n * k + colIdx]` via
/// `opMult`, folds the products with `opAdd` starting from `zero`, and stores
/// the result at `m3.[rowIdx * n + colIdx]`.
///
/// Returns a function `commandQueue -> m1 -> m2 -> m3 -> n -> m3` that posts the
/// "set arguments" and "run" messages to the queue and returns the output buffer.
let multiplyKernel1 (clContext: ClContext) localWorkSize opAdd opMult zero =
    let kernelExpr =
        <@
            fun (r: Range2D) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
                let rowIdx = r.GlobalID0
                let colIdx = r.GlobalID1

                let mutable acc = zero

                for k in 0 .. n - 1 do
                    acc <- (%opAdd) acc ((%opMult) m1.[rowIdx * n + k] m2.[n * k + colIdx])

                m3.[rowIdx * n + colIdx] <- acc
        @>

    let compiledKernel = clContext.Compile kernelExpr

    fun (commandQueue: MailboxProcessor<_>) (m1: ClArray<_>) (m2: ClArray<_>) (m3: ClArray<_>) n ->
        // n-by-n global range split into localWorkSize-by-localWorkSize groups.
        let ndRange = Range2D(n, n, localWorkSize, localWorkSize)
        let kernelInstance = compiledKernel.GetKernel()

        commandQueue.Post(Msg.MsgSetArguments(fun () -> kernelInstance.KernelFunc ndRange m1 m2 m3 n))
        commandQueue.Post(Msg.CreateRunMsg<_, _> kernelInstance)
        m3
91+
92+
/// Creates a host-side matrix multiplication function backed by `multiplyKernel1`.
///
/// The kernel is compiled and the command queue is created once, when the first
/// five arguments are supplied; the returned function can then be called repeatedly.
///
/// Parameters:
///   clContext     - context used to compile the kernel and allocate buffers.
///   localWorkSize - work-group edge length passed to the kernel launcher.
///   opAdd, opMult - quoted binary operations used in place of "+" and "*".
///   zero          - initial value of each accumulator.
///
/// Returns `m1 -> m2 -> result`, where `m1` and `m2` are square jagged matrices
/// of the same size n and `result` is the product as a flat array of length
/// n * n with element (i, j) at index `i * n + j`. Empty input yields an empty array.
///
/// Raises System.ArgumentException when `m1` or `m2` is not an n-by-n matrix
/// (n = m1.Length); previously such input led to out-of-bounds reads on the device.
///
/// NOTE(review): `n` is presumably required to be a multiple of `localWorkSize`
/// by the underlying runtime - confirm and validate here if so.
/// NOTE(review): the output buffer is created with HostAccessMode.NotAccessible
/// yet is copied back to the host below - confirm this is permitted.
let applyMultiplyGPU<'a,'b,'e,'f> (clContext: ClContext) localWorkSize (opAdd:Quotations.Expr<'a -> 'b -> 'a>) (opMult:Quotations.Expr<'e -> 'f -> 'b>) (zero:'a) =
    let kernel = multiplyKernel1 clContext localWorkSize opAdd opMult zero
    let queue = clContext.QueueProvider.CreateQueue()

    fun (m1: 'e[][]) (m2: 'f[][]) ->
        let n = m1.Length

        // The kernel indexes both inputs as dense n-by-n arrays, so reject
        // anything else before it reaches the device.
        if m1 |> Array.exists (fun row -> row.Length <> n) then
            invalidArg "m1" (sprintf "Expected a square %dx%d matrix." n n)

        if m2.Length <> n || m2 |> Array.exists (fun row -> row.Length <> n) then
            invalidArg "m2" (sprintf "Expected a square %dx%d matrix, the same size as m1." n n)

        if n = 0 then
            // Nothing to compute; avoid allocating zero-sized device buffers.
            Array.empty
        else
            // Flatten row by row: element (i, j) ends up at index i * n + j.
            let m1_gpu =
                clContext.CreateClArray<_>(Array.concat m1, HostAccessMode.NotAccessible)

            let m2_gpu =
                clContext.CreateClArray<_>(Array.concat m2, HostAccessMode.NotAccessible)

            let m3_gpu =
                clContext.CreateClArray(
                    n * n,
                    HostAccessMode.NotAccessible,
                    allocationMode = AllocationMode.Default
                )

            // The launcher returns the output buffer we already hold.
            kernel queue m1_gpu m2_gpu m3_gpu n |> ignore

            let result : 'a[] = Array.zeroCreate (n * n)

            // Blocks until the device data has been copied into `result`.
            let result = queue.PostAndReply(fun ch -> Msg.CreateToHostMsg(m3_gpu, result, ch))
            queue.Post(Msg.CreateFreeMsg m1_gpu)
            queue.Post(Msg.CreateFreeMsg m2_gpu)
            queue.Post(Msg.CreateFreeMsg m3_gpu)
            result

0 commit comments

Comments
 (0)