Skip to content

Commit 46f0f41

Browse files
Merge pull request #3 from spcl/python_codegen
Python codegen
2 parents c965562 + 62ffd54 commit 46f0f41

File tree

242 files changed

+19543
-6664
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

242 files changed

+19543
-6664
lines changed

README.md

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,21 @@ For more details, see our [paper](https://arxiv.org/abs/1907.07929).
1414

1515
The library depends on:
1616

17-
* Intel FPGA SDK for OpenCL pro, version 18+ ([http://fpgasoftware.intel.com/opencl/](http://fpgasoftware.intel.com/opencl/))
17+
* Intel FPGA SDK for OpenCL pro, version 19+ ([http://fpgasoftware.intel.com/opencl/](http://fpgasoftware.intel.com/opencl/))
1818
* GCC (version 5+)
1919
* Rapidjson ([http://rapidjson.org/](http://rapidjson.org/))
2020
* Google Test (only for unit tests)
21+
* Python 3.6+
2122

2223
### Installation
2324

2425
After cloning this repository, make sure you clone the [rapidjson](http://rapidjson.org/) submodule dependency and install the Python requirements of the code generator, by executing the following commands:
2526

2627
```
2728
git submodule update --init
29+
pip install -r codegen/requirements.txt
2830
```
2931

30-
After this, the included Makefile can be used to compile code and modules generator:
31-
32-
33-
```
34-
make all
35-
```
3632

3733
## The FBLAS library
3834

@@ -47,19 +43,25 @@ the device, she can invoke the desired FBLAS routines working on the FPGA memory
4743

4844
For further information on how to use the library, please refer to the [wiki](https://github.com/spcl/FBLAS/wiki).
4945

50-
5146
## Publication
5247
If you use FBLAS, please cite us:
5348
```
54-
@article{
55-
author={Tiziano De Matteis and Johannes de Fine Licht and Torsten Hoefler},
56-
title={{FBLAS: Streaming Linear Algebra on FPGA}},
57-
journal={CoRR},
58-
year={2019},
59-
month={Jul.},
60-
volume={abs/1907.07929},
49+
50+
@inproceedings{fblas,
51+
author = {De Matteis, Tiziano and de Fine Licht, Johannes and Hoefler, Torsten},
52+
title = {FBLAS: Streaming Linear Algebra on FPGA},
53+
year = {2020},
54+
isbn = {9781728199986},
55+
publisher = {IEEE Press},
56+
booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
57+
articleno = {59},
58+
numpages = {13},
59+
keywords = {high level synthesis, spatial architectures, hardware library},
60+
location = {Atlanta, Georgia},
61+
series = {SC '20}
6162
}
6263
```
64+
For the Artifact Evaluation of the paper, detailed information is available in the `evaluation` subfolder.
6365

6466

6567
## Contact

codegen/.my_routine.cl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/**
2+
FBLAS: BLAS implementation for Intel FPGA
3+
Copyright (c) 2020 ETH-Zurich. All rights reserved.
4+
See LICENSE for license information.
5+
6+
DOT performs the dot product of two vectors.
7+
8+
Streamed version: data is received from two input streams
9+
CHANNEL_VECTOR_X and CHANNEL_VECTOR_Y having the proper type.
10+
Data elements must be streamed with a padding equal to W
11+
(padding data must be set to zero).
12+
13+
Result is streamed in an output channel at the end of the computation
14+
in a channel CHANNEL_OUT
15+
16+
*/
17+
18+
#pragma OPENCL EXTENSION cl_intel_channels : enable
19+
20+
21+
#define W 64 //width: number of multiplications performed per clock cycle
22+
23+
//namings
24+
#define my_routine streaming_dot
25+
26+
//channels names
27+
#define CHANNEL_VECTOR_X channel_gen_x
28+
#define CHANNEL_VECTOR_Y channel_gen_y
29+
#define CHANNEL_OUT channel_sink
30+
31+
//architecture
32+
#define __STRATIX_10__
33+
34+
//FBLAS_PARAMETERS_END
35+
36+
#include <commons.h>
37+
38+
channel float CHANNEL_VECTOR_X __attribute__((depth(1)));
39+
channel float CHANNEL_VECTOR_Y __attribute__((depth(1)));
40+
channel float CHANNEL_OUT __attribute__((depth(1)));
41+
42+
43+
/**
44+
Performs streaming dot product: data is received through
45+
CHANNEL_VECTOR_X and CHANNEL_VECTOR_Y. Result is sent
46+
to CHANNEL_OUT.
47+
*/
48+
__kernel void my_routine(int N)
49+
{
50+
__constant uint WIDTH = 1
51+
float acc_o=0;
52+
if(N>0)
53+
{
54+
55+
const int outer_loop_limit=1+(int)((N-1)/WIDTH); //ceiling
56+
float x[WIDTH],y[WIDTH];
57+
58+
59+
//Strip mine the computation loop to exploit unrolling
60+
for(int i=0; i<outer_loop_limit; i++)
61+
{
62+
63+
float acc_i=0;
64+
#pragma unroll
65+
for(int j=0;j<WIDTH;j++)
66+
{
67+
x[j]=read_channel_intel(CHANNEL_VECTOR_X);
68+
y[j]=read_channel_intel(CHANNEL_VECTOR_Y);
69+
acc_i+=x[j]*y[j];
70+
71+
}
72+
73+
acc_o+=acc_i;
74+
75+
}
76+
77+
}
78+
else //no computation: result is zero
79+
acc_o=0.0f;
80+
//write to the sink
81+
write_channel_intel(CHANNEL_OUT,acc_o);
82+
}

codegen/codegen/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"helper": [{
3+
"name": "read_vector_x",
4+
"optional_parameters": ["width", "stride"]
5+
},
6+
{
7+
"name": "read_vector_y",
8+
"optional_parameters": ["width", "stride"]
9+
},
10+
{
11+
"name": "generate_dummy_vector",
12+
"optional_parameters": ["width"]
13+
},
14+
{
15+
"name": "write_vector",
16+
"optional_parameters": ["width", "stride"]
17+
},
18+
{
19+
"name": "vector_sink",
20+
"optional_parameters": ["width"]
21+
},
22+
{
23+
"name": "write_scalar"
24+
},
25+
{
26+
"name": "read_matrix",
27+
"required_parameters": ["tiles order", "elements order"],
28+
"optional_parameters": ["tile N size", "tile M size", "width"]
29+
},
30+
{
31+
"name": "write_matrix",
32+
"required_parameters": ["tiles order", "elements order"],
33+
"optional_parameters": ["tile N size", "tile M size", "width"]
34+
},
35+
{
36+
"name": "read_matrix_4_modules",
37+
"required_parameters": ["tiles order", "elements order"],
38+
"optional_parameters": ["tile N size", "tile M size", "width"]
39+
},
40+
{
41+
"name": "generate_dummy_matrix",
42+
"required_parameters": ["tiles order", "elements order"],
43+
"optional_parameters": ["tile N size", "tile M size", "width"]
44+
}
45+
]
46+
}

include/generator/routines_definitions_tier_2.json renamed to codegen/codegen/conf/routine_defs_host_api.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,17 @@
8888
{
8989
"name": "gemm",
9090
"required_parameters": ["order", "transa", "transb"],
91-
"optional_parameters": ["computational width x", "computational width y" , "tile size", "systolic"]
91+
"optional_parameters": ["width x", "width y" , "tile size", "systolic", "vect size"]
9292
},
9393
{
9494
"name": "syrk",
9595
"required_parameters": ["order", "trans", "uplo"],
96-
"optional_parameters": ["computational width x", "computational width y" , "tile size"]
96+
"optional_parameters": ["width x", "width y" , "tile size"]
9797
},
9898
{
9999
"name": "syr2k",
100100
"required_parameters": ["order", "trans", "uplo"],
101-
"optional_parameters": ["computational width x", "computational width y" , "tile size"]
101+
"optional_parameters": ["width x", "width y" , "tile size"]
102102
},
103103
{
104104
"name": "trsm",

include/generator/routines_definitions_tier_1.json renamed to codegen/codegen/conf/routine_defs_modules_codegen.json

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,28 @@
22
"routine": [{
33
"name": "dot",
44
"optional_parameters": ["width"],
5-
"required_inputs": ["x", "y"],
6-
"required_outputs": ["res"]
5+
"required_inputs": ["in_x", "in_y"],
6+
"required_outputs": ["out_res"]
77
},
88
{
99
"name": "axpy",
1010
"optional_parameters": ["width"],
11-
"required_inputs": ["x", "y"],
12-
"required_outputs": ["res"]
11+
"required_inputs": ["in_x", "in_y"],
12+
"required_outputs": ["out_res"]
1313
},
1414
{
1515
"name": "gemv",
1616
"required_parameters": ["trans"],
1717
"optional_parameters": ["width", "tile N size", "tile M size", "A tiles order", "A elements order"],
18-
"required_inputs": ["x", "y","A"],
19-
"required_outputs": ["res"]
18+
"required_inputs": ["in_x", "in_y","in_A"],
19+
"required_outputs": ["out_res"],
20+
"optional_outputs": ["out_y_updates"]
2021
},
2122
{
2223
"name": "ger",
2324
"optional_parameters": ["width", "tile N size", "tile M size", "A tiles order", "A elements order"],
24-
"required_inputs": ["x", "y","A"],
25-
"required_outputs": ["res"]
25+
"required_inputs": ["in_x", "in_y","in_A"],
26+
"required_outputs": ["out_res"]
2627
}
2728
]
2829
}

0 commit comments

Comments
 (0)