Skip to content

Commit 17787e8

Browse files
makoeppel and Marius Koppel authored
add support for cloning of 7 outputs (#58)
Co-authored-by: Marius Koppel <makoppel@olhsw-07.cern.ch>
1 parent 69f483f commit 17787e8

File tree

6 files changed

+168
-8
lines changed

6 files changed

+168
-8
lines changed

hls4ml/backends/fpga/passes/clone.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ def transform(self, model, node):
7979
n_outputs = len(output_map[output]) + in_output
8080
if n_outputs == 1:
8181
continue
82-
if n_outputs > 6:
82+
if n_outputs > 7:
8383
msg = f'ERROR: Cloning output {output} of {node.class_name}\
84-
({node.name}) more than 6 times not currently supported'
84+
({node.name}) more than 7 times not currently supported'
8585
raise ValueError(msg)
8686

8787
out_var = node.get_output_variable(output)

hls4ml/templates/catapult/nnet_utils/nnet_stream.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,31 @@ void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<
130130
}
131131
}
132132

133+
// Clone one input channel into seven output channels (7-output overload of clone_stream).
// Each packet read from `data` is copied element by element (data_T::size elements) into a
// single res_T packet, and that same packet is then written to res1 .. res7.
// The template parameter N is not referenced anywhere in this body.
template <class data_T, class res_T, int N>
134+
void clone_stream(ac_channel<data_T> &data, ac_channel<res_T> &res1, ac_channel<res_T> &res2, ac_channel<res_T> &res3,
135+
ac_channel<res_T> &res4, ac_channel<res_T> &res5, ac_channel<res_T> &res6, ac_channel<res_T> &res7) {
136+
// When __SYNTHESIS__ is not defined, repeat the braced body for as long as data.available(1)
// is true. When it is defined, the loop header is compiled out and the body runs once per call.
#ifndef __SYNTHESIS__
137+
while (data.available(1))
138+
#endif
139+
{
140+
data_T in_data = data.read();
141+
res_T out_data;
142+
143+
// Element-wise copy; assumes res_T is indexable for at least data_T::size elements - TODO confirm.
ClonePack:
144+
for (int j = 0; j < data_T::size; j++) {
145+
out_data[j] = in_data[j];
146+
}
147+
148+
// Fan the single converted packet out to every output channel.
res1.write(out_data);
149+
res2.write(out_data);
150+
res3.write(out_data);
151+
res4.write(out_data);
152+
res5.write(out_data);
153+
res6.write(out_data);
154+
res7.write(out_data);
155+
}
156+
}
157+
133158
template <class data_T, class res_T, int N> void repack_stream(ac_channel<data_T> &data, ac_channel<res_T> &res) {
134159
if (data_T::size == res_T::size) {
135160
for (int i = 0; i < N / data_T::size; i++) {

hls4ml/templates/oneapi/firmware/nnet_utils/nnet_stream.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,51 @@ void clone_stream() {
170170
}
171171
}
172172

173+
template <class data_pipe, class res1_pipe, class res2_pipe, class res3_pipe, class res4_pipe, class res5_pipe,
174+
class res6_pipe, class res7_pipe, int N>
175+
void clone_stream() {
176+
using data_T = typename ExtractPipeType<data_pipe>::value_type;
177+
using res1_T = typename ExtractPipeType<res1_pipe>::value_type;
178+
using res2_T = typename ExtractPipeType<res2_pipe>::value_type;
179+
using res3_T = typename ExtractPipeType<res3_pipe>::value_type;
180+
using res4_T = typename ExtractPipeType<res4_pipe>::value_type;
181+
using res5_T = typename ExtractPipeType<res5_pipe>::value_type;
182+
using res6_T = typename ExtractPipeType<res6_pipe>::value_type;
183+
using res7_T = typename ExtractPipeType<res7_pipe>::value_type;
184+
constexpr auto datasize = std::tuple_size<data_T>{};
185+
CloneLoop:
186+
[[intel::initiation_interval(1)]] for (int i = 0; i < N / datasize; i++) {
187+
data_T in_data = data_pipe::read();
188+
res1_T out_data1;
189+
res2_T out_data2;
190+
res3_T out_data3;
191+
res4_T out_data4;
192+
res5_T out_data5;
193+
res6_T out_data6;
194+
res7_T out_data7;
195+
196+
ClonePack:
197+
#pragma unroll
198+
for (int j = 0; j < datasize; j++) {
199+
out_data1[j] = in_data[j];
200+
out_data2[j] = in_data[j];
201+
out_data3[j] = in_data[j];
202+
out_data4[j] = in_data[j];
203+
out_data5[j] = in_data[j];
204+
out_data6[j] = in_data[j];
205+
out_data7[j] = in_data[j];
206+
}
207+
208+
res1_pipe::write(out_data1);
209+
res2_pipe::write(out_data2);
210+
res3_pipe::write(out_data3);
211+
res4_pipe::write(out_data4);
212+
res5_pipe::write(out_data5);
213+
res6_pipe::write(out_data6);
214+
res6_pipe::write(out_data7);
215+
}
216+
}
217+
173218
template <class data_pipe, class res_pipe, int N> void repack_stream() {
174219
using data_T = typename ExtractPipeType<data_pipe>::value_type;
175220
using res_T = typename ExtractPipeType<res_pipe>::value_type;

hls4ml/templates/quartus/firmware/nnet_utils/nnet_stream.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,43 @@ void clone_stream(stream<data_T> &data, stream<res_T> &res1, stream<res_T> &res2
150150
}
151151
}
152152

153+
// Clone one input stream into seven output streams (7-output overload of clone_stream).
// Runs N / data_T::size iterations; each iteration reads one packet from `data`, copies its
// data_T::size elements into seven separate res_T packets and writes one packet to each of
// res1 .. res7.
template <class data_T, class res_T, int N>
154+
void clone_stream(stream<data_T> &data, stream<res_T> &res1, stream<res_T> &res2, stream<res_T> &res3, stream<res_T> &res4,
155+
stream<res_T> &res5, stream<res_T> &res6, stream<res_T> &res7) {
156+
// Outer loop over packets, annotated with `#pragma ii 1`.
CloneLoop:
157+
#pragma ii 1
158+
for (int i = 0; i < N / data_T::size; i++) {
159+
data_T in_data = data.read();
160+
res_T out_data1;
161+
res_T out_data2;
162+
res_T out_data3;
163+
res_T out_data4;
164+
res_T out_data5;
165+
res_T out_data6;
166+
res_T out_data7;
167+
168+
// Element-wise copy into all seven packets, annotated with `#pragma unroll`.
// Assumes res_T is indexable for at least data_T::size elements - TODO confirm.
ClonePack:
169+
#pragma unroll
170+
for (int j = 0; j < data_T::size; j++) {
171+
out_data1[j] = in_data[j];
172+
out_data2[j] = in_data[j];
173+
out_data3[j] = in_data[j];
174+
out_data4[j] = in_data[j];
175+
out_data5[j] = in_data[j];
176+
out_data6[j] = in_data[j];
177+
out_data7[j] = in_data[j];
178+
}
179+
180+
// One write per output stream per iteration.
res1.write(out_data1);
181+
res2.write(out_data2);
182+
res3.write(out_data3);
183+
res4.write(out_data4);
184+
res5.write(out_data5);
185+
res6.write(out_data6);
186+
res7.write(out_data7);
187+
}
188+
}
189+
153190
template <class data_T, class res_T, int N> void repack_stream(stream<data_T> &data, stream<res_T> &res) {
154191
if (data_T::size == res_T::size) {
155192
#pragma ii 1

hls4ml/templates/vivado/nnet_utils/nnet_stream.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,51 @@ void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stre
179179
}
180180
}
181181

182+
// Clone one input stream into seven output streams (7-output overload of clone_stream).
// Runs N / data_T::size iterations; each iteration reads one packet from `data`, copies its
// data_T::size elements into seven separate res_T packets and writes one packet to each of
// res1 .. res7.
template <class data_T, class res_T, int N>
183+
void clone_stream(hls::stream<data_T> &data, hls::stream<res_T> &res1, hls::stream<res_T> &res2, hls::stream<res_T> &res3,
184+
hls::stream<res_T> &res4, hls::stream<res_T> &res5, hls::stream<res_T> &res6, hls::stream<res_T> &res7) {
185+
// Outer loop over packets; the loop body carries `#pragma HLS PIPELINE`.
CloneLoop:
186+
for (int i = 0; i < N / data_T::size; i++) {
187+
#pragma HLS PIPELINE
188+
189+
data_T in_data = data.read();
190+
res_T out_data1;
191+
res_T out_data2;
192+
res_T out_data3;
193+
res_T out_data4;
194+
res_T out_data5;
195+
res_T out_data6;
196+
res_T out_data7;
197+
// NOTE(review): PRAGMA_DATA_PACK is a macro defined outside this excerpt; presumably it
// applies a data-pack directive to each output packet - confirm against its definition.
PRAGMA_DATA_PACK(out_data1)
198+
PRAGMA_DATA_PACK(out_data2)
199+
PRAGMA_DATA_PACK(out_data3)
200+
PRAGMA_DATA_PACK(out_data4)
201+
PRAGMA_DATA_PACK(out_data5)
202+
PRAGMA_DATA_PACK(out_data6)
203+
PRAGMA_DATA_PACK(out_data7)
204+
205+
// Element-wise copy into all seven packets; the loop body carries `#pragma HLS UNROLL`.
// Assumes res_T is indexable for at least data_T::size elements - TODO confirm.
ClonePack:
206+
for (int j = 0; j < data_T::size; j++) {
207+
#pragma HLS UNROLL
208+
out_data1[j] = in_data[j];
209+
out_data2[j] = in_data[j];
210+
out_data3[j] = in_data[j];
211+
out_data4[j] = in_data[j];
212+
out_data5[j] = in_data[j];
213+
out_data6[j] = in_data[j];
214+
out_data7[j] = in_data[j];
215+
}
216+
217+
// One write per output stream per iteration.
res1.write(out_data1);
218+
res2.write(out_data2);
219+
res3.write(out_data3);
220+
res4.write(out_data4);
221+
res5.write(out_data5);
222+
res6.write(out_data6);
223+
res7.write(out_data7);
224+
}
225+
}
226+
182227
template <class data_T, class res_T, int N> void repack_stream(hls::stream<data_T> &data, hls::stream<res_T> &res) {
183228
if (data_T::size == res_T::size) {
184229
for (int i = 0; i < N / data_T::size; i++) {

test/pytest/test_stream_clone.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@ def model_clone_precision_inheritance():
2222

2323
@pytest.fixture(scope='module')
2424
def model_multi_clone():
25-
# inp will be cloned 6 times
26-
# a will be cloned 5 times
27-
# ab will be cloned 4 times
28-
# abac will be cloned 3 times
29-
# abacabad will be cloned 2 times
25+
# inp will be cloned 7 times
26+
# a will be cloned 6 times
27+
# ab will be cloned 5 times
28+
# abac will be cloned 4 times
29+
# abacabad will be cloned 3 times
30+
# abacabadabacabae will be cloned 2 times
3031

3132
inp = keras.Input(shape=(10,))
3233
a = Dense(10, name='dense_5_copies')(inp)
@@ -35,21 +36,28 @@ def model_multi_clone():
3536
d = Dense(10)(inp)
3637
e = Dense(10)(inp)
3738
f = Dense(10)(inp)
39+
g = Dense(10)(inp)
3840
ab = Add(name='add_4_copies')([a, b])
3941
ac = Add()([a, c])
4042
ad = Add()([a, d])
4143
ae = Add()([a, e])
4244
af = Add()([a, f])
45+
ag = Add()([a, g])
4346
abac = Add(name='add_3_copies')([ab, ac])
4447
abad = Add()([ab, ad])
4548
abae = Add()([ab, ae])
4649
abaf = Add()([ab, af])
50+
abag = Add()([ab, ag])
4751
abacabad = Add(name='add_2_copies')([abac, abad])
4852
abacabae = Add()([abac, abae])
4953
abacabaf = Add()([abac, abaf])
54+
abacabag = Add()([abac, abag])
5055
abacabadabacabae = Add()([abacabad, abacabae])
5156
abacabadabacabaf = Add()([abacabad, abacabaf])
52-
out = Add()([abacabadabacabae, abacabadabacabaf])
57+
abacabadabacabag = Add()([abacabad, abacabag])
58+
abacabadabacabaeabacabadabacabaf = Add()([abacabadabacabae, abacabadabacabaf])
59+
abacabadabacabaeabacabadabacabag = Add()([abacabadabacabae, abacabadabacabag])
60+
out = Add()([abacabadabacabaeabacabadabacabaf, abacabadabacabaeabacabadabacabag])
5361
model = keras.Model(inp, out)
5462
return model
5563

0 commit comments

Comments
 (0)