Skip to content

Commit 486df0e

Browse files
committed
[fusilli] Implemented driver for LayerNorm fwd
Signed-off-by: Alexandra Sidorova <[email protected]>
1 parent: 2f3b912 · commit: 486df0e

File tree

3 files changed

+426
-176
lines changed

3 files changed

+426
-176
lines changed

benchmarks/CMakeLists.txt

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,60 @@ add_fusilli_benchmark(
123123
--device 0 --iter 10 conv -F 2 --bf16 -n 64 -c 3 -H 32 -W 32 -k 32 -y 3 -x 3 -u 1 -v 1 -p 0 -q 0 -l 1 -j 1 --in_layout "NCHW" --fil_layout "NCHW" --out_layout "NCHW" --spatial_dim 2
124124
)
125125

126+
127+
# Layer normalization benchmarks
128+
# Inference forward (F=1) benchmarks
129+
add_fusilli_benchmark(
130+
NAME fusilli_benchmark_layernorm_infer_nchw_fp32
131+
DRIVER fusilli_benchmark_driver
132+
ARGS
133+
--device 0 --iter 10 layernorm --input 16x128x64x32 -F 1 --type f32 --layout NCHW
134+
)
135+
136+
add_fusilli_benchmark(
137+
NAME fusilli_benchmark_layernorm_infer_nhwc_fp16
138+
DRIVER fusilli_benchmark_driver
139+
ARGS
140+
--device 0 --iter 10 layernorm --input 16x128x64x32 -F 1 --type f16 --layout NHWC
141+
)
142+
143+
add_fusilli_benchmark(
144+
NAME fusilli_benchmark_layernorm_infer_nhwc_bf16_scale_bias
145+
DRIVER fusilli_benchmark_driver
146+
ARGS
147+
--device 0 --iter 10 layernorm --input 16x128x64x32 -F 1 --type bf16 --layout NHWC --mode 1
148+
)
149+
150+
# Training forward (F=2) benchmarks
151+
add_fusilli_benchmark(
152+
NAME fusilli_benchmark_layernorm_train_nchw_bf16
153+
DRIVER fusilli_benchmark_driver
154+
ARGS
155+
--device 0 --iter 10 layernorm --input 16x128x64x32 -F 2 --type bf16 --layout NCHW
156+
)
157+
158+
add_fusilli_benchmark(
159+
NAME fusilli_benchmark_layernorm_train_nhwc_fp16_scale_bias
160+
DRIVER fusilli_benchmark_driver
161+
ARGS
162+
--device 0 --iter 10 layernorm --input 16x128x64x32 -F 2 --type f16 --layout NHWC --mode 1
163+
)
164+
165+
# 3D input benchmarks (NCH format)
166+
add_fusilli_benchmark(
167+
NAME fusilli_benchmark_layernorm_infer_3d_fp32
168+
DRIVER fusilli_benchmark_driver
169+
ARGS
170+
--device 0 --iter 10 layernorm --input 16x128x256 -F 1 --type f32
171+
)
172+
173+
add_fusilli_benchmark(
174+
NAME fusilli_benchmark_layernorm_train_3d_bf16_scale_bias
175+
DRIVER fusilli_benchmark_driver
176+
ARGS
177+
--device 0 --iter 10 layernorm --input 16x128x256 -F 2 --type bf16 --mode 1
178+
)
179+
126180
# Matrix multiplication benchmarks
127181
add_fusilli_benchmark(
128182
NAME fusilli_benchmark_matmul_fp32

0 commit comments

Comments
 (0)