@@ -123,6 +123,60 @@ add_fusilli_benchmark(
123123 --device 0 --iter 10 conv -F 2 --bf16 -n 64 -c 3 -H 32 -W 32 -k 32 -y 3 -x 3 -u 1 -v 1 -p 0 -q 0 -l 1 -j 1 --in_layout "NCHW" --fil_layout "NCHW" --out_layout "NCHW" --spatial_dim 2
124124)
125125
126+
127+ # Layer normalization benchmarks
128+ # Inference forward (F=1) benchmarks
# Layer-normalization benchmark case: inference forward pass (-F 1) on a
# 16x128x64x32 input with element type f32 and NCHW layout.
# Driver arguments are listed one option per line; CMake passes them to ARGS
# exactly as if they were written on a single line.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_infer_nchw_fp32
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x64x32
    -F 1
    --type f32
    --layout NCHW
)
135+
# Layer-normalization benchmark case: inference forward pass (-F 1) on a
# 16x128x64x32 input with element type f16 and NHWC layout.
# Driver arguments are listed one option per line; CMake passes them to ARGS
# exactly as if they were written on a single line.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_infer_nhwc_fp16
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x64x32
    -F 1
    --type f16
    --layout NHWC
)
142+
# Layer-normalization benchmark case: inference forward pass (-F 1) on a
# 16x128x64x32 input with element type bf16 and NHWC layout.
# NOTE(review): --mode 1 presumably selects the scale+bias variant, going by
# the benchmark name -- confirm against the driver's option definitions.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_infer_nhwc_bf16_scale_bias
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x64x32
    -F 1
    --type bf16
    --layout NHWC
    --mode 1
)
149+
150+ # Training forward (F=2) benchmarks
# Layer-normalization benchmark case: training forward pass (-F 2) on a
# 16x128x64x32 input with element type bf16 and NCHW layout.
# Driver arguments are listed one option per line; CMake passes them to ARGS
# exactly as if they were written on a single line.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_train_nchw_bf16
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x64x32
    -F 2
    --type bf16
    --layout NCHW
)
157+
# Layer-normalization benchmark case: training forward pass (-F 2) on a
# 16x128x64x32 input with element type f16 and NHWC layout.
# NOTE(review): --mode 1 presumably selects the scale+bias variant, going by
# the benchmark name -- confirm against the driver's option definitions.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_train_nhwc_fp16_scale_bias
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x64x32
    -F 2
    --type f16
    --layout NHWC
    --mode 1
)
164+
165+ # 3D input benchmarks (NCH format)
# Layer-normalization benchmark case: inference forward pass (-F 1) on a
# rank-3 16x128x256 input with element type f32.
# No --layout option is passed for this case.
# NOTE(review): the layout is presumably NCH, as the section heading states --
# confirm what the driver assumes when --layout is omitted.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_infer_3d_fp32
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x256
    -F 1
    --type f32
)
172+
# Layer-normalization benchmark case: training forward pass (-F 2) on a
# rank-3 16x128x256 input with element type bf16.
# No --layout option is passed for this case.
# NOTE(review): --mode 1 presumably selects the scale+bias variant, going by
# the benchmark name -- confirm against the driver's option definitions.
add_fusilli_benchmark(
  NAME
    fusilli_benchmark_layernorm_train_3d_bf16_scale_bias
  DRIVER
    fusilli_benchmark_driver
  ARGS
    --device 0
    --iter 10
    layernorm
    --input 16x128x256
    -F 2
    --type bf16
    --mode 1
)
179+
126180# Matrix multiplication benchmarks
127181add_fusilli_benchmark(
128182 NAME fusilli_benchmark_matmul_fp32
0 commit comments