Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 40581f4

Browse files
authored
[Grok-1] 1. upload moe configuration file for moe kernel optimization… (#193)
* [Grok-1] 1. upload moe configuration file for moe kernel optimization 2. support "--num-scheduler-steps" in benchmark_latency.py
* [Grok-1] 1. upload moe configuration file for moe kernel optimization 2. add copy of benchmark_latency.py to support "--num-scheduler-steps"
* [Grok-1] add option num-scheduler-steps in benchmark_latency.py
1 parent 54e0441 commit 40581f4

File tree

2 files changed

+10
-4
lines changed

2 files changed

+10
-4
lines changed

benchmarks/benchmark_latency.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def main(args: argparse.Namespace):
4747
distributed_executor_backend=args.distributed_executor_backend,
4848
otlp_traces_endpoint=args.otlp_traces_endpoint,
4949
enable_prefix_caching=args.enable_prefix_caching,
50+
num_scheduler_steps=args.num_scheduler_steps,
5051
)
5152

5253
sampling_params = SamplingParams(
@@ -279,5 +280,10 @@ def run_to_completion(profile_dir: Optional[str] = None):
279280
type=str,
280281
default=None,
281282
help='Target URL to which OpenTelemetry traces will be sent.')
283+
parser.add_argument(
284+
"--num-scheduler-steps",
285+
type=int,
286+
default=1,
287+
help="Maximum number of forward steps per scheduler call.")
282288
args = parser.parse_args()
283289
main(args)
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
{
22
"8": {
33
"BLOCK_SIZE_M": 16,
4-
"BLOCK_SIZE_N": 128,
4+
"BLOCK_SIZE_N": 64,
55
"BLOCK_SIZE_K": 256,
66
"GROUP_SIZE_M": 1,
7-
"num_warps": 8,
7+
"num_warps": 4,
88
"num_stages": 0,
9-
"waves_per_eu": 0,
9+
"waves_per_eu": 1,
1010
"matrix_instr_nonkdim": 16,
11-
"kpack": 2
11+
"kpack": 1
1212
}
1313
}

0 commit comments

Comments
 (0)