Skip to content

Commit ff5b150

Browse files
authored
Llama 8b f16 regression tests using random weights and inputs (#20487)
This PR adds regression tests for llama 8b f16 prefill and decode using randomized llama 8b f16 weights and inputs. We use random llama 8b f16 weights in order to download a public version of the weights (the original weights are gated and require access from the model authors from the huggingface repo). --------- Signed-off-by: aviator19941 <[email protected]>
1 parent a802470 commit ff5b150

File tree

5 files changed

+101
-1
lines changed

5 files changed

+101
-1
lines changed

.github/workflows/pkgci_test_sharktank.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
- name: amdgpu_rocm_mi250_gfx90a
4444
rocm-chip: gfx90a
4545
backend: rocm
46-
iree_test_files: /home/esaimana/iree_tests_cache
46+
iree_test_files: /groups/aig_sharks/iree-tests-cache
4747
sku: mi250
4848
target: target_hip
4949
runs-on: nodai-amdgpu-mi250-x86-64
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"inputs": [
3+
"4x1xi64",
4+
"4xi64",
5+
"4xi64",
6+
"4x5xi64",
7+
"34x2097152xf16"
8+
],
9+
"function_run": "decode_bs4",
10+
"benchmark_flags": [
11+
"--hip_use_streams=true",
12+
"--benchmark_repetitions=10",
13+
"--benchmark_min_warmup_time=3.0"
14+
],
15+
"device": "hip",
16+
"golden_time_tolerance_multiplier": {
17+
"mi300": 1.1
18+
},
19+
"golden_time_ms": {
20+
"mi300": 15.7
21+
}
22+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
"inputs": [
3+
"4x128xi64",
4+
"4xi64",
5+
"4x4xi64",
6+
"34x2097152xf16"
7+
],
8+
"function_run": "prefill_bs4",
9+
"benchmark_flags": [
10+
"--hip_use_streams=true",
11+
"--benchmark_repetitions=10",
12+
"--benchmark_min_warmup_time=3.0"
13+
],
14+
"device": "hip",
15+
"golden_time_tolerance_multiplier": {
16+
"mi300": 1.1
17+
},
18+
"golden_time_ms": {
19+
"mi300": 43.1
20+
}
21+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"inputs": [
3+
{
4+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/decode_next_tokens.npy"
5+
},
6+
{
7+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/decode_seq_lens.npy"
8+
},
9+
{
10+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/decode_start_positions.npy"
11+
},
12+
{
13+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/decode_seq_block_ids.npy"
14+
},
15+
{
16+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/decode_cache_state.npy"
17+
}
18+
],
19+
"device": "hip",
20+
"real_weights": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/real_weights.irpa",
21+
"mlir": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/8b_f16_random.mlir",
22+
"compiler_flags": [
23+
"--iree-hal-target-device=hip",
24+
"--iree-opt-level=O3",
25+
"--iree-hal-indirect-command-buffers=true",
26+
"--iree-stream-resource-memory-model=discrete",
27+
"--iree-hal-memoization=true"
28+
],
29+
"run_function": "decode_bs4"
30+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"inputs": [
3+
{
4+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/prefill_token_ids.npy"
5+
},
6+
{
7+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/prefill_seq_lens.npy"
8+
},
9+
{
10+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/prefill_seq_block_ids.npy"
11+
},
12+
{
13+
"source": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/toy_llama_inputs/prefill_cache_state.npy"
14+
}
15+
],
16+
"device": "hip",
17+
"real_weights": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/real_weights.irpa",
18+
"mlir": "https://sharkpublic.blob.core.windows.net/sharkpublic/halo-models/llm-dev/llama3_8b_random/8b_f16_random.mlir",
19+
"compiler_flags": [
20+
"--iree-hal-target-device=hip",
21+
"--iree-opt-level=O3",
22+
"--iree-hal-indirect-command-buffers=true",
23+
"--iree-stream-resource-memory-model=discrete",
24+
"--iree-hal-memoization=true"
25+
],
26+
"run_function": "prefill_bs4"
27+
}

0 commit comments

Comments (0)