Skip to content

Commit 31ada4d

Browse files
authored
Create load_inline_sample.py
1 parent 8e8c6b1 commit 31ada4d

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Credit https://github.com/Snektron/gpumode-amd-fp8-mm/blob/main/fp8_gemm.py
2+
#!POPCORN leaderboard amd-fp8-mm
3+
#!POPCORN gpu MI300
4+
from task import input_t, output_t
5+
import torch
6+
from torch.utils.cpp_extension import load_inline
7+
import time
8+
import os
9+
import sys
10+
11+
if "PYTORCH_ROCM_ARCH" not in os.environ:
12+
os.environ["PYTORCH_ROCM_ARCH"] = "gfx942:xnack-"
13+
14+
TESTING = os.environ.get("GPUMODE_TESTING", None)
15+
16+
with open('solution.hip', 'r') as f:
17+
kernel_cpp = f.read()
18+
19+
# Compile the HIP kernel through PyTorch's inline-extension machinery.  On a
# ROCm build of torch, the `cuda_sources`/`with_cuda=True` path is hipified,
# so the .hip source goes through the normal CUDA-extension entry points.
hip_module = load_inline(
    name="fp8",
    cpp_sources="",  # no separate host-side C++ sources; everything is in the kernel file
    cuda_sources=kernel_cpp,
    with_cuda=True,
    verbose=False,
    # -save-temps keeps compiler intermediates around for inspection during
    # testing; always compile as C++20 with warnings promoted to errors.
    extra_cuda_cflags=(["-save-temps"] if TESTING is not None else []) + ["-std=c++20", "-Werror"],
    # Fixed build directories while testing give stable artifact paths; None
    # lets torch fall back to its default extension cache in production.
    build_directory="/workspace/build/" if TESTING == "vscode" else "/gpumode/amd/fp8/build/" if TESTING is not None else None,
    # no_implicit_headers is passed everywhere except the vscode setup —
    # NOTE(review): presumably that environment's torch lacks this kwarg; confirm.
    **({'no_implicit_headers': True} if TESTING != "vscode" else {}),
)  # FIX: the committed file (27 lines per the diff) ended here without closing
   # the call, so importing it raised a SyntaxError; the ')' above repairs it.

0 commit comments

Comments
 (0)