
Commit 700a94f

20250722_benchmark_sweep
Parent: 27f7f1c

4 files changed: +60 -10 lines

benchmarks/run.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
 import sys
 from typing import Any
 from typing import Callable
+import time

 # Maps tritonbench op names to Helion kernel examples
 # Can map to a single kernel or a list of kernel variants

benchmarks/run_input_shard.sh

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
+[[ -z "$RANK_OFFSET" ]] && { echo "Error: RANK_OFFSET is not set"; exit 1; }
+[[ -z "$SHARD" ]] && { echo "Error: SHARD is not set"; exit 1; }
+[[ -z "$WORLD_SIZE" ]] && { echo "Error: WORLD_SIZE is not set"; exit 1; }
+
+# Capture timestamp once for consistent filename
+TIMESTAMP=$(date +%s)
+OUTPUT_FILE="benchmarks_autotune_${TIMESTAMP}_input_shard_$((SHARD+1))_of_${WORLD_SIZE}.txt"
+
+# Retry until success
+attempt=0
+while true; do
+# while (( attempt < 10 )); do
+    attempt=$((attempt + 1))
+    echo "Attempt $attempt: Running benchmark for shard $((SHARD+1))/${WORLD_SIZE}..."
+
+    # TIMESTAMP=$(date +%s)
+    # OUTPUT_FILE="benchmarks_autotune_${TIMESTAMP}_input_shard_$((SHARD+1))_of_${WORLD_SIZE}.txt"
+
+    CUDA_VISIBLE_DEVICES=$((RANK_OFFSET+SHARD)) python benchmarks/run.py --input-shard $((SHARD+1))/${WORLD_SIZE} --metrics accuracy,tflops,gbps,speedup >"$OUTPUT_FILE" 2>&1
+
+    exit_code=$?
+    if [ $exit_code -eq 0 ]; then
+        echo "Success! Benchmark completed for shard $((SHARD+1))/${WORLD_SIZE}"
+        break
+    else
+        echo "Failed with exit code $exit_code. Retrying..."
+        sleep 10  # wait a few seconds before retrying
+    fi
+done
+
+# SHARD=0 RANK_OFFSET=4 WORLD_SIZE=4 bash benchmarks/run_input_shard.sh
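The script above runs a single input shard on a single GPU (CUDA_VISIBLE_DEVICES=RANK_OFFSET+SHARD) and retries until that shard finishes cleanly. As a rough sketch of how the shards could be fanned out across GPUs, a small Python driver along the following lines would launch one script process per shard; the driver, including the name launch_all_shards and its default arguments, is illustrative only and not part of this commit:

# Illustrative driver (not part of this commit): one run_input_shard.sh
# process per shard, one GPU per shard.
import os
import subprocess

def launch_all_shards(world_size: int = 4, rank_offset: int = 0) -> None:
    procs = []
    for shard in range(world_size):
        env = dict(os.environ)
        env["SHARD"] = str(shard)              # 0-based shard index
        env["RANK_OFFSET"] = str(rank_offset)  # index of the first GPU to use
        env["WORLD_SIZE"] = str(world_size)    # total number of shards
        procs.append(
            subprocess.Popen(["bash", "benchmarks/run_input_shard.sh"], env=env)
        )
    for p in procs:
        p.wait()  # each shard retries internally until it succeeds

if __name__ == "__main__":
    launch_all_shards()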

helion/autotuner/base_search.py

Lines changed: 26 additions & 8 deletions
@@ -17,6 +17,8 @@
 from typing import NamedTuple
 from typing import NoReturn

+from triton.compiler.errors import CompilationError
+
 if TYPE_CHECKING:
     from triton.runtime.jit import JITFunction

@@ -108,10 +110,13 @@ def benchmark(self, config: Config) -> float:
         Returns:
             The performance of the configuration in seconds.
         """
-        fn = self.kernel.compile_config(config, allow_print=False)
-        if self.start_precompile_and_check_for_hangs(config, fn)():
-            return self.benchmark_function(config, fn)
-        return inf
+        try:
+            fn = self.kernel.compile_config(config, allow_print=False)
+            if self.start_precompile_and_check_for_hangs(config, fn)():
+                return self.benchmark_function(config, fn)
+            return inf
+        except Exception as e:
+            return inf

     def benchmark_function(self, config: Config, fn: CompiledConfig) -> float:
         """

@@ -145,9 +150,11 @@ def benchmark_function(self, config: Config, fn: CompiledConfig) -> float:
             self.log.debug("Benchmarking failed: OutOfResources")
         except PTXASError:
             self.log.warning(f"PTXASError compiling config: {config}")
+        except CompilationError:
+            self.log.debug("Benchmarking failed: Triton CompilationError")
         except Exception as e:
-            if not _expected_errors_regexp.search(str(e)):
-                raise exc.TritonError(f"{type(e).__qualname__}: {e}", config) from e
+            # if not _expected_errors_regexp.search(str(e)):
+            #     raise exc.TritonError(f"{type(e).__qualname__}: {e}", config) from e
             self.log.debug(f"Benchmarking failed: {type(e).__name__}: {e}")
         return inf

@@ -169,6 +176,8 @@ def start_precompile_and_check_for_hangs(
         """
         if not self.settings.autotune_precompile:
             return PrecompileFuture.skip(self, config, True)
+        if fn is None:
+            return PrecompileFuture.skip(self, config, False)
         ctx = mp.get_context("fork")

         def extract_launcher(
@@ -189,6 +198,8 @@ def extract_launcher(
             precompiler = make_precompiler(e.kernel)(*e.args, **e.kwargs)
             if precompiler is already_compiled:
                 return PrecompileFuture.skip(self, config, True)
+        except Exception as e:
+            return PrecompileFuture.skip(self, config, False)
         process: mp.Process = ctx.Process(target=precompiler)  # pyright: ignore[reportAssignmentType]
         process.start()
         return PrecompileFuture(
@@ -208,7 +219,13 @@ def parallel_benchmark(self, configs: list[Config]) -> list[tuple[Config, float]]:
         Returns:
             A list of tuples containing configurations and their performance.
         """
-        fns = [self.kernel.compile_config(c, allow_print=False) for c in configs]
+        fns = []
+        for c in configs:
+            try:
+                compile_result = self.kernel.compile_config(c, allow_print=False)
+                fns.append(compile_result)
+            except Exception as e:
+                fns.append(None)
         if self.settings.autotune_precompile:
             is_workings = PrecompileFuture.wait_for_all(
                 [
@@ -387,11 +404,12 @@ def population_statistics(population: list[PopulationMember]) -> str:
         working = [x for x in population if not math.isinf(x.perf)]
         return (
             f"failed={len(population) - len(working)} "
+        ) + (
             f"min={working[0].perf:.4f} "
             f"mid={working[len(working) // 2].perf:.4f} "
             f"max={working[-1].perf:.4f} "
             f"best={population[0].config!s}"
-        )
+        ) if len(working) > 0 else "all failed!"
     return (
         f"min={population[0].perf:.4f} "
         f"mid={population[len(population) // 2].perf:.4f} "

helion/autotuner/config_spec.py

Lines changed: 2 additions & 2 deletions
@@ -411,8 +411,8 @@ def _flat_config(
         default = min(high, 4096)
         value = fn(BlockSizeFragment(low, high, default))
         assert isinstance(value, int)
-        if value >= self.size_hint:
-            return None  # max size becomes persistent reduction
+        if value >= self.size_hint or value < low:
+            return None  # max size or invalid value becomes persistent reduction
         return value

     def _normalize(self, name: str, value: object) -> int | None:
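The config_spec.py change widens the existing guard: a sampled reduction block size that is at or above the size hint, or below the allowed lower bound, now maps to None, which downstream means "use a persistent reduction". A toy illustration of that mapping, with a made-up helper name and example numbers:

# Toy illustration of the guard in _flat_config; resolve_rdim_block_size and
# the numbers below are made up. None stands for "persistent reduction".
from typing import Optional

def resolve_rdim_block_size(value: int, low: int, size_hint: int) -> Optional[int]:
    if value >= size_hint or value < low:  # covers the whole dim, or is invalid/too small
        return None
    return value

assert resolve_rdim_block_size(2048, low=16, size_hint=1024) is None  # max size -> persistent reduction
assert resolve_rdim_block_size(8, low=16, size_hint=1024) is None     # below low -> persistent reduction (new in this commit)
assert resolve_rdim_block_size(256, low=16, size_hint=1024) == 256    # valid block size kept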
