Skip to content

Commit 248cf43

Browse files
addressing Charlie's comments
1 parent e1236c7 commit 248cf43

File tree

2 files changed

+161
-126
lines changed

2 files changed

+161
-126
lines changed

build/rocm/run_single_gpu.py

Lines changed: 160 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
import argparse
1919
import threading
2020
import subprocess
21+
import re
22+
import html
23+
import traceback
2124
from concurrent.futures import ThreadPoolExecutor
2225
from datetime import datetime
2326

@@ -272,10 +275,41 @@ def append_abort_to_json(json_file, testfile, abort_info):
272275
with open(json_file, "w") as f:
273276
json.dump(report_data, f, indent=2)
274277

275-
except Exception as e:
276-
print(f"Failed to create JSON report for {testfile}: {e}")
277-
import traceback
278-
traceback.print_exc()
278+
except (OSError, IOError) as e:
279+
print(f"Failed to write JSON report for {testfile}: {e}")
280+
except json.JSONDecodeError as e:
281+
print(f"Failed to parse existing JSON report for {testfile}: {e}")
282+
print("Creating new JSON file instead...")
283+
# Try creating a new file structure with just the abort test
284+
try:
285+
current_time = datetime.now().timestamp()
286+
new_report_data = {
287+
"created": current_time,
288+
"duration": abort_info.get('duration', 0),
289+
"exitcode": 1,
290+
"root": "/rocm-jax/jax",
291+
"environment": {},
292+
"summary": {
293+
"passed": 0,
294+
"failed": 1,
295+
"total": 1,
296+
"collected": 1,
297+
"unskipped_total": 1
298+
},
299+
"collectors": [
300+
{
301+
"nodeid": "",
302+
"outcome": "failed",
303+
"result": [{"nodeid": f"tests/{testfile}.py", "type": "Module"}]
304+
}
305+
],
306+
"tests": [abort_test]
307+
}
308+
os.makedirs(os.path.dirname(json_file), exist_ok=True)
309+
with open(json_file, "w") as f:
310+
json.dump(new_report_data, f, indent=2)
311+
except (OSError, IOError) as io_e:
312+
print(f"Failed to create new JSON report for {testfile}: {io_e}")
279313

280314
def append_abort_to_html(html_file, testfile, abort_info):
281315
"""Generate or append abort info to pytest-html format HTML report"""
@@ -298,25 +332,26 @@ def append_abort_to_html(html_file, testfile, abort_info):
298332
duration_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
299333

300334
# Create abort test row HTML
301-
abort_row = f""" <tbody class="results-table-row">
302-
<tr class="collapsible">
303-
<td class="col-result">Failed</td>
304-
<td class="col-name">tests/{testfile}.py::{test_name}</td>
305-
<td class="col-duration">{duration_str}</td>
306-
<td class="col-links"></td>
307-
</tr>
308-
<tr class="extras-row">
309-
<td class="extra" colspan="4">
310-
<div class="extraHTML"></div>
311-
<div class="logwrapper">
312-
<div class="logexpander"></div>
313-
<div class="log">Test aborted: {abort_info.get('reason', 'Test aborted or crashed.')}<br/>
335+
abort_row = f"""
336+
<tbody class="results-table-row">
337+
<tr class="collapsible">
338+
<td class="col-result">Failed</td>
339+
<td class="col-name">tests/{testfile}.py::{test_name}</td>
340+
<td class="col-duration">{duration_str}</td>
341+
<td class="col-links"></td>
342+
</tr>
343+
<tr class="extras-row">
344+
<td class="extra" colspan="4">
345+
<div class="extraHTML"></div>
346+
<div class="logwrapper">
347+
<div class="logexpander"></div>
348+
<div class="log">Test aborted: {abort_info.get('reason', 'Test aborted or crashed.')}<br/>
314349
Abort detected at: {abort_time}<br/>
315350
GPU ID: {gpu_id}</div>
316-
</div>
317-
</td>
318-
</tr>
319-
</tbody>"""
351+
</div>
352+
</td>
353+
</tr>
354+
</tbody>"""
320355

321356
# Insert the abort row before the closing </table> tag of results-table specifically
322357
if '</table>' in html_content:
@@ -331,7 +366,6 @@ def append_abort_to_html(html_file, testfile, abort_info):
331366
return
332367

333368
# Update the test count in the summary (find and replace pattern)
334-
import re
335369

336370
# Fix malformed run-count patterns first
337371
malformed_pattern = r'(\d+/\d+ test done\.)'
@@ -349,9 +383,9 @@ def append_abort_to_html(html_file, testfile, abort_info):
349383

350384
# Update "X test took" pattern (current pytest-html format)
351385
count_pattern2 = r'(\d+) tests? took'
352-
match2 = re.search(count_pattern2, html_content)
353-
if match2:
354-
current_count = int(match2.group(1))
386+
match = re.search(count_pattern2, html_content)
387+
if match:
388+
current_count = int(match.group(1))
355389
new_count = current_count + 1
356390
html_content = re.sub(count_pattern2, f'{new_count} tests took', html_content)
357391

@@ -368,14 +402,12 @@ def append_abort_to_html(html_file, testfile, abort_info):
368402
html_content = html_content.replace('data-test-result="failed" disabled', 'data-test-result="failed"')
369403

370404
# Update the JSON data in data-jsonblob to include the abort test
371-
import re
372405
jsonblob_pattern = r'data-jsonblob="([^"]*)"'
373406
match = re.search(jsonblob_pattern, html_content)
374407
if match:
375-
import html as html_module
376408
try:
377409
# Decode the HTML-escaped JSON
378-
json_str = html_module.unescape(match.group(1))
410+
json_str = html.unescape(match.group(1))
379411
existing_json = json.loads(json_str)
380412

381413
# Add the abort test to the tests array
@@ -402,7 +434,7 @@ def append_abort_to_html(html_file, testfile, abort_info):
402434
existing_json["tests"][test_id] = new_test
403435

404436
# Re-encode the JSON and escape for HTML
405-
updated_json_str = html_module.escape(json.dumps(existing_json))
437+
updated_json_str = html.escape(json.dumps(existing_json))
406438
html_content = re.sub(jsonblob_pattern, f'data-jsonblob="{updated_json_str}"', html_content)
407439

408440
except (json.JSONDecodeError, Exception) as e:
@@ -423,10 +455,12 @@ def append_abort_to_html(html_file, testfile, abort_info):
423455
# File doesn't exist - create complete new HTML file
424456
_create_new_html_file(html_file, testfile, abort_info)
425457

426-
except Exception as e:
427-
print(f"Failed to update HTML report for {testfile}: {e}")
428-
import traceback
429-
traceback.print_exc()
458+
except (OSError, IOError) as e:
459+
print(f"Failed to read/write HTML report for {testfile}: {e}")
460+
except (json.JSONDecodeError, UnicodeDecodeError) as e:
461+
print(f"Failed to parse existing HTML report for {testfile}: {e}")
462+
print("Creating new HTML file instead...")
463+
_create_new_html_file(html_file, testfile, abort_info)
430464

431465
def _create_new_html_file(html_file, testfile, abort_info):
432466
"""Create a new HTML file for abort-only report"""
@@ -474,100 +508,99 @@ def _create_new_html_file(html_file, testfile, abort_info):
474508
}
475509

476510
# Convert JSON to HTML-escaped string for data-jsonblob attribute
477-
import html
478511
json_blob = html.escape(json.dumps(json_data))
479512

480513
html_content = f'''<!DOCTYPE html>
481-
<html>
482-
<head>
483-
<meta charset="utf-8"/>
484-
<title id="head-title">{testfile}_log.html</title>
485-
<link href="assets/style.css" rel="stylesheet" type="text/css"/>
486-
</head>
487-
<body onLoad="init()">
488-
<h1 id="title">{testfile}_log.html</h1>
489-
<p>Report generated on {datetime.now().strftime('%d-%b-%Y at %H:%M:%S')} by <a href="https://pypi.python.org/pypi/pytest-html">pytest-html</a> v4.1.1</p>
490-
<div id="environment-header">
491-
<h2>Environment</h2>
492-
</div>
493-
<table id="environment"></table>
494-
<div class="summary">
495-
<div class="summary__data">
496-
<h2>Summary</h2>
497-
<div class="additional-summary prefix">
498-
</div>
499-
<p class="run-count">1 tests took {duration_str}.</p>
500-
<p class="filter">(Un)check the boxes to filter the results.</p>
501-
<div class="summary__reload">
502-
<div class="summary__reload__button hidden" onclick="location.reload()">
503-
<div>There are still tests running. <br />Reload this page to get the latest results!</div>
504-
</div>
505-
</div>
506-
<div class="summary__spacer"></div>
507-
<div class="controls">
508-
<div class="filters">
509-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="failed" />
510-
<span class="failed">1 Failed,</span>
511-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="passed" disabled/>
512-
<span class="passed">0 Passed,</span>
513-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="skipped" disabled/>
514-
<span class="skipped">0 Skipped,</span>
515-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="xfailed" disabled/>
516-
<span class="xfailed">0 Expected failures,</span>
517-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="xpassed" disabled/>
518-
<span class="xpassed">0 Unexpected passes,</span>
519-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="error" disabled/>
520-
<span class="error">0 Errors,</span>
521-
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="rerun" disabled/>
522-
<span class="rerun">0 Reruns</span>
523-
</div>
524-
<div class="collapse">
525-
<button id="show_all_details">Show all details</button>&nbsp;/&nbsp;<button id="hide_all_details">Hide all details</button>
526-
</div>
527-
</div>
528-
</div>
529-
<div class="additional-summary summary">
530-
</div>
531-
<div class="additional-summary postfix">
532-
</div>
533-
</div>
534-
<table id="results-table">
535-
<thead id="results-table-head">
536-
<tr>
537-
<th class="sortable result initial-sort" data-column-type="result">Result</th>
538-
<th class="sortable" data-column-type="name">Test</th>
539-
<th class="sortable" data-column-type="duration">Duration</th>
540-
<th class="sortable links" data-column-type="links">Links</th>
541-
</tr>
542-
</thead>
543-
<tbody class="results-table-row">
544-
<tr class="collapsible">
545-
<td class="col-result">Failed</td>
546-
<td class="col-name">tests/{testfile}.py::{test_name}</td>
547-
<td class="col-duration">{duration_str}</td>
548-
<td class="col-links"></td>
549-
</tr>
550-
<tr class="extras-row">
551-
<td class="extra" colspan="4">
552-
<div class="extraHTML"></div>
553-
<div class="logwrapper">
554-
<div class="logexpander"></div>
555-
<div class="log">Test aborted: {abort_info.get('reason', 'Test aborted or crashed.')}<br/>
556-
Abort detected at: {abort_time}<br/>
557-
GPU ID: {gpu_id}</div>
514+
<html>
515+
<head>
516+
<meta charset="utf-8"/>
517+
<title id="head-title">{testfile}_log.html</title>
518+
<link href="assets/style.css" rel="stylesheet" type="text/css"/>
519+
</head>
520+
<body onLoad="init()">
521+
<h1 id="title">{testfile}_log.html</h1>
522+
<p>Report generated on {datetime.now().strftime('%d-%b-%Y at %H:%M:%S')} by <a href="https://pypi.python.org/pypi/pytest-html">pytest-html</a> v4.1.1</p>
523+
<div id="environment-header">
524+
<h2>Environment</h2>
558525
</div>
559-
</td>
560-
</tr>
561-
</tbody>
562-
</table>
563-
<div id="data-container" data-jsonblob="{json_blob}"></div>
564-
<script>
565-
function init() {{
566-
// Initialize any required functionality
567-
}}
568-
</script>
569-
</body>
570-
</html>'''
526+
<table id="environment"></table>
527+
<div class="summary">
528+
<div class="summary__data">
529+
<h2>Summary</h2>
530+
<div class="additional-summary prefix">
531+
</div>
532+
<p class="run-count">1 tests took {duration_str}.</p>
533+
<p class="filter">(Un)check the boxes to filter the results.</p>
534+
<div class="summary__reload">
535+
<div class="summary__reload__button hidden" onclick="location.reload()">
536+
<div>There are still tests running. <br />Reload this page to get the latest results!</div>
537+
</div>
538+
</div>
539+
<div class="summary__spacer"></div>
540+
<div class="controls">
541+
<div class="filters">
542+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="failed" />
543+
<span class="failed">1 Failed,</span>
544+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="passed" disabled/>
545+
<span class="passed">0 Passed,</span>
546+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="skipped" disabled/>
547+
<span class="skipped">0 Skipped,</span>
548+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="xfailed" disabled/>
549+
<span class="xfailed">0 Expected failures,</span>
550+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="xpassed" disabled/>
551+
<span class="xpassed">0 Unexpected passes,</span>
552+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="error" disabled/>
553+
<span class="error">0 Errors,</span>
554+
<input checked="true" class="filter" name="filter_checkbox" type="checkbox" data-test-result="rerun" disabled/>
555+
<span class="rerun">0 Reruns</span>
556+
</div>
557+
<div class="collapse">
558+
<button id="show_all_details">Show all details</button>&nbsp;/&nbsp;<button id="hide_all_details">Hide all details</button>
559+
</div>
560+
</div>
561+
</div>
562+
<div class="additional-summary summary">
563+
</div>
564+
<div class="additional-summary postfix">
565+
</div>
566+
</div>
567+
<table id="results-table">
568+
<thead id="results-table-head">
569+
<tr>
570+
<th class="sortable result initial-sort" data-column-type="result">Result</th>
571+
<th class="sortable" data-column-type="name">Test</th>
572+
<th class="sortable" data-column-type="duration">Duration</th>
573+
<th class="sortable links" data-column-type="links">Links</th>
574+
</tr>
575+
</thead>
576+
<tbody class="results-table-row">
577+
<tr class="collapsible">
578+
<td class="col-result">Failed</td>
579+
<td class="col-name">tests/{testfile}.py::{test_name}</td>
580+
<td class="col-duration">{duration_str}</td>
581+
<td class="col-links"></td>
582+
</tr>
583+
<tr class="extras-row">
584+
<td class="extra" colspan="4">
585+
<div class="extraHTML"></div>
586+
<div class="logwrapper">
587+
<div class="logexpander"></div>
588+
<div class="log">Test aborted: {abort_info.get('reason', 'Test aborted or crashed.')}<br/>
589+
Abort detected at: {abort_time}<br/>
590+
GPU ID: {gpu_id}</div>
591+
</div>
592+
</td>
593+
</tr>
594+
</tbody>
595+
</table>
596+
<div id="data-container" data-jsonblob="{json_blob}"></div>
597+
<script>
598+
function init() {{
599+
// Initialize any required functionality
600+
}}
601+
</script>
602+
</body>
603+
</html>'''
571604

572605
# Ensure the logs directory exists
573606
os.makedirs(os.path.dirname(html_file), exist_ok=True)
@@ -578,9 +611,10 @@ def _create_new_html_file(html_file, testfile, abort_info):
578611

579612
print(f"Created new HTML report: {html_file}")
580613

614+
except (OSError, IOError) as e:
615+
print(f"Failed to write new HTML report for {testfile}: {e}")
581616
except Exception as e:
582-
print(f"Failed to create new HTML report for {testfile}: {e}")
583-
import traceback
617+
print(f"Unexpected error creating new HTML report for {testfile}: {e}")
584618
traceback.print_exc()
585619

586620

@@ -628,3 +662,4 @@ def main(args):
628662

629663
main(args)
630664

665+

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def pytest_runtest_protocol(item, nextitem):
137137
@pytest.hookimpl(tryfirst=True)
138138
def pytest_sessionstart(session):
139139
"""Called after the Session object has been created"""
140-
test_file = test_logger.get_test_file_name(session)
140+
print(f"Starting test session on GPU {os.environ.get('HIP_VISIBLE_DEVICES', 'unknown')}")
141141

142142
@pytest.hookimpl(trylast=True)
143143
def pytest_sessionfinish(session, exitstatus):

0 commit comments

Comments
 (0)