diff --git a/chipflow_lib/steps/silicon.py b/chipflow_lib/steps/silicon.py index 6060bec9..2357e586 100644 --- a/chipflow_lib/steps/silicon.py +++ b/chipflow_lib/steps/silicon.py @@ -7,12 +7,10 @@ import json import logging import os -import re import requests import shutil import subprocess import sys -import time import urllib3 from pprint import pformat @@ -31,6 +29,18 @@ logger = logging.getLogger(__name__) +def halo_logging(closure): + class ClosureStreamHandler(logging.StreamHandler): + def emit(self, record): + # Call the closure with the log message + closure(self.format(record)) + + handler = ClosureStreamHandler() + formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + logger.addHandler(handler) + + class SiliconTop(StepBase, Elaboratable): def __init__(self, config): self._config = config @@ -122,6 +132,7 @@ def submit(self, rtlil_path, args): else: interval = -1 with Halo(text="Submitting...", spinner="dots", interval=interval) as sp: + fh = None submission_name = self.determine_submission_name() data = { @@ -196,15 +207,12 @@ def network_err(e): }, allow_redirects=False ) - except requests.ConnectTimeout as e: - network_err(e) - except requests.ConnectionError as e: - if type(e.__context__) is urllib3.exceptions.MaxRetryError: - network_err(e) - except requests.exceptions.ReadTimeout as e: - network_err(e) - - assert resp + except Exception as e: + logger.error(f"Unexpected error submitting design: {e}") + sp.fail(f"Unexpected error: {e}") + + assert resp is not None + # Parse response body try: resp_data = resp.json() @@ -237,7 +245,6 @@ def network_err(e): headers["Authorization"] = "REDACTED" logger.debug(f"Request headers: {headers}") - logger.debug(f"Request data: {data}") logger.debug(f"Response headers: {dict(resp.headers)}") logger.debug(f"Response body: {resp_data}") sp.text = "" @@ -251,59 +258,56 @@ def network_err(e): exit(2) def _long_poll_stream(self, sp, network_err): - steps = self._last_log_steps - stream_event_counter = 0 assert self._chipflow_api_key # after 4 errors, return to _stream_logs loop and query the build status again - while (stream_event_counter < 4): - sp.text = "Build running... " + ' -> '.join(steps) - try: - log_resp = requests.get( - self._log_stream_url, - auth=("", self._chipflow_api_key), - stream=True, - timeout=(2.0, 60.0) # fail if connect takes >2s, long poll for 60s at a time - ) - if log_resp.status_code == 200: - for line in log_resp.iter_lines(): - line_str = line.decode("utf-8") if line else "" - logger.debug(line_str) - match line_str[0:8]: - case "DEBUG ": - sp.info(line_str) if log_level <= logging.DEBUG else None - case "INFO ": - sp.info(line_str) if log_level <= logging.INFO else None - # Some special handling for more user feedback - if line_str.endswith("started"): - steps = re.findall(r"([0-9a-z_.]+)\:+?", line_str[18:])[0:2] - sp.text = "Build running... " + ' -> '.join(steps) - case "WARNING ": - sp.info(line_str) if log_level <= logging.WARNING else None - case "ERROR ": - sp.info(line_str) if log_level <= logging.ERROR else None - sp.start() - else: - stream_event_counter +=1 - logger.debug(f"Failed to stream logs: {log_resp.text}") - sp.text = "💥 Failed streaming build logs. Trying again!" - break - except requests.ConnectionError as e: - if type(e.__context__) is urllib3.exceptions.ReadTimeoutError: - continue #just timed out, continue long poll - sp.text = "💥 Failed connecting to ChipFlow Cloud." - logger.debug(f"Error while streaming logs: {e}") - break - except (requests.RequestException, requests.exceptions.ReadTimeout) as e: - if type(e.__context__) is urllib3.exceptions.ReadTimeoutError: - continue #just timed out, continue long poll + logger.debug("Long poll start") + try: + log_resp = requests.get( + self._log_stream_url, + auth=("", self._chipflow_api_key), + stream=True, + timeout=(2.0, 60.0) # fail if connect takes >2s, long poll for 60s at a time + ) + if log_resp.status_code == 200: + logger.debug(f"response from {self._log_stream_url}:\n{log_resp}") + for line in log_resp.iter_lines(): + message = line.decode("utf-8") if line else "" + try: + level, time, step = message.split(maxsplit=2) + except ValueError: + continue + + match level: + case "DEBUG": + sp.info(message) if log_level <= logging.DEBUG else None + case "INFO" | "INFO+": + sp.info(message) if log_level <= logging.INFO else None + case "WARNING": + sp.info(message) if log_level <= logging.WARNING else None + case "ERROR": + sp.info(message) if log_level <= logging.ERROR else None + + if step != self._last_log_step: + sp.text = f"Build running: {self._last_log_step}" + self._last_log_step = step + else: + logger.debug(f"Failed to stream logs: {log_resp.text}") sp.text = "💥 Failed streaming build logs. Trying again!" - logger.debug(f"Error while streaming logs: {e}") - stream_event_counter +=1 - continue - - # save steps so we coninue where we left off if we manage to reconnect - self._last_log_steps = steps - return stream_event_counter + return True + except requests.ConnectionError as e: + if type(e.__context__) is urllib3.exceptions.ReadTimeoutError: + return True + sp.text = "💥 Failed connecting to ChipFlow Cloud." + logger.debug(f"Error while streaming logs: {e}") + return False + except (requests.RequestException, requests.exceptions.ReadTimeout) as e: + if type(e.__context__) is urllib3.exceptions.ReadTimeoutError: + return True + sp.text = "💥 Failed streaming build logs. Trying again!" + logger.debug(f"Error while streaming logs: {e}") + return False + + return True def _stream_logs(self, sp, network_err): sp.start("Streaming the logs...") @@ -312,18 +316,19 @@ def _stream_logs(self, sp, network_err): timeout = 10.0 build_status = "pending" stream_event_counter = 0 - self._last_log_steps = [] + self._last_log_step = "" assert self._chipflow_api_key is not None - while fail_counter < 10 and stream_event_counter < 10: - sp.text = f"Waiting for build to run... {build_status}" - time.sleep(timeout) # Wait before polling + sp.text = f"Waiting for build to run... {build_status}" + + while fail_counter < 5: try: + logger.debug(f"Checking build status, iteration {fail_counter}") status_resp = requests.get( self._build_status_url, auth=("", self._chipflow_api_key), timeout=timeout ) - except requests.exceptions.ReadTimeout as e: + except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e: sp.text = "💥 Error connecting to ChipFlow Cloud. Trying again! " fail_counter += 1 logger.debug(f"Failed to fetch build status{fail_counter} times: {e}") @@ -339,8 +344,6 @@ def _stream_logs(self, sp, network_err): build_status = status_data.get("status") logger.debug(f"Build status: {build_status}") - sp.text = f"Polling build status... {build_status}" - if build_status == "completed": sp.succeed("✅ Build completed successfully!") return 0 @@ -348,13 +351,13 @@ def _stream_logs(self, sp, network_err): sp.succeed("❌ Build failed.") return 1 elif build_status == "running": - stream_event_counter += self._long_poll_stream(sp, network_err) - - if fail_counter >=10 or stream_event_counter >= 10: - sp.text = "" - sp.fail("💥 Failed fetching build status. Perhaps you hit a network error?") - logger.debug(f"Failed to fetch build status {fail_counter} times and failed streaming {stream_event_counter} times. Exiting.") - return 2 + sp.text = f"Build status: {build_status}" + if not self._long_poll_stream(sp, network_err): + sp.text = "" + sp.fail("💥 Failed fetching build status. Perhaps you hit a network error?") + logger.debug(f"Failed to fetch build status {fail_counter} times and failed streaming {stream_event_counter} times. Exiting.") + return 2 + # check status and go again def determine_submission_name(self): if "CHIPFLOW_SUBMISSION_NAME" in os.environ: diff --git a/pyproject.toml b/pyproject.toml index f8e1a32e..1373683e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ test.cmd = "pytest" test-cov.cmd = "pytest --cov=chipflow_lib --cov-report=term" test-cov-html.cmd = "pytest --cov=chipflow_lib --cov-report=html" test-docs.cmd = "sphinx-build -b doctest docs/ docs/_build" -lint.composite = [ "./tools/license_check.sh", "ruff check", "pyright chipflow_lib"] +lint.composite = [ "./tools/license_check.sh", "ruff check {args}", "pyright chipflow_lib"] docs.cmd = "sphinx-build docs/ docs/_build/ -W --keep-going" test-silicon.cmd = "pytest tests/test_silicon_platform.py tests/test_silicon_platform_additional.py tests/test_silicon_platform_amaranth.py tests/test_silicon_platform_build.py tests/test_silicon_platform_port.py --cov=chipflow_lib.platforms.silicon --cov-report=term" _check-project.call = "tools.check_project:main"