Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 79 additions & 76 deletions chipflow_lib/steps/silicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@
import json
import logging
import os
import re
import requests
import shutil
import subprocess
import sys
import time
import urllib3
from pprint import pformat

Expand All @@ -31,6 +29,18 @@
logger = logging.getLogger(__name__)


def halo_logging(closure):
    """Forward this module's log records to ``closure`` as formatted text.

    A stream handler is attached to the module-level ``logger``. Every record
    it receives is rendered as ``<asctime> - <levelname> - <message>`` and
    passed to ``closure`` as a single string.

    Args:
        closure: Callable taking the formatted log line as its only argument.

    Returns:
        The handler that was attached, so the caller can detach it again with
        ``logger.removeHandler(...)``. Every call attaches a new handler.
    """

    class ClosureStreamHandler(logging.StreamHandler):
        """Handler that hands each formatted record to ``closure``."""

        def emit(self, record):
            # A failing sink must not make the logging call itself raise;
            # report the problem through the standard logging error hook.
            try:
                closure(self.format(record))
            except Exception:
                self.handleError(record)

    handler = ClosureStreamHandler()
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    logger.addHandler(handler)
    return handler


class SiliconTop(StepBase, Elaboratable):
def __init__(self, config):
self._config = config
Expand Down Expand Up @@ -122,6 +132,7 @@ def submit(self, rtlil_path, args):
else:
interval = -1
with Halo(text="Submitting...", spinner="dots", interval=interval) as sp:

fh = None
submission_name = self.determine_submission_name()
data = {
Expand Down Expand Up @@ -196,15 +207,12 @@ def network_err(e):
},
allow_redirects=False
)
except requests.ConnectTimeout as e:
network_err(e)
except requests.ConnectionError as e:
if type(e.__context__) is urllib3.exceptions.MaxRetryError:
network_err(e)
except requests.exceptions.ReadTimeout as e:
network_err(e)

assert resp
except Exception as e:
logger.error(f"Unexpected error submitting design: {e}")
sp.fail(f"Unexpected error: {e}")

assert resp is not None

# Parse response body
try:
resp_data = resp.json()
Expand Down Expand Up @@ -237,7 +245,6 @@ def network_err(e):
headers["Authorization"] = "REDACTED"
logger.debug(f"Request headers: {headers}")

logger.debug(f"Request data: {data}")
logger.debug(f"Response headers: {dict(resp.headers)}")
logger.debug(f"Response body: {resp_data}")
sp.text = ""
Expand All @@ -251,59 +258,56 @@ def network_err(e):
exit(2)

def _long_poll_stream(self, sp, network_err):
steps = self._last_log_steps
stream_event_counter = 0
assert self._chipflow_api_key
# after 4 errors, return to _stream_logs loop and query the build status again
while (stream_event_counter < 4):
sp.text = "Build running... " + ' -> '.join(steps)
try:
log_resp = requests.get(
self._log_stream_url,
auth=("", self._chipflow_api_key),
stream=True,
timeout=(2.0, 60.0) # fail if connect takes >2s, long poll for 60s at a time
)
if log_resp.status_code == 200:
for line in log_resp.iter_lines():
line_str = line.decode("utf-8") if line else ""
logger.debug(line_str)
match line_str[0:8]:
case "DEBUG ":
sp.info(line_str) if log_level <= logging.DEBUG else None
case "INFO ":
sp.info(line_str) if log_level <= logging.INFO else None
# Some special handling for more user feedback
if line_str.endswith("started"):
steps = re.findall(r"([0-9a-z_.]+)\:+?", line_str[18:])[0:2]
sp.text = "Build running... " + ' -> '.join(steps)
case "WARNING ":
sp.info(line_str) if log_level <= logging.WARNING else None
case "ERROR ":
sp.info(line_str) if log_level <= logging.ERROR else None
sp.start()
else:
stream_event_counter +=1
logger.debug(f"Failed to stream logs: {log_resp.text}")
sp.text = "💥 Failed streaming build logs. Trying again!"
break
except requests.ConnectionError as e:
if type(e.__context__) is urllib3.exceptions.ReadTimeoutError:
continue #just timed out, continue long poll
sp.text = "💥 Failed connecting to ChipFlow Cloud."
logger.debug(f"Error while streaming logs: {e}")
break
except (requests.RequestException, requests.exceptions.ReadTimeout) as e:
if type(e.__context__) is urllib3.exceptions.ReadTimeoutError:
continue #just timed out, continue long poll
logger.debug("Long poll start")
try:
log_resp = requests.get(
self._log_stream_url,
auth=("", self._chipflow_api_key),
stream=True,
timeout=(2.0, 60.0) # fail if connect takes >2s, long poll for 60s at a time
)
if log_resp.status_code == 200:
logger.debug(f"response from {self._log_stream_url}:\n{log_resp}")
for line in log_resp.iter_lines():
message = line.decode("utf-8") if line else ""
try:
level, time, step = message.split(maxsplit=2)
except ValueError:
continue

match level:
case "DEBUG":
sp.info(message) if log_level <= logging.DEBUG else None
case "INFO" | "INFO+":
sp.info(message) if log_level <= logging.INFO else None
case "WARNING":
sp.info(message) if log_level <= logging.WARNING else None
case "ERROR":
sp.info(message) if log_level <= logging.ERROR else None

if step != self._last_log_step:
sp.text = f"Build running: {self._last_log_step}"
self._last_log_step = step
else:
logger.debug(f"Failed to stream logs: {log_resp.text}")
sp.text = "💥 Failed streaming build logs. Trying again!"
logger.debug(f"Error while streaming logs: {e}")
stream_event_counter +=1
continue

# save steps so we continue where we left off if we manage to reconnect
self._last_log_steps = steps
return stream_event_counter
return True
except requests.ConnectionError as e:
if type(e.__context__) is urllib3.exceptions.ReadTimeoutError:
return True
sp.text = "💥 Failed connecting to ChipFlow Cloud."
logger.debug(f"Error while streaming logs: {e}")
return False
except (requests.RequestException, requests.exceptions.ReadTimeout) as e:
if type(e.__context__) is urllib3.exceptions.ReadTimeoutError:
return True
sp.text = "💥 Failed streaming build logs. Trying again!"
logger.debug(f"Error while streaming logs: {e}")
return False

return True

def _stream_logs(self, sp, network_err):
sp.start("Streaming the logs...")
Expand All @@ -312,18 +316,19 @@ def _stream_logs(self, sp, network_err):
timeout = 10.0
build_status = "pending"
stream_event_counter = 0
self._last_log_steps = []
self._last_log_step = ""
assert self._chipflow_api_key is not None
while fail_counter < 10 and stream_event_counter < 10:
sp.text = f"Waiting for build to run... {build_status}"
time.sleep(timeout) # Wait before polling
sp.text = f"Waiting for build to run... {build_status}"

while fail_counter < 5:
try:
logger.debug(f"Checking build status, iteration {fail_counter}")
status_resp = requests.get(
self._build_status_url,
auth=("", self._chipflow_api_key),
timeout=timeout
)
except requests.exceptions.ReadTimeout as e:
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError) as e:
sp.text = "💥 Error connecting to ChipFlow Cloud. Trying again! "
fail_counter += 1
logger.debug(f"Failed to fetch build status{fail_counter} times: {e}")
Expand All @@ -339,22 +344,20 @@ def _stream_logs(self, sp, network_err):
build_status = status_data.get("status")
logger.debug(f"Build status: {build_status}")

sp.text = f"Polling build status... {build_status}"

if build_status == "completed":
sp.succeed("✅ Build completed successfully!")
return 0
elif build_status == "failed":
sp.succeed("❌ Build failed.")
return 1
elif build_status == "running":
stream_event_counter += self._long_poll_stream(sp, network_err)

if fail_counter >=10 or stream_event_counter >= 10:
sp.text = ""
sp.fail("💥 Failed fetching build status. Perhaps you hit a network error?")
logger.debug(f"Failed to fetch build status {fail_counter} times and failed streaming {stream_event_counter} times. Exiting.")
return 2
sp.text = f"Build status: {build_status}"
if not self._long_poll_stream(sp, network_err):
sp.text = ""
sp.fail("💥 Failed fetching build status. Perhaps you hit a network error?")
logger.debug(f"Failed to fetch build status {fail_counter} times and failed streaming {stream_event_counter} times. Exiting.")
return 2
# check status and go again

def determine_submission_name(self):
if "CHIPFLOW_SUBMISSION_NAME" in os.environ:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ test.cmd = "pytest"
test-cov.cmd = "pytest --cov=chipflow_lib --cov-report=term"
test-cov-html.cmd = "pytest --cov=chipflow_lib --cov-report=html"
test-docs.cmd = "sphinx-build -b doctest docs/ docs/_build"
lint.composite = [ "./tools/license_check.sh", "ruff check", "pyright chipflow_lib"]
lint.composite = [ "./tools/license_check.sh", "ruff check {args}", "pyright chipflow_lib"]
docs.cmd = "sphinx-build docs/ docs/_build/ -W --keep-going"
test-silicon.cmd = "pytest tests/test_silicon_platform.py tests/test_silicon_platform_additional.py tests/test_silicon_platform_amaranth.py tests/test_silicon_platform_build.py tests/test_silicon_platform_port.py --cov=chipflow_lib.platforms.silicon --cov-report=term"
_check-project.call = "tools.check_project:main"
Expand Down
Loading