Skip to content

Commit 5f1e553

Browse files
author
Arjun Balaji
committed
responding to PR comments
1 parent ea88fed commit 5f1e553

File tree

3 files changed

+29
-29
lines changed

3 files changed

+29
-29
lines changed

.github/workflows/terminal-bench.yaml

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,32 @@
11
# This is a terminal-bench workflow that is manually triggered
22
# Template taken from https://github.com/actions/starter-workflows/blob/main/automation/manual.yml for reference
33

4-
name: TB workflow
4+
name: Terminal-Bench
55

66
# Controls when the action will run. Workflow runs when manually triggered using the UI
7-
# or API.
87
on:
98
workflow_dispatch:
10-
# Inputs the workflow accepts.
119
inputs:
1210
name:
13-
# Friendly description to be shown in the UI instead of 'name'
14-
description: 'Run terminal bench'
15-
# Default value if no value is explicitly provided
11+
description: 'Run terminal-bench workflow to test Q CLI in real terminal environments.'
1612
default: 'all'
17-
# Input has to be provided for the workflow to run
1813
required: true
19-
# The data type of the input
2014
type: string
2115

22-
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
2316
jobs:
2417
run-benchmark:
18+
# larger 8-core runner avoids disk storage issues
2519
runs-on: ubuntu-latest-8-cores
20+
# makes these env vars available in main.py
2621
env:
27-
FIGCHAT_GAMMA_ID: ${{ secrets.FIGCHAT_GAMMA_ID }}
22+
CHAT_DOWNLOAD_ROLE_ARN: ${{ secrets.CHAT_DOWNLOAD_ROLE_ARN }}
23+
CHAT_BUILD_BUCKET_NAME: ${{ secrets.CHAT_BUILD_BUCKET_NAME }}
2824
permissions:
2925
id-token: write
3026
contents: read
3127
steps:
28+
29+
# clear unnecessary storage to ensure docker containers have space
3230
- name: Cleanup and free disk space
3331
run: |
3432
sudo rm -rf /usr/share/dotnet
@@ -43,6 +41,7 @@ jobs:
4341
- name: Checkout repository
4442
uses: actions/checkout@v4
4543

44+
# Captures git hash of branch to query specific S3 bucket
4645
- name: Set git hash
4746
run: |
4847
if [ -n "$GITHUB_SHA" ]; then
@@ -64,18 +63,19 @@ jobs:
6463
python -m pip install --upgrade pip
6564
pip install terminal-bench
6665
66+
# GitHub OIDC is enabled for the ArjunPersonal role
6767
- name: Configure AWS credentials
6868
uses: aws-actions/configure-aws-credentials@v4
6969
with:
70-
role-to-assume: arn:aws:iam::${{ secrets.AWS_TB_ROLE }}:role/ArjunPersonal
70+
role-to-assume: ${{ secrets.AWS_TB_ROLE }}
7171
aws-region: us-east-1
72-
7372

7473
- name: Run terminal benchmark
7574
run: |
7675
cd terminal-bench-test
7776
tb run --agent-import-path main:AmazonQCLIAgent --dataset-name terminal-bench-core --dataset-version head
7877
78+
# Uploads results even if the run fails, to allow for easy log inspection
7979
- name: Upload results
8080
if: always()
8181
uses: actions/upload-artifact@v4

terminal-bench-test/main.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,24 @@ class AmazonQCLIAgent(AbstractInstalledAgent):
1414
def name() -> str:
1515
return "Amazon Q CLI"
1616

17-
def __init__(self, model_name: str | None = None, *args, **kwargs):
17+
def __init__(self, *args, **kwargs):
1818
super().__init__(*args, **kwargs)
19-
self._model_name = model_name
20-
self._start_url = 'https://amzn.awsapps.com/start'
21-
self.region = 'us-east-1'
2219

20+
"""
21+
Makes necessary env vars available in docker containers
22+
"""
2323
@property
2424
def _env(self) -> dict[str, str]:
2525
# SIGv4 = 1 for AWS credentials
26-
env = {}
27-
env["AMAZON_Q_SIGV4"] = 1
28-
env["FIGCHAT_GAMMA_ID"] = os.environ.get("FIGCHAT_GAMMA_ID", '')
29-
env["AWS_ACCESS_KEY_ID"] = os.environ.get("AWS_ACCESS_KEY_ID", '')
30-
env["AWS_SECRET_ACCESS_KEY"] = os.environ.get("AWS_SECRET_ACCESS_KEY", '')
31-
env["AWS_SESSION_TOKEN"] = os.environ.get("AWS_SESSION_TOKEN", '')
32-
env["GIT_HASH"] = os.environ.get("GIT_HASH", '')
26+
env = {
27+
"AMAZON_Q_SIGV4": 1,
28+
"AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID", ''),
29+
"AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY", ''),
30+
"AWS_SESSION_TOKEN": os.environ.get("AWS_SESSION_TOKEN", ''),
31+
"GIT_HASH": os.environ.get("GIT_HASH", ''),
32+
"CHAT_DOWNLOAD_ROLE_ARN": os.environ.get("CHAT_DOWNLOAD_ROLE_ARN", ''),
33+
"CHAT_BUILD_BUCKET_NAME": os.environ.get("CHAT_BUILD_BUCKET_NAME", '')
34+
}
3335
return env
3436

3537
@property
@@ -40,11 +42,10 @@ def _run_agent_commands(self, task_description: str) -> list[TerminalCommand]:
4042
escaped_description = shlex.quote(task_description)
4143

4244
return [
43-
# q chat with 30 min max timeout and also we wait on input. Using qchat cuz sigv4.
44-
# non-interactive for now --> check if needed or not
45+
# Run q chat with a 30-minute max timeout; we also wait on input. Using qchat because of SigV4.
4546
TerminalCommand(
4647
command=f"qchat chat --no-interactive --trust-all-tools {escaped_description}",
47-
max_timeout_sec=370,
48+
max_timeout_sec=1800,
4849
block=True,
4950
)
5051
]

terminal-bench-test/setup_amazon_q.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,20 +27,19 @@ chmod 600 ~/.aws/config
2727

2828
# Assume role and capture temporary credentials --> needed for s3 bucket access for build
2929
echo "Assuming AWS s3 role"
30-
TEMP_CREDENTIALS=$(aws sts assume-role --role-arn arn:aws:iam::${FIGCHAT_GAMMA_ID}:role/FigIoChat-S3Access-Role-Gamma --role-session-name S3AccessSession 2>/dev/null || echo '{}')
30+
TEMP_CREDENTIALS=$(aws sts assume-role --role-arn ${CHAT_DOWNLOAD_ROLE_ARN} --role-session-name S3AccessSession 2>/dev/null || echo '{}')
3131
QCHAT_ACCESSKEY=$(echo $TEMP_CREDENTIALS | jq -r '.Credentials.AccessKeyId')
3232
Q_SECRET_ACCESS_KEY=$(echo $TEMP_CREDENTIALS | jq -r '.Credentials.SecretAccessKey')
3333
Q_SESSION_TOKEN=$(echo $TEMP_CREDENTIALS | jq -r '.Credentials.SessionToken')
3434

3535
# Download specific build from S3 based on commit hash
3636
echo "Downloading Amazon Q CLI build from S3..."
37-
S3_BUCKET="fig-io-chat-build-output-${FIGCHAT_GAMMA_ID}-us-east-1"
3837
S3_PREFIX="main/${GIT_HASH}/x86_64-unknown-linux-musl"
3938
echo "Downloading qchat.zip from s3://.../${S3_PREFIX}/qchat.zip"
4039

4140
# Try the download; if the hash is invalid, we fail.
4241
AWS_ACCESS_KEY_ID="$QCHAT_ACCESSKEY" AWS_SECRET_ACCESS_KEY="$Q_SECRET_ACCESS_KEY" AWS_SESSION_TOKEN="$Q_SESSION_TOKEN" \
43-
aws s3 cp s3://${S3_BUCKET}/${S3_PREFIX}/qchat.zip ./qchat.zip --region us-east-1
42+
aws s3 cp s3://${CHAT_BUILD_BUCKET_NAME}/${S3_PREFIX}/qchat.zip ./qchat.zip --region us-east-1
4443

4544
# Handle the zip file, copy the qchat executable to /usr/local/bin + symlink from old code
4645
echo "Extracting qchat.zip..."

0 commit comments

Comments
 (0)