Skip to content

Commit 2fb8b30

Browse files
authored
Keep going display on HUD: lambda to trigger log classifier when temp log comes in (#6811)
Please approve/merge https://github.com/pytorch/test-infra/pull/6809/files first. See the PR mentioned in the README for context. The lambda is really simple, but I don't know of another way to trigger the log classifier whenever something gets uploaded to S3, because the log classifier is usually triggered just by curling its URL.
1 parent a051819 commit 2fb8b30

File tree

5 files changed

+107
-1
lines changed

5 files changed

+107
-1
lines changed

.github/workflows/_lambda-do-release-runners.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ jobs:
8989
{ dir-name: 'oss_ci_job_queue_time', zip-name: 'oss-ci-job-queue-time' },
9090
{ dir-name: 'oss_ci_cur', zip-name: 'oss-ci-cur' },
9191
{ dir-name: 'benchmark-results-uploader', zip-name: 'benchmark-results-uploader' },
92-
{ dir-name: 'pytorch-auto-revert', zip-name: 'pytorch-auto-revert' }
92+
{ dir-name: 'pytorch-auto-revert', zip-name: 'pytorch-auto-revert' },
93+
{ dir-name: 'keep-going-call-log-classifier', zip-name: 'keep-going-call-log-classifier' },
9394
]
9495
name: Upload Release for ${{ matrix.dir-name }} lambda
9596
runs-on: ubuntu-latest
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Build helpers for the keep-going-call-log-classifier lambda.
#   make / make run-local            run the unit tests in a local virtualenv
#   make create-deployment-package   build deployment.zip
#   make clean                       remove every generated artifact

.PHONY: all
all: run-local

# `all` depends on this target, so it has to exist for a bare `make` to work.
.PHONY: run-local
run-local: venv/bin/python
	venv/bin/python test_lambda_function.py

.PHONY: clean
clean:
	rm -rf deployment
	rm -rf venv
	rm -rf deployment.zip

venv/bin/python:
	virtualenv venv

# Depend on the handler source so the archive is rebuilt when it changes.
deployment.zip: lambda_function.py
	mkdir -p deployment
	cp lambda_function.py ./deployment/.
	cd ./deployment && zip -q -r ../deployment.zip .

.PHONY: create-deployment-package
create-deployment-package: deployment.zip
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Keep Going Call Log Classifier
2+
3+
This is a simple AWS Lambda function that uploads the temporary values for
4+
keep-going to DynamoDB and ClickHouse by calling the log classifier when a new
5+
object is added to `s3://gha-artifacts/temp_logs`.
6+
7+
Please see https://github.com/pytorch/pytorch/pull/155371 for more context.
8+
9+
## Testing
10+
11+
To test the Lambda function locally:
12+
13+
```bash
14+
# Run test
15+
python test_lambda_function.py
16+
```
17+
18+
Page maintainers: @pytorch/pytorch-dev-infra
19+
Last verified: 2025-06-24
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from typing import Any
2+
from urllib.error import HTTPError
3+
from urllib.request import urlopen
4+
5+
6+
# Function URL of the log classifier; it is invoked with a plain GET request.
LOG_CLASSIFIER_URL = (
    "https://vwg52br27lx5oymv4ouejwf4re0akoeg.lambda-url.us-east-1.on.aws/"
)
# Repository name sent as the `repo` query parameter on every request.
PYTORCH_REPO = "pytorch/pytorch"


def lambda_handler(event: Any, context: Any) -> None:
    """Call the log classifier once for every S3 object named in *event*.

    The job id is taken from the last ``/``-separated segment of each
    record's object key. Records are handled independently: a key whose last
    segment is not an integer, or a request that fails, is logged and
    skipped so the remaining records are still processed.

    Args:
        event: Mapping with a ``"Records"`` list; each record must provide
            ``record["s3"]["object"]["key"]``.
        context: Lambda context object; unused.

    Raises:
        KeyError: If the event or a record lacks one of the keys above.
    """
    for record in event["Records"]:
        key = record["s3"]["object"]["key"]
        job_id_text = key.split("/")[-1]
        try:
            job_id = int(job_id_text)
        except ValueError:
            print(
                f"Failed to convert job id into int job_id={job_id_text}, key={key}"
            )
            continue
        try:
            # Only the request itself matters; the context manager closes the
            # response so the connection is not left open.
            with urlopen(
                f"{LOG_CLASSIFIER_URL}?job_id={job_id}&repo={PYTORCH_REPO}&temp_log=true"
            ):
                pass
        except HTTPError as e:
            # The classifier answered, but with an error status.
            print(
                f"Failed to call log classifier for job_id={job_id}, key={key}, error={e}"
            )
        except OSError as e:
            # URLError, timeouts and connection resets are all OSError
            # subclasses; without this one unreachable call would abort the
            # rest of the batch.
            print(
                f"Failed to reach log classifier for job_id={job_id}, key={key}, error={e}"
            )
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import unittest
2+
from unittest.mock import patch
3+
4+
from lambda_function import lambda_handler, LOG_CLASSIFIER_URL
5+
6+
7+
def get_test_event(job_id: "int | str") -> dict:
    """Build a minimal S3 event with one record for ``temp_logs/<job_id>``.

    ``job_id`` is interpolated into the object key as-is, so a non-numeric
    string can be passed to produce a key the handler should reject.
    """
    return {
        "Records": [
            {
                "s3": {
                    "bucket": {"name": "gha-artifacts"},
                    "object": {"key": f"temp_logs/{job_id}"},
                }
            }
        ]
    }
18+
19+
20+
class TestKeepGoingUploadCallLogClassifier(unittest.TestCase):
    """Tests for lambda_handler with urlopen patched so no HTTP request is made."""

    @patch("lambda_function.urlopen")
    def test_lambda_handler(self, urlopen_stub):
        """A numeric job id results in exactly one request to the expected URL."""
        lambda_handler(get_test_event(123345), None)
        urlopen_stub.assert_called_once_with(
            f"{LOG_CLASSIFIER_URL}?job_id=123345&repo=pytorch/pytorch&temp_log=true"
        )

    @patch("lambda_function.urlopen")
    def test_fails_with_invalid_job_id(self, urlopen_stub):
        """A key whose last segment is not an integer triggers no request."""
        lambda_handler(get_test_event("not a number"), None)
        urlopen_stub.assert_not_called()


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)