|
| 1 | +"""Script to upload benchmark results to S3.""" |
| 2 | + |
| 3 | +import json |
| 4 | +import logging |
| 5 | +import os |
| 6 | +import sys |
| 7 | + |
| 8 | +import boto3 |
| 9 | +from botocore.exceptions import ClientError |
| 10 | + |
| 11 | +from sdgym.result_writer import S3ResultsWriter |
| 12 | +from sdgym.run_benchmark.utils import OUTPUT_DESTINATION_AWS |
| 13 | +from sdgym.s3 import S3_REGION, parse_s3_path |
| 14 | +from sdgym.sdgym_result_explorer.result_explorer import SDGymResultsExplorer |
| 15 | + |
| 16 | +LOGGER = logging.getLogger(__name__) |
| 17 | + |
| 18 | + |
def get_latest_run_from_file(s3_client, bucket, key):
    """Get the latest run folder name from the benchmark dates file in S3.

    The object stored at ``key`` is read, decoded as UTF-8 and parsed as JSON.
    It must contain a ``runs`` list whose entries each carry a ``date`` and a
    ``folder_name``.

    Args:
        s3_client: Client exposing ``get_object`` and ``exceptions.ClientError``.
        bucket: Bucket that holds the dates file.
        key: Key of the dates file inside the bucket.

    Returns:
        The ``folder_name`` of the entry with the greatest ``date``. When several
        entries share that date, the one listed last in the file wins.

    Raises:
        RuntimeError: If the object cannot be read from S3, or if the file
            lists no runs.
    """
    # Only the S3 round-trip can raise ClientError, so keep the try body narrow.
    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        body = response['Body'].read().decode('utf-8')
    except s3_client.exceptions.ClientError as e:
        # Chain explicitly so the original S3 error stays attached as the cause.
        raise RuntimeError(f'Failed to read {key} from S3: {e}') from e

    runs = json.loads(body)['runs']
    if not runs:
        # Without this guard an empty list surfaces as a bare IndexError below.
        raise RuntimeError(f'No runs found in {key}.')

    # sorted() is stable, so taking the last element keeps the last-listed
    # entry among runs that share the same date.
    latest = sorted(runs, key=lambda run: run['date'])[-1]
    return latest['folder_name']
| 29 | + |
| 30 | + |
def write_uploaded_marker(s3_client, bucket, prefix, folder_name):
    """Store a marker object in S3 signalling that the upload has finished.

    The marker is written to ``{prefix}{folder_name}/upload_complete.marker``
    in ``bucket`` with a fixed byte payload.
    """
    marker_key = f'{prefix}{folder_name}/upload_complete.marker'
    s3_client.put_object(Bucket=bucket, Key=marker_key, Body=b'Upload complete')
| 36 | + |
| 37 | + |
def upload_already_done(s3_client, bucket, prefix, folder_name):
    """Tell whether the upload marker for ``folder_name`` already exists in S3.

    Returns:
        ``True`` when ``head_object`` succeeds for the marker key, ``False``
        when it fails with error code ``'404'``.

    Raises:
        ClientError: Re-raised unchanged for any error code other than ``'404'``.
    """
    marker_key = f'{prefix}{folder_name}/upload_complete.marker'
    try:
        s3_client.head_object(Bucket=bucket, Key=marker_key)
    except ClientError as error:
        # Only a missing marker means "not uploaded"; anything else is a real failure.
        if error.response['Error']['Code'] != '404':
            raise

        return False

    return True
| 48 | + |
| 49 | + |
def get_result_folder_name_and_s3_vars(aws_access_key_id, aws_secret_access_key):
    """Build the S3 client and look up the folder name of the latest run.

    The bucket and prefix come from ``OUTPUT_DESTINATION_AWS``; the folder name
    is read from ``{prefix}_BENCHMARK_DATES.json`` in that bucket.

    Returns:
        A tuple ``(folder_name, s3_client, bucket, prefix)``.
    """
    bucket, prefix = parse_s3_path(OUTPUT_DESTINATION_AWS)
    client_options = {
        'aws_access_key_id': aws_access_key_id,
        'aws_secret_access_key': aws_secret_access_key,
        'region_name': S3_REGION,
    }
    s3_client = boto3.client('s3', **client_options)
    dates_key = f'{prefix}_BENCHMARK_DATES.json'
    folder_name = get_latest_run_from_file(s3_client, bucket, dates_key)

    return folder_name, s3_client, bucket, prefix
| 62 | + |
| 63 | + |
def upload_results(
    aws_access_key_id, aws_secret_access_key, folder_name, s3_client, bucket, prefix
):
    """Summarize a finished benchmark run and store the summary in S3.

    When the run is not complete yet, a warning is logged and the process
    exits with status 0. Otherwise the summary is written as
    ``{folder_name}_summary.csv`` inside the run folder, and the upload marker
    is written afterwards.
    """
    explorer = SDGymResultsExplorer(
        OUTPUT_DESTINATION_AWS,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    writer = S3ResultsWriter(s3_client)

    run_is_complete = explorer.all_runs_complete(folder_name)
    if not run_is_complete:
        LOGGER.warning(f'Run {folder_name} is not complete yet. Exiting.')
        sys.exit(0)

    LOGGER.info(f'Run {folder_name} is complete! Proceeding with summarization...')
    summary, _ = explorer.summarize(folder_name)
    summary_path = f'{OUTPUT_DESTINATION_AWS}{folder_name}/{folder_name}_summary.csv'
    writer.write_dataframe(summary, summary_path, index=True)

    # The marker goes last so it only exists once the summary has been written.
    write_uploaded_marker(s3_client, bucket, prefix, folder_name)
| 85 | + |
| 86 | + |
def main():
    """Upload the latest benchmark results unless they were uploaded already.

    AWS credentials are read from the ``AWS_ACCESS_KEY_ID`` and
    ``AWS_SECRET_ACCESS_KEY`` environment variables. If the upload marker for
    the latest run is present, a warning is logged and the process exits with
    status 0.
    """
    access_key = os.environ.get('AWS_ACCESS_KEY_ID')
    secret_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
    s3_vars = get_result_folder_name_and_s3_vars(access_key, secret_key)
    folder_name, s3_client, bucket, prefix = s3_vars

    if upload_already_done(s3_client, bucket, prefix, folder_name):
        LOGGER.warning('Benchmark results have already been uploaded. Exiting.')
        sys.exit(0)

    # s3_vars is (folder_name, s3_client, bucket, prefix), matching the remaining parameters.
    upload_results(access_key, secret_key, *s3_vars)


if __name__ == '__main__':
    main()
0 commit comments