-
Notifications
You must be signed in to change notification settings - Fork 12
Add S3 storage option to diet link with presigned URLs #111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,11 +2,34 @@ | |||||||
| from lib.logging_utils import init_logger | ||||||||
| import json | ||||||||
| import requests | ||||||||
| import uuid | ||||||||
| import boto3 | ||||||||
| from botocore.exceptions import ClientError | ||||||||
| from typing import Dict, List, Any, Optional | ||||||||
|
|
||||||||
# Module-level logger used by the helpers in this file.
logger = init_logger(__name__)
# NOTE(review): this message is emitted once at import time and looks like a
# leftover debugging aid - confirm whether it should be removed before merge.
logger.info("MDO THIS SHOULD PRINT")
|
|
||||||||
# Placeholder written to logs in place of any sensitive option value.
_REDACTED = "[REDACTED]"

# Substrings that mark an option name as secret-bearing. Matching is done on
# the lower-cased key, so it is case-insensitive. "secret" also covers names
# such as "aws_secret_access_key" and anything ending in "_secret".
_SENSITIVE_KEY_MARKERS = ("secret", "password", "token")


def _redact_option_value(key: str, value: Any) -> Any:
    """
    Redact sensitive option values before logging.

    This prevents leaking secrets (for example AWS credentials) into logs.

    Args:
        key: Option name. ``None``/empty keys are treated as non-sensitive,
            and non-string keys are converted with ``str()`` so this helper
            never raises while a log line is being built.
        value: The option value that is about to be logged.

    Returns:
        ``_REDACTED`` when the key name contains "secret", "password" or
        "token" (case-insensitive); otherwise ``value`` unchanged.
    """
    key_l = str(key or "").lower()
    if any(marker in key_l for marker in _SENSITIVE_KEY_MARKERS):
        return _REDACTED
    return value
|
|
||||||||
|
|
||||||||
| # Default options that control which elements to remove | ||||||||
| default_options = { | ||||||||
|
|
@@ -15,16 +38,101 @@ | |||||||
| "remove_analysis": False, # Remove all analysis data | ||||||||
| "remove_attachment_types": [], # List of attachment types to remove (e.g., ["image/jpeg", "audio/mp3"]) | ||||||||
| "remove_system_prompts": False, # Remove system_prompt keys to prevent LLM instruction insertion | ||||||||
| # S3 storage options for dialog bodies | ||||||||
| "s3_bucket": "", # S3 bucket name for storing dialog bodies | ||||||||
| "s3_path": "", # Optional path prefix within the bucket | ||||||||
| "aws_access_key_id": "", # AWS access key ID | ||||||||
| "aws_secret_access_key": "", # AWS secret access key | ||||||||
| "aws_region": "us-east-1", # AWS region (default: us-east-1) | ||||||||
| "presigned_url_expiration": None, # Presigned URL expiration in seconds (None = use the default of 3600 seconds, i.e. 1 hour) | ||||||||
| } | ||||||||
|
|
||||||||
|
|
||||||||
def _get_s3_client(options: Dict[str, Any]):
    """
    Create and return a boto3 S3 client configured from ``options``.

    Args:
        options: Configuration mapping. Reads ``aws_access_key_id``,
            ``aws_secret_access_key`` and ``aws_region``.

    Returns:
        The client object returned by ``boto3.client("s3", ...)``.
    """
    return boto3.client(
        "s3",
        # Missing or blank credentials are passed as None rather than "" so
        # boto3 treats them as "not provided" and uses the credentials
        # configured for the session (environment, shared config, IAM role)
        # instead of explicit empty keys.
        aws_access_key_id=options.get("aws_access_key_id") or None,
        aws_secret_access_key=options.get("aws_secret_access_key") or None,
        # A blank region falls back to the same default as a missing one.
        region_name=options.get("aws_region") or "us-east-1",
    )
|
Comment on lines
+51
to
+58
|
||||||||
|
|
||||||||
|
|
||||||||
def _upload_to_s3_and_get_presigned_url(
    content: str,
    vcon_uuid: str,
    dialog_id: str,
    options: Dict[str, Any]
) -> Optional[str]:
    """
    Upload dialog body content to S3 and return a presigned URL.

    The object key has the form
    ``[<s3_path>/]<vcon_uuid>/[<dialog_id>_]<uuid4>.txt``; the random UUID
    component gives every upload its own key.

    Args:
        content: The dialog body content to upload. ``str`` is encoded as
            UTF-8; any other value is handed to S3 unchanged.
        vcon_uuid: The vCon UUID, used as a directory level in the key
        dialog_id: The dialog ID, used as a filename prefix when non-empty
        options: Configuration options including S3 credentials and bucket
            info (``s3_bucket``, optional ``s3_path``, optional
            ``presigned_url_expiration`` in seconds)

    Returns:
        Presigned URL to access the uploaded content, or None if upload fails
    """
    try:
        s3 = _get_s3_client(options)

        # Generate a unique key for this dialog body
        unique_id = str(uuid.uuid4())
        key = f"{dialog_id}_{unique_id}.txt" if dialog_id else f"{unique_id}.txt"

        # Add vcon_uuid as a directory level
        key = f"{vcon_uuid}/{key}"

        # Add optional path prefix. Surrounding slashes are stripped so that
        # values such as "dialogs/" or "/dialogs" do not yield keys with empty
        # path segments ("dialogs//<uuid>/...") or a leading "/".
        prefix = str(options.get("s3_path") or "").strip("/")
        if prefix:
            key = f"{prefix}/{key}"

        bucket = options["s3_bucket"]

        # Upload the content
        s3.put_object(
            Bucket=bucket,
            Key=key,
            Body=content.encode("utf-8") if isinstance(content, str) else content,
            ContentType="text/plain",
        )

        logger.info(f"Successfully uploaded dialog body to s3://{bucket}/{key}")

        # Generate presigned URL
        expiration = options.get("presigned_url_expiration")
        if expiration is None:
            # Default to 1 hour (3600 seconds) if not specified
            expiration = 3600

        presigned_url = s3.generate_presigned_url(
            "get_object",
            Params={"Bucket": bucket, "Key": key},
            ExpiresIn=expiration,
        )

        logger.info(f"Generated presigned URL with expiration {expiration}s")
        return presigned_url

    except ClientError as e:
        logger.error(f"S3 client error uploading dialog body: {e}")
        return None
    except Exception as e:
        # Best-effort boundary: anything unexpected (missing option key, bad
        # parameter, network failure) is logged with its traceback and
        # reported to the caller as "no URL" rather than propagated.
        logger.exception(f"Exception uploading dialog body to S3: {e}")
        return None
|
||||||||
| logger.error(f"Exception uploading dialog body to S3: {e}") | |
| logger.exception(f"Unexpected exception uploading dialog body to S3: {type(e).__name__}: {e}") |
Copilot
AI
Dec 11, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When S3 upload fails and the body is cleared, the body_type field is not updated or removed. This could leave dialogs with body_type="url" but an empty body, which is an inconsistent state. Consider either removing the body_type field or setting it to an appropriate value (like an empty string) when the upload fails.
| dialog["body"] = "" | |
| dialog["body"] = "" | |
| dialog["body_type"] = "" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment says "None = no expiration/default 1 hour" which is misleading. According to the implementation (lines 86-89), None doesn't mean "no expiration" - it means the default of 3600 seconds (1 hour) will be used. The comment should be clarified to say "None = default 1 hour" or "optional, defaults to 3600 seconds (1 hour)".