Skip to content

Commit bbc7543

Browse files
author
Bob Strahan
committed
Add configurable S3 vector index properties with dimension, distance metric, and metadata configuration
1 parent 185cdc7 commit bbc7543

File tree

2 files changed

+76
-25
lines changed

2 files changed

+76
-25
lines changed

options/bedrockkb/src/s3_vectors_manager/handler.py

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,29 @@ def handle_s3_vector_resources(event, context, properties):
105105
region = properties.get('Region', '')
106106
kms_key_arn = properties.get('KmsKeyArn', '')
107107

108+
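# Extract configurable index properties (fallbacks apply only when the custom resource omits them)
# NOTE(review): the MetadataConfiguration fallback below lists 'AMAZON_BEDROCK_TEXT', while template.yaml passes 'AMAZON_BEDROCK_TEXT_CHUNK' - confirm which key is intended and align the two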
109+
vector_dimension = properties.get('VectorDimension', 1024)
110+
distance_metric = properties.get('DistanceMetric', 'cosine')
111+
metadata_configuration = properties.get('MetadataConfiguration', {
112+
'nonFilterableMetadataKeys': ['AMAZON_BEDROCK_METADATA', 'AMAZON_BEDROCK_TEXT']
113+
})
114+
108115
logger.info(f"Raw bucket name: {raw_bucket_name}, Sanitized bucket name: {bucket_name}")
116+
logger.info(f"Index properties - Dimension: {vector_dimension}, Metric: {distance_metric}, Metadata: {metadata_configuration}")
109117

110118
# Initialize S3 Vectors client
111119
s3vectors_client = boto3.client('s3vectors', region_name=region)
112120

121+
# Create index config for passing to functions
122+
index_config = {
123+
'dimension': vector_dimension,
124+
'distance_metric': distance_metric,
125+
'metadata_configuration': metadata_configuration
126+
}
127+
113128
if request_type == 'Create':
114129
logger.info(f"Creating S3 Vector bucket: {bucket_name}")
115-
return create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn)
130+
return create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn, index_config)
116131

117132
elif request_type == 'Update':
118133
logger.info(f"Updating S3 Vector bucket: {bucket_name}")
@@ -125,10 +140,10 @@ def handle_s3_vector_resources(event, context, properties):
125140
if old_bucket_name != bucket_name or old_index_name != index_name:
126141
if old_bucket_name and old_index_name:
127142
delete_s3_vector_resources(s3vectors_client, old_bucket_name, old_index_name)
128-
return create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn)
143+
return create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn, index_config)
129144
else:
130145
# Names haven't changed - update existing resources (recreate index)
131-
return update_s3_vector_info(s3vectors_client, bucket_name, index_name)
146+
return update_s3_vector_info(s3vectors_client, bucket_name, index_name, index_config)
132147

133148
elif request_type == 'Delete':
134149
logger.info(f"Deleting S3 Vector bucket: {bucket_name}")
@@ -347,23 +362,25 @@ def delete_vector_index(s3vectors_client, bucket_name, index_name):
347362
raise
348363

349364

350-
def create_vector_index(s3vectors_client, bucket_name, index_name):
351-
"""Create a vector index with standard configuration for Bedrock Knowledge Base integration."""
365+
def create_vector_index(s3vectors_client, bucket_name, index_name, dimension=1024, distance_metric="cosine", metadata_configuration=None):
366+
"""Create a vector index with configurable settings for Bedrock Knowledge Base integration."""
352367
try:
353368
logger.info(f"Creating vector index: {index_name} in bucket: {bucket_name}")
369+
logger.info(f"Index configuration - Dimension: {dimension}, Distance Metric: {distance_metric}, Metadata Config: {metadata_configuration}")
370+
371+
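# Default metadata configuration if none provided
# NOTE(review): the previously hard-coded key was "AMAZON_BEDROCK_TEXT_CHUNK"; this default uses "AMAZON_BEDROCK_TEXT", which changes behaviour for callers that pass no configuration - confirm this is intentional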
372+
if metadata_configuration is None:
373+
metadata_configuration = {
374+
"nonFilterableMetadataKeys": ["AMAZON_BEDROCK_METADATA", "AMAZON_BEDROCK_TEXT"]
375+
}
354376

355377
index_response = s3vectors_client.create_index(
356378
vectorBucketName=bucket_name,
357379
indexName=index_name,
358380
dataType="float32",
359-
dimension=1024, # All embedding models in picklist output 1024
360-
distanceMetric="cosine",
361-
metadataConfiguration={
362-
"nonFilterableMetadataKeys": [
363-
"AMAZON_BEDROCK_METADATA",
364-
"AMAZON_BEDROCK_TEXT_CHUNK"
365-
]
366-
}
381+
dimension=int(dimension),
382+
distanceMetric=distance_metric,
383+
metadataConfiguration=metadata_configuration
367384
)
368385
logger.info(f"Successfully created vector index: {index_name}")
369386
return index_response
@@ -378,23 +395,30 @@ def create_vector_index(s3vectors_client, bucket_name, index_name):
378395
raise
379396

380397

381-
def recreate_vector_index(s3vectors_client, bucket_name, index_name):
398+
def recreate_vector_index(s3vectors_client, bucket_name, index_name, index_config):
382399
"""Delete and recreate a vector index to ensure fresh configuration."""
383400
try:
384401
logger.info(f"Recreating vector index: {index_name} in bucket: {bucket_name}")
385402

386403
# Delete existing index if it exists
387404
delete_vector_index(s3vectors_client, bucket_name, index_name)
388405

389-
# Create new index
390-
return create_vector_index(s3vectors_client, bucket_name, index_name)
406+
# Create new index with configuration
407+
return create_vector_index(
408+
s3vectors_client,
409+
bucket_name,
410+
index_name,
411+
dimension=index_config['dimension'],
412+
distance_metric=index_config['distance_metric'],
413+
metadata_configuration=index_config['metadata_configuration']
414+
)
391415

392416
except Exception as e:
393417
logger.error(f"Error recreating vector index {index_name}: {e}")
394418
raise
395419

396420

397-
def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn=None):
421+
def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn=None, index_config=None):
398422
"""Create S3 Vector bucket and index following Console approach."""
399423
try:
400424
# Get region from client for ARN construction
@@ -418,8 +442,18 @@ def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedd
418442
bucket_response = s3vectors_client.create_vector_bucket(**create_bucket_params)
419443
logger.info(f"Created vector bucket: {bucket_name}")
420444

421-
# Create S3 Vector Index using modular function
422-
create_vector_index(s3vectors_client, bucket_name, index_name)
445+
# Create S3 Vector Index using modular function with configuration
446+
if index_config:
447+
create_vector_index(
448+
s3vectors_client,
449+
bucket_name,
450+
index_name,
451+
dimension=index_config['dimension'],
452+
distance_metric=index_config['distance_metric'],
453+
metadata_configuration=index_config['metadata_configuration']
454+
)
455+
else:
456+
create_vector_index(s3vectors_client, bucket_name, index_name)
423457

424458
# Construct ARNs
425459
sts_client = boto3.client('sts', region_name=region)
@@ -447,8 +481,8 @@ def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedd
447481
error_code = e.response['Error']['Code']
448482
if error_code in ['BucketAlreadyExists', 'ConflictException']:
449483
logger.warning(f"Vector resource already exists: {e}")
450-
# Try to get existing resource info
451-
return get_s3_vector_info(s3vectors_client, bucket_name, index_name)
484+
# Resource already exists: return the existing resource info, forwarding index_config so the index is ensured with the same settings
485+
return get_s3_vector_info(s3vectors_client, bucket_name, index_name, index_config)
452486
else:
453487
raise
454488

@@ -488,7 +522,7 @@ def delete_s3_vector_resources(s3vectors_client, bucket_name, index_name):
488522
logger.warning(f"Error during deletion (continuing): {e}")
489523

490524

491-
def get_s3_vector_info(s3vectors_client, bucket_name, index_name):
525+
def get_s3_vector_info(s3vectors_client, bucket_name, index_name, index_config=None):
492526
"""Get information about existing S3 Vector bucket and ensure index exists."""
493527
try:
494528
# Get bucket info
@@ -509,7 +543,17 @@ def get_s3_vector_info(s3vectors_client, bucket_name, index_name):
509543
# Always attempt to create the index - if it exists, we'll get ConflictException
510544
# This is more robust than trying to check existence with potentially non-existent API methods
511545
logger.info(f"Ensuring vector index exists: {index_name}")
512-
index_created = create_vector_index(s3vectors_client, bucket_name, index_name)
546+
if index_config:
547+
index_created = create_vector_index(
548+
s3vectors_client,
549+
bucket_name,
550+
index_name,
551+
dimension=index_config['dimension'],
552+
distance_metric=index_config['distance_metric'],
553+
metadata_configuration=index_config['metadata_configuration']
554+
)
555+
else:
556+
index_created = create_vector_index(s3vectors_client, bucket_name, index_name)
513557

514558
# Construct index ARN (required for Knowledge Base configuration)
515559
index_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}/index/{index_name}"
@@ -530,7 +574,7 @@ def get_s3_vector_info(s3vectors_client, bucket_name, index_name):
530574
raise
531575

532576

533-
def update_s3_vector_info(s3vectors_client, bucket_name, index_name):
577+
def update_s3_vector_info(s3vectors_client, bucket_name, index_name, index_config):
534578
"""Update existing S3 Vector resources by recreating the index."""
535579
try:
536580
# Get bucket info
@@ -550,7 +594,7 @@ def update_s3_vector_info(s3vectors_client, bucket_name, index_name):
550594

551595
# For updates, always recreate the index to ensure fresh configuration
552596
logger.info(f"Recreating vector index for update: {index_name}")
553-
recreate_vector_index(s3vectors_client, bucket_name, index_name)
597+
recreate_vector_index(s3vectors_client, bucket_name, index_name, index_config)
554598

555599
# Construct index ARN (required for Knowledge Base configuration)
556600
index_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}/index/{index_name}"

options/bedrockkb/template.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Parameters:
4343
Description: >-
4444
Name for S3 vector index. Only used when pVectorStoreType is S3_VECTORS.
4545
46+
4647
pKnowledgeBaseBucketName:
4748
Type: String
4849
Default: ""
@@ -476,6 +477,12 @@ Resources:
476477
EmbeddingModel: !Ref pEmbedModel
477478
Region: !Ref AWS::Region
478479
KmsKeyArn: !If [IsCustomerManagedKey, !Ref pCustomerManagedEncryptionKeyArn, !Ref "AWS::NoValue"]
480+
VectorDimension: 1024
481+
DistanceMetric: "cosine"
482+
MetadataConfiguration:
483+
nonFilterableMetadataKeys:
484+
- "AMAZON_BEDROCK_METADATA"
485+
- "AMAZON_BEDROCK_TEXT_CHUNK"
479486

480487
#
481488
# OpenSearch Serverless resources (only when using OpenSearch)

0 commit comments

Comments
 (0)