Skip to content

Commit 910b9a5

Browse files
author
Bob Strahan
committed
fix: refactor S3 vector index creation and ensure index exists in get_s3_vector_info
1 parent 9d13cd8 commit 910b9a5

File tree

1 file changed

+75
-25
lines changed
  • options/bedrockkb/src/s3_vectors_manager

1 file changed

+75
-25
lines changed

options/bedrockkb/src/s3_vectors_manager/handler.py

Lines changed: 75 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,39 @@ def get_knowledge_base_info(bedrock_agent_client, kb_id):
325325
raise
326326

327327

328+
def create_vector_index(
    s3vectors_client,
    bucket_name,
    index_name,
    *,
    dimension=1024,
    distance_metric="cosine",
):
    """Create a vector index with standard configuration for Bedrock Knowledge Base integration.

    The index is created with ``float32`` vectors and marks the
    ``AMAZON_BEDROCK_METADATA`` and ``AMAZON_BEDROCK_TEXT_CHUNK`` keys as
    non-filterable metadata.

    Args:
        s3vectors_client: Client object exposing ``create_index``
            (presumably a boto3 ``s3vectors`` client - confirm with caller).
        bucket_name: Name of the vector bucket that will contain the index.
        index_name: Name of the index to create.
        dimension: Vector dimension passed to ``create_index``. Defaults to
            1024, the value previously hard-coded here.
        distance_metric: Distance metric passed to ``create_index``.
            Defaults to ``"cosine"``, the value previously hard-coded here.

    Returns:
        The ``create_index`` response on success, or ``None`` when the call
        fails with a ``ConflictException`` error code (the index is treated
        as already existing).

    Raises:
        ClientError: Re-raised, after logging, for any error code other than
            ``ConflictException``.
    """
    try:
        logger.info(f"Creating vector index: {index_name} in bucket: {bucket_name}")

        index_response = s3vectors_client.create_index(
            vectorBucketName=bucket_name,
            indexName=index_name,
            dataType="float32",
            dimension=dimension,  # Default 1024: all embedding models in picklist output 1024
            distanceMetric=distance_metric,
            metadataConfiguration={
                "nonFilterableMetadataKeys": [
                    "AMAZON_BEDROCK_METADATA",
                    "AMAZON_BEDROCK_TEXT_CHUNK",
                ]
            },
        )
        logger.info(f"Successfully created vector index: {index_name}")
        return index_response

    except ClientError as e:
        if e.response['Error']['Code'] == 'ConflictException':
            # Index already exists or was created by another process; callers
            # only need the index to exist, so this is not an error.
            logger.info(f"Index {index_name} already exists or was created by another process")
            return None

        logger.error(f"Error creating vector index {index_name}: {e}")
        raise
357+
358+
328359
def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn=None):
329-
"""Create S3 Vector bucket and index following Console approach (manual index creation required)."""
360+
"""Create S3 Vector bucket and index following Console approach."""
330361
try:
331362
# Get region from client for ARN construction
332363
region = s3vectors_client.meta.region_name
@@ -349,23 +380,8 @@ def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedd
349380
bucket_response = s3vectors_client.create_vector_bucket(**create_bucket_params)
350381
logger.info(f"Created vector bucket: {bucket_name}")
351382

352-
# Create S3 Vector Index (required for Knowledge Base integration)
353-
logger.info(f"Creating vector index: {index_name}")
354-
355-
index_response = s3vectors_client.create_index(
356-
vectorBucketName=bucket_name,
357-
indexName=index_name,
358-
dataType="float32",
359-
dimension=1024, # All embedding models in picklist output 1024
360-
distanceMetric="cosine",
361-
metadataConfiguration={
362-
"nonFilterableMetadataKeys": [
363-
"AMAZON_BEDROCK_METADATA",
364-
"AMAZON_BEDROCK_TEXT_CHUNK"
365-
]
366-
}
367-
)
368-
logger.info(f"Created vector index: {index_name}")
383+
# Create S3 Vector Index using modular function
384+
create_vector_index(s3vectors_client, bucket_name, index_name)
369385

370386
# Construct ARNs
371387
sts_client = boto3.client('sts', region_name=region)
@@ -435,24 +451,58 @@ def delete_s3_vector_resources(s3vectors_client, bucket_name, index_name):
435451

436452

437453
def get_s3_vector_info(s3vectors_client, bucket_name, index_name):
438-
"""Get information about existing S3 Vector bucket. Bedrock manages the index."""
454+
"""Get information about existing S3 Vector bucket and ensure index exists."""
439455
try:
440456
# Get bucket info
441457
bucket_response = s3vectors_client.get_vector_bucket(vectorBucketName=bucket_name)
442458
bucket_arn = bucket_response.get('BucketArn')
443459

460+
# Get region and account ID for ARN construction
461+
region = s3vectors_client.meta.region_name
462+
sts_client = boto3.client('sts', region_name=region)
463+
account_id = sts_client.get_caller_identity()['Account']
464+
444465
# Construct bucket ARN if not returned in response
445466
if not bucket_arn:
446-
# Get account ID from STS for ARN construction
447-
sts_client = boto3.client('sts', region_name=s3vectors_client.meta.region_name)
448-
account_id = sts_client.get_caller_identity()['Account']
449-
bucket_arn = f"arn:aws:s3vectors:{s3vectors_client.meta.region_name}:{account_id}:bucket/{bucket_name}"
467+
bucket_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}"
468+
469+
logger.info(f"Found existing vector bucket ARN: {bucket_arn}")
470+
471+
# Check if index exists, create if it doesn't
472+
index_exists = False
473+
try:
474+
# Try to describe the index to see if it exists
475+
index_response = s3vectors_client.describe_index(
476+
vectorBucketName=bucket_name,
477+
indexName=index_name
478+
)
479+
index_exists = True
480+
logger.info(f"Found existing vector index: {index_name}")
481+
482+
except ClientError as e:
483+
if e.response['Error']['Code'] in ['IndexNotFound', 'ResourceNotFoundException']:
484+
logger.info(f"Index {index_name} not found in bucket {bucket_name}, will create it")
485+
index_exists = False
486+
else:
487+
logger.error(f"Error checking index existence: {e}")
488+
raise
489+
490+
# Create index if it doesn't exist using modular function
491+
if not index_exists:
492+
create_vector_index(s3vectors_client, bucket_name, index_name)
493+
494+
# Construct index ARN (required for Knowledge Base configuration)
495+
index_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}/index/{index_name}"
496+
497+
logger.info(f"Vector bucket ARN: {bucket_arn}")
498+
logger.info(f"Vector index ARN: {index_arn}")
450499

451500
return {
452501
'BucketName': bucket_name,
453502
'BucketArn': bucket_arn,
454-
'IndexName': index_name, # Index name for Bedrock to use
455-
'Status': 'Existing'
503+
'IndexName': index_name,
504+
'IndexArn': index_arn,
505+
'Status': 'Existing' if index_exists else 'IndexCreated'
456506
}
457507

458508
except ClientError as e:

0 commit comments

Comments
 (0)