Skip to content

Commit 881daeb

Browse files
committed
Simplify the selection process for the ef_construction parameter by offering a suggested default value with the flexibility for users to enter a custom value if needed.
Signed-off-by: hmumtazz <[email protected]>
1 parent 5570174 commit 881daeb

File tree

2 files changed

+39
-15
lines changed

2 files changed

+39
-15
lines changed

opensearch_py_ml/ml_commons/rag_pipeline/rag/rag.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
# SPDX-License-Identifier: Apache-2.0
24
# The OpenSearch Contributors require contributions made to
35
# this file be licensed under the Apache-2.0 license or a
@@ -23,7 +25,7 @@
2325
# specific language governing permissions and limitations
2426
# under the License.
2527

26-
#!/usr/bin/env python3
28+
2729

2830
"""
2931
Main CLI script for OpenSearch with Bedrock Integration

opensearch_py_ml/ml_commons/rag_pipeline/rag/rag_setup.py

Lines changed: 36 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -370,7 +370,7 @@ def initialize_opensearch_client(self):
370370

371371
parsed_url = urlparse(self.opensearch_endpoint)
372372
host = parsed_url.hostname
373-
port = parsed_url.port or 443
373+
port = 443
374374

375375
if self.is_serverless:
376376
credentials = boto3.Session().get_credentials()
@@ -383,7 +383,7 @@ def initialize_opensearch_client(self):
383383

384384
try:
385385
self.opensearch_client = OpenSearch(
386-
hosts=[{'host': host, 'portort': port}],
386+
hosts=[{'host': host, 'port': port}],
387387
http_auth=auth,
388388
use_ssl=True,
389389
verify_certs=True,
@@ -397,7 +397,7 @@ def initialize_opensearch_client(self):
397397
return False
398398

399399
def get_knn_index_details(self):
400-
# Prompt user for KNN index details (embedding dimension and space type)
400+
# Prompt user for KNN index details (embedding dimension, space type, and ef_construction)
401401
dimension_input = input("Press Enter to use the default embedding size (768), or type a custom size: ")
402402

403403
if dimension_input.strip() == "":
@@ -429,9 +429,24 @@ def get_knn_index_details(self):
429429

430430
print(f"Space type set to: {space_type}")
431431

432-
return embedding_dimension, space_type
432+
# New prompt for ef_construction
433+
ef_construction_input = input("\nPress Enter to use the default ef_construction value (512), or type a custom value: ")
434+
435+
if ef_construction_input.strip() == "":
436+
ef_construction = 512
437+
else:
438+
try:
439+
ef_construction = int(ef_construction_input)
440+
except ValueError:
441+
print("Invalid input. Using default ef_construction of 512.")
442+
ef_construction = 512
443+
444+
print(f"ef_construction set to: {ef_construction}")
433445

434-
def create_index(self, embedding_dimension, space_type):
446+
return embedding_dimension, space_type, ef_construction
447+
448+
449+
def create_index(self, embedding_dimension, space_type, ef_construction):
435450
# Create the KNN index in OpenSearch
436451
index_body = {
437452
"mappings": {
@@ -444,7 +459,7 @@ def create_index(self, embedding_dimension, space_type):
444459
"name": "hnsw",
445460
"space_type": space_type,
446461
"engine": "nmslib",
447-
"parameters": {"ef_construction": 512, "m": 16},
462+
"parameters": {"ef_construction": ef_construction, "m": 16},
448463
},
449464
},
450465
}
@@ -459,26 +474,31 @@ def create_index(self, embedding_dimension, space_type):
459474
}
460475
try:
461476
self.opensearch_client.indices.create(index=self.index_name, body=index_body)
462-
print(f"KNN index '{self.index_name}' created successfully with dimension {embedding_dimension} and space type {space_type}.")
477+
print(f"KNN index '{self.index_name}' created successfully with dimension {embedding_dimension}, space type {space_type}, and ef_construction {ef_construction}.")
463478
except Exception as e:
464479
if 'resource_already_exists_exception' in str(e).lower():
465480
print(f"Index '{self.index_name}' already exists.")
466481
else:
467482
print(f"Error creating index '{self.index_name}': {e}")
468483

469-
def verify_and_create_index(self, embedding_dimension, space_type):
470-
# Verify if the index exists, create it if it doesn't
484+
485+
486+
def verify_and_create_index(self, embedding_dimension, space_type, ef_construction):
471487
try:
488+
print(f"Attempting to verify index '{self.index_name}'...")
472489
index_exists = self.opensearch_client.indices.exists(index=self.index_name)
473490
if index_exists:
474491
print(f"KNN index '{self.index_name}' already exists.")
475492
else:
476-
self.create_index(embedding_dimension, space_type)
493+
print(f"Index '{self.index_name}' does not exist. Attempting to create...")
494+
self.create_index(embedding_dimension, space_type, ef_construction)
477495
return True
478496
except Exception as ex:
479497
print(f"Error verifying or creating index: {ex}")
498+
print(f"OpenSearch client config: {self.opensearch_client.transport.hosts}")
480499
return False
481500

501+
482502
def get_truncated_name(self, base_name, max_length=32):
483503
# Truncate a name to fit within a specified length
484504
if len(base_name) <= max_length:
@@ -518,12 +538,14 @@ def setup_command(self):
518538
return
519539

520540
if self.initialize_opensearch_client():
521-
embedding_dimension, space_type = self.get_knn_index_details()
522-
if self.verify_and_create_index(embedding_dimension, space_type):
541+
print("OpenSearch client initialized successfully. Proceeding with index creation...")
542+
embedding_dimension, space_type, ef_construction = self.get_knn_index_details()
543+
if self.verify_and_create_index(embedding_dimension, space_type, ef_construction):
523544
print("Setup completed successfully.")
524-
self.config['embedding_dimension'] = s= str(embedding_dimension)
545+
self.config['embedding_dimension'] = str(embedding_dimension)
525546
self.config['space_type'] = space_type
547+
self.config['ef_construction'] = str(ef_construction)
526548
else:
527549
print("Index verification failed. Please check your index name and permissions.")
528550
else:
529-
print("Failed to initialize OpenSearch client. Setup incomplete.")
551+
print("Failed to initialize OpenSearch client. Setup incomplete.")

0 commit comments

Comments
 (0)