1414app = typer .Typer ()
1515
1616
17- # https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/
def get_vector_embedding_policy(dimensions: int = 3072) -> dict:
    """Return the vector embedding policy for the Cosmos DB container.

    The policy describes a single embedding stored at ``/embedding`` as
    ``float32`` values compared with the cosine distance function.

    Args:
        dimensions: Length of the stored embedding vectors. Defaults to
            3072, the size used for text-embedding-3-large. Pass a
            different value when the configured embedding model produces
            vectors of another length.

    Returns:
        A freshly built dict with a single ``"vectorEmbeddings"`` entry, so
        callers may mutate it without affecting later calls.
    """
    return {
        "vectorEmbeddings": [
            {
                "path": "/embedding",
                "dataType": "float32",
                "distanceFunction": "cosine",
                "dimensions": dimensions,
            }
        ]
    }
28+
29+
def get_indexing_policy():
    """Return the indexing policy dict used for the Cosmos DB container.

    The policy uses consistent indexing, includes every path, excludes the
    ``_etag`` path, and declares a ``quantizedFlat`` vector index on
    ``/embedding``.
    """
    all_paths = {"path": "/*"}
    etag_path = {"path": '/"_etag"/?'}
    embedding_index = {"path": "/embedding", "type": "quantizedFlat"}

    # Keys are inserted in the same order as the original literal.
    policy = {"indexingMode": "consistent"}
    policy["includedPaths"] = [all_paths]
    policy["excludedPaths"] = [etag_path]
    policy["vectorIndexes"] = [embedding_index]
    return policy
37+
38+
def get_azure_cosmos_db_no_sql_vector_search():
    """Build an ``AzureCosmosDBNoSqlVectorSearch`` from environment settings.

    Reads the Azure OpenAI settings (``AZURE_OPENAI_API_KEY``,
    ``AZURE_OPENAI_API_VERSION``, ``AZURE_OPENAI_ENDPOINT``,
    ``AZURE_OPENAI_EMBEDDING_MODEL``) and the Cosmos DB settings
    (``AZURE_COSMOS_DB_CONNECTION_STRING``, ``AZURE_COSMOS_DB_DATABASE_NAME``,
    ``AZURE_COSMOS_DB_CONTAINER_NAME``) via ``getenv`` and passes them,
    together with the vector embedding and indexing policies, to the
    vector store constructor.

    Returns:
        The constructed ``AzureCosmosDBNoSqlVectorSearch`` instance.
    """
    # The database name is used both as the target name and as the "id"
    # in the database properties.
    database_name = getenv("AZURE_COSMOS_DB_DATABASE_NAME")

    embeddings = AzureOpenAIEmbeddings(
        api_key=getenv("AZURE_OPENAI_API_KEY"),
        api_version=getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
        model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
    )
    client = CosmosClient.from_connection_string(
        getenv("AZURE_COSMOS_DB_CONNECTION_STRING")
    )

    return AzureCosmosDBNoSqlVectorSearch(
        embedding=embeddings,
        cosmos_client=client,
        database_name=database_name,
        container_name=getenv("AZURE_COSMOS_DB_CONTAINER_NAME"),
        vector_embedding_policy=get_vector_embedding_policy(),
        indexing_policy=get_indexing_policy(),
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        cosmos_database_properties={"id": database_name},
    )
55+
56+
1857@app .command ()
1958def insert_data (
2059 pdf_url : str = "https://arxiv.org/pdf/2303.08774.pdf" ,
@@ -36,38 +75,8 @@ def insert_data(
3675 ).split_documents (data )
3776
3877 try :
39- # Insert the data into Azure Cosmos DB
40- database_name = getenv ("AZURE_COSMOS_DB_DATABASE_NAME" )
41- AzureCosmosDBNoSqlVectorSearch .from_documents (
42- documents = docs ,
43- embedding = AzureOpenAIEmbeddings (
44- api_key = getenv ("AZURE_OPENAI_API_KEY" ),
45- api_version = getenv ("AZURE_OPENAI_API_VERSION" ),
46- azure_endpoint = getenv ("AZURE_OPENAI_ENDPOINT" ),
47- model = getenv ("AZURE_OPENAI_EMBEDDING_MODEL" ),
48- ),
49- cosmos_client = CosmosClient .from_connection_string (getenv ("AZURE_COSMOS_DB_CONNECTION_STRING" )),
50- database_name = database_name ,
51- container_name = getenv ("AZURE_COSMOS_DB_CONTAINER_NAME" ),
52- vector_embedding_policy = {
53- "vectorEmbeddings" : [
54- {
55- "path" : "/embedding" ,
56- "dataType" : "float32" ,
57- "distanceFunction" : "cosine" ,
58- "dimensions" : 3072 , # for text-embedding-3-large
59- }
60- ]
61- },
62- indexing_policy = {
63- "indexingMode" : "consistent" ,
64- "includedPaths" : [{"path" : "/*" }],
65- "excludedPaths" : [{"path" : '/"_etag"/?' }],
66- "vectorIndexes" : [{"path" : "/embedding" , "type" : "quantizedFlat" }],
67- },
68- cosmos_container_properties = {"partition_key" : PartitionKey (path = "/id" )},
69- cosmos_database_properties = {"id" : database_name }, # need to add this
70- )
78+ vector_search = get_azure_cosmos_db_no_sql_vector_search ()
79+ vector_search .add_documents (docs )
7180 except Exception as e :
7281 logger .error (f"error: { e } " )
7382
@@ -79,40 +88,10 @@ def query_data(
7988):
8089 if verbose :
8190 logging .basicConfig (level = logging .DEBUG )
82-
83- database_name = getenv ("AZURE_COSMOS_DB_DATABASE_NAME" )
84- vector_search = AzureCosmosDBNoSqlVectorSearch (
85- embedding = AzureOpenAIEmbeddings (
86- api_key = getenv ("AZURE_OPENAI_API_KEY" ),
87- api_version = getenv ("AZURE_OPENAI_API_VERSION" ),
88- azure_endpoint = getenv ("AZURE_OPENAI_ENDPOINT" ),
89- model = getenv ("AZURE_OPENAI_EMBEDDING_MODEL" ),
90- ),
91- cosmos_client = CosmosClient .from_connection_string (getenv ("AZURE_COSMOS_DB_CONNECTION_STRING" )),
92- database_name = database_name ,
93- container_name = getenv ("AZURE_COSMOS_DB_CONTAINER_NAME" ),
94- vector_embedding_policy = {
95- "vectorEmbeddings" : [
96- {
97- "path" : "/embedding" ,
98- "dataType" : "float32" ,
99- "distanceFunction" : "cosine" ,
100- "dimensions" : 3072 , # for text-embedding-3-large
101- }
102- ]
103- },
104- indexing_policy = {
105- "indexingMode" : "consistent" ,
106- "includedPaths" : [{"path" : "/*" }],
107- "excludedPaths" : [{"path" : '/"_etag"/?' }],
108- "vectorIndexes" : [{"path" : "/embedding" , "type" : "quantizedFlat" }],
109- },
110- cosmos_container_properties = {"partition_key" : PartitionKey (path = "/id" )},
111- cosmos_database_properties = {"id" : database_name },
112- )
113-
11491 try :
92+ vector_search = get_azure_cosmos_db_no_sql_vector_search ()
11593 results = vector_search .similarity_search (query = query )
94+ logger .info (f"got { len (results )} results" )
11695 for idx , result in enumerate (results ):
11796 print (f"Result { idx + 1 } : { result } " )
11897 except Exception as e :
0 commit comments