14
14
# Typer CLI application object; commands are registered on it via @app.command().
app = typer .Typer ()
15
15
16
16
17
- # https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/
17
def get_vector_embedding_policy(dimensions: int = 3072) -> dict:
    """Build the vector embedding policy for the ``/embedding`` path.

    Args:
        dimensions: Length of the stored embedding vectors. Defaults to 3072,
            the size used for text-embedding-3-large. Pass another value when
            a different embedding model is configured, so the policy matches
            the vectors that are actually written.

    Returns:
        A dict with one ``vectorEmbeddings`` entry describing float32 vectors
        at ``/embedding`` compared with the cosine distance function.
    """
    return {
        "vectorEmbeddings": [
            {
                "path": "/embedding",
                "dataType": "float32",
                "distanceFunction": "cosine",
                "dimensions": dimensions,
            }
        ]
    }
28
+
29
+
30
def get_indexing_policy(
    vector_path: str = "/embedding",
    vector_index_type: str = "quantizedFlat",
) -> dict:
    """Build the container indexing policy, including the vector index.

    Args:
        vector_path: Document path that holds the embedding vector. Defaults
            to ``/embedding``; it should match the path declared in the
            vector embedding policy.
        vector_index_type: Vector index type to request for ``vector_path``.
            Defaults to ``quantizedFlat``.

    Returns:
        A dict using consistent indexing mode that includes every path
        (``/*``), excludes the ``_etag`` system property, and declares one
        vector index.
    """
    return {
        "indexingMode": "consistent",
        "includedPaths": [{"path": "/*"}],
        "excludedPaths": [{"path": '/"_etag"/?'}],
        "vectorIndexes": [{"path": vector_path, "type": vector_index_type}],
    }
37
+
38
+
39
def get_azure_cosmos_db_no_sql_vector_search():
    """Create the Cosmos DB NoSQL vector store from environment settings.

    The Azure OpenAI embedding settings and the Cosmos DB connection string,
    database name, and container name are all read from environment
    variables. The vector embedding and indexing policies come from
    ``get_vector_embedding_policy()`` and ``get_indexing_policy()``, and the
    container is partitioned on ``/id``.

    Returns:
        A configured ``AzureCosmosDBNoSqlVectorSearch`` instance.

    Raises:
        ValueError: If any required ``AZURE_COSMOS_DB_*`` environment variable
            is unset or empty.
    """
    # Validate the Cosmos DB settings up front: an unset variable would
    # otherwise be handed to the client/vector store as None and fail with a
    # much less helpful error.
    required = (
        "AZURE_COSMOS_DB_CONNECTION_STRING",
        "AZURE_COSMOS_DB_DATABASE_NAME",
        "AZURE_COSMOS_DB_CONTAINER_NAME",
    )
    settings = {name: getenv(name) for name in required}
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(f"missing required environment variables: {', '.join(missing)}")

    # Read once so the vector store and the database properties always agree.
    database_name = settings["AZURE_COSMOS_DB_DATABASE_NAME"]

    return AzureCosmosDBNoSqlVectorSearch(
        embedding=AzureOpenAIEmbeddings(
            api_key=getenv("AZURE_OPENAI_API_KEY"),
            api_version=getenv("AZURE_OPENAI_API_VERSION"),
            azure_endpoint=getenv("AZURE_OPENAI_ENDPOINT"),
            model=getenv("AZURE_OPENAI_EMBEDDING_MODEL"),
        ),
        cosmos_client=CosmosClient.from_connection_string(
            settings["AZURE_COSMOS_DB_CONNECTION_STRING"]
        ),
        database_name=database_name,
        container_name=settings["AZURE_COSMOS_DB_CONTAINER_NAME"],
        vector_embedding_policy=get_vector_embedding_policy(),
        indexing_policy=get_indexing_policy(),
        cosmos_container_properties={"partition_key": PartitionKey(path="/id")},
        # The database id is passed explicitly here as well as in database_name.
        cosmos_database_properties={"id": database_name},
    )
55
+
56
+
18
57
@app .command ()
19
58
def insert_data (
20
59
pdf_url : str = "https://arxiv.org/pdf/2303.08774.pdf" ,
@@ -36,38 +75,8 @@ def insert_data(
36
75
).split_documents (data )
37
76
38
77
try :
39
- # Insert the data into Azure Cosmos DB
40
- database_name = getenv ("AZURE_COSMOS_DB_DATABASE_NAME" )
41
- AzureCosmosDBNoSqlVectorSearch .from_documents (
42
- documents = docs ,
43
- embedding = AzureOpenAIEmbeddings (
44
- api_key = getenv ("AZURE_OPENAI_API_KEY" ),
45
- api_version = getenv ("AZURE_OPENAI_API_VERSION" ),
46
- azure_endpoint = getenv ("AZURE_OPENAI_ENDPOINT" ),
47
- model = getenv ("AZURE_OPENAI_EMBEDDING_MODEL" ),
48
- ),
49
- cosmos_client = CosmosClient .from_connection_string (getenv ("AZURE_COSMOS_DB_CONNECTION_STRING" )),
50
- database_name = database_name ,
51
- container_name = getenv ("AZURE_COSMOS_DB_CONTAINER_NAME" ),
52
- vector_embedding_policy = {
53
- "vectorEmbeddings" : [
54
- {
55
- "path" : "/embedding" ,
56
- "dataType" : "float32" ,
57
- "distanceFunction" : "cosine" ,
58
- "dimensions" : 3072 , # for text-embedding-3-large
59
- }
60
- ]
61
- },
62
- indexing_policy = {
63
- "indexingMode" : "consistent" ,
64
- "includedPaths" : [{"path" : "/*" }],
65
- "excludedPaths" : [{"path" : '/"_etag"/?' }],
66
- "vectorIndexes" : [{"path" : "/embedding" , "type" : "quantizedFlat" }],
67
- },
68
- cosmos_container_properties = {"partition_key" : PartitionKey (path = "/id" )},
69
- cosmos_database_properties = {"id" : database_name }, # need to add this
70
- )
78
+ vector_search = get_azure_cosmos_db_no_sql_vector_search ()
79
+ vector_search .add_documents (docs )
71
80
except Exception as e :
72
81
logger .error (f"error: { e } " )
73
82
@@ -79,40 +88,10 @@ def query_data(
79
88
):
80
89
if verbose :
81
90
logging .basicConfig (level = logging .DEBUG )
82
-
83
- database_name = getenv ("AZURE_COSMOS_DB_DATABASE_NAME" )
84
- vector_search = AzureCosmosDBNoSqlVectorSearch (
85
- embedding = AzureOpenAIEmbeddings (
86
- api_key = getenv ("AZURE_OPENAI_API_KEY" ),
87
- api_version = getenv ("AZURE_OPENAI_API_VERSION" ),
88
- azure_endpoint = getenv ("AZURE_OPENAI_ENDPOINT" ),
89
- model = getenv ("AZURE_OPENAI_EMBEDDING_MODEL" ),
90
- ),
91
- cosmos_client = CosmosClient .from_connection_string (getenv ("AZURE_COSMOS_DB_CONNECTION_STRING" )),
92
- database_name = database_name ,
93
- container_name = getenv ("AZURE_COSMOS_DB_CONTAINER_NAME" ),
94
- vector_embedding_policy = {
95
- "vectorEmbeddings" : [
96
- {
97
- "path" : "/embedding" ,
98
- "dataType" : "float32" ,
99
- "distanceFunction" : "cosine" ,
100
- "dimensions" : 3072 , # for text-embedding-3-large
101
- }
102
- ]
103
- },
104
- indexing_policy = {
105
- "indexingMode" : "consistent" ,
106
- "includedPaths" : [{"path" : "/*" }],
107
- "excludedPaths" : [{"path" : '/"_etag"/?' }],
108
- "vectorIndexes" : [{"path" : "/embedding" , "type" : "quantizedFlat" }],
109
- },
110
- cosmos_container_properties = {"partition_key" : PartitionKey (path = "/id" )},
111
- cosmos_database_properties = {"id" : database_name },
112
- )
113
-
114
91
try :
92
+ vector_search = get_azure_cosmos_db_no_sql_vector_search ()
115
93
results = vector_search .similarity_search (query = query )
94
+ logger .info (f"got { len (results )} results" )
116
95
for idx , result in enumerate (results ):
117
96
print (f"Result { idx + 1 } : { result } " )
118
97
except Exception as e :
0 commit comments