@@ -6,16 +6,18 @@ class Milvus < Base
# Wrapper around Milvus REST APIs.
#
# Gem requirements:
#     gem "milvus", "~> 0.10.3"
#
# Usage:
#     milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
#
14-
1514 def initialize ( url :, index_name :, llm :, api_key : nil )
1615 depends_on "milvus"
1716
18- @client = ::Milvus ::Client . new ( url : url )
17+ @client = ::Milvus ::Client . new (
18+ url : url ,
19+ logger : Langchain . logger
20+ )
1921 @index_name = index_name
2022
2123 super ( llm : llm )
@@ -24,33 +26,24 @@ def initialize(url:, index_name:, llm:, api_key: nil)
2426 def add_texts ( texts :)
2527 client . entities . insert (
2628 collection_name : index_name ,
27- num_rows : Array ( texts ) . size ,
28- fields_data : [
29- {
30- field_name : "content" ,
31- type : ::Milvus ::DATA_TYPES [ "varchar" ] ,
32- field : Array ( texts )
33- } , {
34- field_name : "vectors" ,
35- type : ::Milvus ::DATA_TYPES [ "float_vector" ] ,
36- field : Array ( texts ) . map { |text | llm . embed ( text : text ) . embedding }
37- }
38- ]
29+ data : texts . map do |text |
30+ { content : text , vector : llm . embed ( text : text ) . embedding }
31+ end
3932 )
4033 end
4134
35+ # TODO: Add update_texts method
36+
4237 # Deletes a list of texts in the index
4338 #
4439 # @param ids [Array<Integer>] The ids of texts to delete
4540 # @return [Boolean] The response from the server
4641 def remove_texts ( ids :)
4742 raise ArgumentError , "ids must be an array" unless ids . is_a? ( Array )
48- # Convert ids to integers if strings are passed
49- ids = ids . map ( &:to_i )
5043
5144 client . entities . delete (
5245 collection_name : index_name ,
53- expression : "id in #{ ids } "
46+ filter : "id in #{ ids } "
5447 )
5548 end
5649
@@ -62,33 +55,25 @@ def create_default_schema
6255 client . collections . create (
6356 auto_id : true ,
6457 collection_name : index_name ,
65- description : "Default schema created by langchain.rb" ,
6658 fields : [
6759 {
68- name : "id" ,
69- is_primary_key : true ,
70- autoID : true ,
71- data_type : ::Milvus ::DATA_TYPES [ "int64" ]
60+ fieldName : "id" ,
61+ isPrimary : true ,
62+ dataType : "Int64"
7263 } , {
73- name : "content" ,
74- is_primary_key : false ,
75- data_type : ::Milvus ::DATA_TYPES [ "varchar" ] ,
76- type_params : [
77- {
78- key : "max_length" ,
79- value : "32768" # Largest allowed value
80- }
81- ]
64+ fieldName : "content" ,
65+ isPrimary : false ,
66+ dataType : "VarChar" ,
67+ elementTypeParams : {
68+ max_length : "32768" # Largest allowed value
69+ }
8270 } , {
83- name : "vectors" ,
84- data_type : ::Milvus ::DATA_TYPES [ "float_vector" ] ,
85- is_primary_key : false ,
86- type_params : [
87- {
88- key : "dim" ,
89- value : llm . default_dimensions . to_s
90- }
91- ]
71+ fieldName : "vector" ,
72+ isPrimary : false ,
73+ dataType : "FloatVector" ,
74+ elementTypeParams : {
75+ dim : llm . default_dimensions . to_s
76+ }
9277 }
9378 ]
9479 )
@@ -97,27 +82,31 @@ def create_default_schema
9782 # Create the default index
9883 # @return [Boolean] The response from the server
9984 def create_default_index
100- client . indices . create (
85+ client . indexes . create (
10186 collection_name : index_name ,
102- field_name : "vectors" ,
103- extra_params : [
104- { key : "metric_type" , value : "L2" } ,
105- { key : "index_type" , value : "IVF_FLAT" } ,
106- { key : "params" , value : "{\" nlist\" :1024}" }
87+ index_params : [
88+ {
89+ metricType : "L2" ,
90+ fieldName : "vector" ,
91+ indexName : "vector_idx" ,
92+ indexConfig : {
93+ index_type : "AUTOINDEX"
94+ }
95+ }
10796 ]
10897 )
10998 end
11099
111100 # Get the default schema
112101 # @return [Hash] The response from the server
113102 def get_default_schema
114- client . collections . get ( collection_name : index_name )
103+ client . collections . describe ( collection_name : index_name )
115104 end
116105
117106 # Delete default schema
118107 # @return [Hash] The response from the server
119108 def destroy_default_schema
120- client . collections . delete ( collection_name : index_name )
109+ client . collections . drop ( collection_name : index_name )
121110 end
122111
123112 # Load default schema into memory
@@ -138,16 +127,12 @@ def similarity_search(query:, k: 4)
138127 def similarity_search_by_vector ( embedding :, k : 4 )
139128 load_default_schema
140129
141- client . search (
130+ client . entities . search (
142131 collection_name : index_name ,
143- output_fields : [ "id" , "content" ] , # Add "vectors" if need to have full vectors returned.
144- top_k : k . to_s ,
145- vectors : [ embedding ] ,
146- dsl_type : 1 ,
147- params : "{\" nprobe\" : 10}" ,
148- anns_field : "vectors" ,
149- metric_type : "L2" ,
150- vector_type : ::Milvus ::DATA_TYPES [ "float_vector" ]
132+ anns_field : "vector" ,
133+ data : [ embedding ] ,
134+ limit : k ,
135+ output_fields : [ "content" , "id" , "vector" ]
151136 )
152137 end
153138
@@ -159,8 +144,7 @@ def similarity_search_by_vector(embedding:, k: 4)
159144 def ask ( question :, k : 4 , &block )
160145 search_results = similarity_search ( query : question , k : k )
161146
162- content_field = search_results . dig ( "results" , "fields_data" ) . select { |field | field . dig ( "field_name" ) == "content" }
163- content_data = content_field . first . dig ( "Field" , "Scalars" , "Data" , "StringData" , "data" )
147+ content_data = search_results . dig ( "data" ) . map { |result | result . dig ( "content" ) }
164148
165149 context = content_data . join ( "\n ---\n " )
166150
0 commit comments