11package com .milvus .io .kafka .utils ;
22
3- import com .alibaba .fastjson .JSONObject ;
43import com .google .common .collect .Lists ;
54import com .google .gson .Gson ;
5+ import com .google .gson .JsonObject ;
6+ import com .google .gson .JsonParser ;
67import com .milvus .io .kafka .MilvusSinkConnectorConfig ;
7- import io .milvus .param . dml . InsertParam ;
8+ import io .milvus .common . utils . JsonUtils ;
89import io .milvus .v2 .common .DataType ;
910import io .milvus .v2 .service .collection .request .CreateCollectionReq ;
1011import org .apache .kafka .connect .data .Struct ;
1314import org .slf4j .LoggerFactory ;
1415
1516import java .nio .ByteBuffer ;
16- import java .util .* ;
17- import java .util .stream . Collectors ;
17+ import java .util .HashMap ;
18+ import java .util .List ;
1819
1920public class DataConverter {
2021
21- private final MilvusSinkConnectorConfig config ;
22-
2322 private static final Logger log = LoggerFactory .getLogger (DataConverter .class );
23+ private final MilvusSinkConnectorConfig config ;
2424
/**
 * Creates a converter that keeps a reference to the given connector configuration.
 *
 * @param config the Milvus sink connector configuration stored on this instance
 */
public DataConverter(final MilvusSinkConnectorConfig config) {
    this.config = config;
}
28+
2829 /*
29- * Convert SinkRecord to JSONObject
30+ * Convert SinkRecord to JsonObject
3031 */
31- public JSONObject convertRecord (SinkRecord sr , CreateCollectionReq .CollectionSchema collectionSchema ) {
32- // parse sinkRecord to get filed name and value
33- if (sr .value () instanceof Struct ) {
34- return parseValue ((Struct )sr .value (), collectionSchema );
35- }else if (sr .value () instanceof HashMap ) {
36- return parseValue ((HashMap <?, ?>)sr .value (), collectionSchema );
37- }else {
38- throw new RuntimeException ("Unsupported SinkRecord data type" + sr .value ());
32+ public JsonObject convertRecord (SinkRecord sr , CreateCollectionReq .CollectionSchema collectionSchema ) {
33+ // parse sinkRecord to get field name and value
34+ if (sr .value () instanceof Struct ) {
35+ return parseValue ((Struct ) sr .value (), collectionSchema );
36+ } else if (sr .value () instanceof HashMap ) {
37+ return parseValue ((HashMap <?, ?>) sr .value (), collectionSchema );
38+ } else {
39+ throw new RuntimeException ("Unsupported SinkRecord data type: " + sr .value ());
3940 }
4041 }
4142
42- private JSONObject parseValue (HashMap <?, ?> mapValue , CreateCollectionReq .CollectionSchema collectionSchema ) {
43- JSONObject fields = new JSONObject ();
43+ private JsonObject parseValue (HashMap <?, ?> mapValue , CreateCollectionReq .CollectionSchema collectionSchema ) {
44+ JsonObject fields = new JsonObject ();
45+ Gson gson = new Gson ();
4446 mapValue .forEach ((field , value ) -> {
45- if (collectionSchema .getField (field .toString ())!= null ){
47+ if (collectionSchema .getField (field .toString ()) != null ) {
4648 // if the key exists in the collection, store the value by collectionSchema DataType
47- fields .put (field .toString (), castValueToType (value , collectionSchema .getField (field .toString ()).getDataType ()));
48- }else {
49+ Object object = convertValueByMilvusType (value , collectionSchema .getField (field .toString ()).getDataType ());
50+ fields .add (field .toString (), gson .toJsonTree (object ));
51+ } else {
4952 log .warn ("Field {} not exists in collection" , field );
5053 }
51-
5254 });
5355 return fields ;
5456 }
5557
56- private JSONObject parseValue (Struct structValue , CreateCollectionReq .CollectionSchema collectionSchema ) {
57- JSONObject fields = new JSONObject ();
58-
58+ private JsonObject parseValue (Struct structValue , CreateCollectionReq .CollectionSchema collectionSchema ) {
59+ JsonObject fields = new JsonObject ();
60+ Gson gson = new Gson ();
5961 structValue .schema ().fields ().forEach (field -> {
60- if (collectionSchema .getField (field .name ()) != null ){
62+ if (collectionSchema .getField (field .name ()) != null ) {
6163 // if the key exists in the collection, store the value by collectionSchema DataType
62- fields .put (field .toString (), castValueToType (structValue .get (field .name ()), collectionSchema .getField (field .name ()).getDataType ()));
63- }else {
64+ Object object = convertValueByMilvusType (structValue .get (field .name ()), collectionSchema .getField (field .name ()).getDataType ());
65+ fields .add (field .name (), gson .toJsonTree (object ));
66+ } else {
6467 log .warn ("Field {} not exists in collection" , field );
6568 }
6669 });
6770
6871 return fields ;
6972 }
7073
71- private Object castValueToType (Object value , DataType dataType ) {
72- switch (dataType ){
74+ private Object convertValueByMilvusType (Object value , DataType dataType ) {
75+ Gson gson = new Gson ();
76+ switch (dataType ) {
7377 case Bool :
7478 return Boolean .parseBoolean (value .toString ());
7579 case Int8 :
@@ -87,36 +91,37 @@ private Object castValueToType(Object value, DataType dataType) {
8791 case String :
8892 return value .toString ();
8993 case JSON :
90- Gson gson = new Gson ();
9194 return gson .toJson (value );
9295 case BinaryVector :
9396 return parseBinaryVectorField (value .toString ());
9497 case FloatVector :
9598 return parseFloatVectorField (value .toString ());
99+ case SparseFloatVector :
100+ return gson .toJsonTree (value ).getAsJsonObject ();
96101 default :
97- throw new RuntimeException ("Unsupported data type" + dataType );
102+ throw new RuntimeException ("Unsupported data type: " + dataType );
98103 }
99104 }
100105
101- protected List <Float > parseFloatVectorField (String vectors ){
106+ protected List <Float > parseFloatVectorField (String vectors ) {
102107 try {
103108 log .debug ("parse float vectors: {}" , vectors );
104109
105110 String [] vectorArrays = vectors .replaceAll ("\\ [" , "" ).replaceAll ("\\ ]" , "" )
106- .replaceAll (" " ,"" ).split ("," );
111+ .replaceAll (" " , "" ).split ("," );
107112
108113 List <Float > floatList = Lists .newLinkedList ();
109114 for (String vector : vectorArrays ) {
110115 floatList .add (Float .valueOf (vector ));
111116 }
112117
113118 return floatList ;
114- }catch (Exception e ){
115- throw new RuntimeException ("parse float vector field error: " + e .getMessage () + vectors );
119+ } catch (Exception e ) {
120+ throw new RuntimeException ("parse float vector field error: " + e .getMessage () + " " + vectors );
116121 }
117-
118122 }
119- protected ByteBuffer parseBinaryVectorField (String vectors ){
123+
124+ protected ByteBuffer parseBinaryVectorField (String vectors ) {
120125 try {
121126 log .debug ("parse binary vectors: {}" , vectors );
122127
@@ -130,8 +135,8 @@ protected ByteBuffer parseBinaryVectorField(String vectors){
130135 }
131136
132137 return buffer ;
133- }catch (Exception e ){
134- throw new RuntimeException ("parse binary vector field error: " + e .getMessage () + vectors );
138+ } catch (Exception e ) {
139+ throw new RuntimeException ("parse binary vector field error: " + e .getMessage () + " " + vectors );
135140 }
136141 }
137- }
142+ }
0 commit comments