Skip to content

Commit 0785f1d

Browse files
authored
Merge pull request #9 from psilberk/adressing-review-comments
Addressed some of the review comments
2 parents 68f7914 + e669017 commit 0785f1d

File tree

7 files changed

+185
-127
lines changed

7 files changed

+185
-127
lines changed

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@
2121
},
2222
},
2323
"java.debug.settings.onBuildFailureProceed": true,
24-
"java.compile.nullAnalysis.mode": "disabled"
24+
"java.compile.nullAnalysis.mode": "disabled",
25+
"java.configuration.updateBuildConfiguration": "interactive"
2526
}

data/semantickernel-data-jdbc/pom.xml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<parent>
55
<groupId>com.microsoft.semantic-kernel</groupId>
66
<artifactId>semantickernel-parent</artifactId>
7-
<version>1.4.4-SNAPSHOT</version>
7+
<version>1.4.4-RC2-SNAPSHOT</version>
88
<relativePath>../../pom.xml</relativePath>
99
</parent>
1010

@@ -15,9 +15,8 @@
1515
<dependencies>
1616
<dependency>
1717
<groupId>com.microsoft.semantic-kernel</groupId>
18-
<artifactId>semantickernel-api</artifactId>
18+
<artifactId>semantickernel-api-data</artifactId>
1919
</dependency>
20-
2120
<dependency>
2221
<groupId>org.slf4j</groupId>
2322
<artifactId>slf4j-api</artifactId>

data/semantickernel-data-jdbc/src/main/java/com/microsoft/semantickernel/data/jdbc/JDBCVectorStoreQueryProvider.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ public JDBCVectorStoreQueryProvider(
106106
@SuppressFBWarnings("EI_EXPOSE_REP2") @Nonnull DataSource dataSource,
107107
@Nonnull String collectionsTable,
108108
@Nonnull String prefixForCollectionTables,
109-
@Nonnull HashMap<Class<?>, String> supportedKeyTypes,
109+
@Nonnull Map<Class<?>, String> supportedKeyTypes,
110110
@Nonnull Map<Class<?>, String> supportedDataTypes,
111111
@Nonnull Map<Class<?>, String> supportedVectorTypes) {
112112
this.dataSource = dataSource;

data/semantickernel-data-oracle/src/main/java/com/microsoft/semantickernel/data/jdbc/oracle/OracleVectorStoreFieldHelper.java

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@
2929
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField;
3030
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordKeyField;
3131
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordVectorField;
32+
import com.microsoft.semantickernel.exceptions.SKException;
3233
import java.math.BigDecimal;
3334
import java.time.OffsetDateTime;
3435
import java.util.Collection;
36+
import java.util.Collections;
3537
import java.util.HashMap;
3638
import java.util.List;
3739
import java.util.Map;
@@ -45,6 +47,10 @@
4547
*/
4648
class OracleVectorStoreFieldHelper {
4749

50+
/**
51+
* Object naming regular expression
52+
*/
53+
private static final String OBJECT_NAMING_REGEXP = "[a-zA-Z_][a-zA-Z0-9_]{1,128}";
4854
/**
4955
* The logger
5056
*/
@@ -109,32 +115,34 @@ class OracleVectorStoreFieldHelper {
109115
*
110116
* @return the mapping between the supported Java key types and the Oracle database type.
111117
*/
112-
public static HashMap<Class<?>, String> getSupportedKeyTypes() {
113-
return supportedKeyTypes;
118+
static Map<Class<?>, String> getSupportedKeyTypes() {
119+
120+
return Collections.unmodifiableMap(supportedKeyTypes);
114121
}
115122

116123
/**
117124
* Gets the mapping between the supported Java data types and the Oracle database type.
118125
*
119126
* @return the mapping between the supported Java data types and the Oracle database type.
120127
*/
121-
public static Map<Class<?>, String> getSupportedDataTypes(
128+
static Map<Class<?>, String> getSupportedDataTypes(
122129
StringTypeMapping stringTypeMapping, int defaultVarCharLength) {
123130
String stringType = stringTypeMapping.equals(StringTypeMapping.USE_VARCHAR)
124131
? String.format(OracleDataTypesMapping.STRING_VARCHAR, defaultVarCharLength)
125132
: OracleDataTypesMapping.STRING_CLOB;
126133
supportedDataTypes.put(String.class, stringType);
127134
LOGGER.finest("Mapping String columns to " + stringType);
128-
return supportedDataTypes;
135+
return Collections.unmodifiableMap(supportedDataTypes);
129136
}
130137

131138
/**
132139
* Gets the mapping between the supported Java data types and the Oracle database type.
133140
*
134141
* @return the mapping between the supported Java data types and the Oracle database type.
135142
*/
136-
public static Map<Class<?>, String> getSupportedVectorTypes() {
137-
return supportedVectorTypes;
143+
static Map<Class<?>, String> getSupportedVectorTypes() {
144+
145+
return Collections.unmodifiableMap(supportedVectorTypes);
138146
}
139147

140148
/**
@@ -143,20 +151,23 @@ public static Map<Class<?>, String> getSupportedVectorTypes() {
143151
* @return the CREATE VECTOR INDEX statement to create the index according to the vector
144152
* field definition.
145153
*/
146-
public static String getCreateVectorIndexStatement(VectorStoreRecordVectorField field, String collectionTableName) {
154+
static String getCreateVectorIndexStatement(VectorStoreRecordVectorField field, String collectionTableName) {
147155
switch (field.getIndexKind()) {
148156
case IVFFLAT:
149157
return "CREATE VECTOR INDEX IF NOT EXISTS "
150-
+ getIndexName(field.getEffectiveStorageName())
158+
+ validateObjectNaming(getIndexName(field.getEffectiveStorageName()))
151159
+ " ON "
152-
+ collectionTableName + "( " + field.getEffectiveStorageName() + " ) "
160+
+ validateObjectNaming(collectionTableName)
161+
+ "( " + validateObjectNaming(field.getEffectiveStorageName()) + " ) "
153162
+ " ORGANIZATION NEIGHBOR PARTITIONS "
154163
+ " WITH DISTANCE COSINE "
155164
+ "PARAMETERS ( TYPE IVF )";
156165
case HNSW:
157-
return "CREATE VECTOR INDEX IF NOT EXISTS " + getIndexName(field.getEffectiveStorageName())
166+
return "CREATE VECTOR INDEX IF NOT EXISTS "
167+
+ validateObjectNaming(getIndexName(field.getEffectiveStorageName()))
158168
+ " ON "
159-
+ collectionTableName + "( " + field.getEffectiveStorageName() + " ) "
169+
+ validateObjectNaming(collectionTableName)
170+
+ "( " + validateObjectNaming(field.getEffectiveStorageName()) + " ) "
160171
+ "ORGANIZATION INMEMORY GRAPH "
161172
+ "WITH DISTANCE COSINE "
162173
+ "PARAMETERS (TYPE HNSW)";
@@ -173,20 +184,20 @@ public static String getCreateVectorIndexStatement(VectorStoreRecordVectorField
173184
*
174185
* @return the CREATE INDEX statement to create the index according to the field definition.
175186
*/
176-
public static String createIndexForDataField(String collectionTableName, VectorStoreRecordDataField dataField, Map<Class<?>, String> supportedDataTypes) {
187+
static String createIndexForDataField(String collectionTableName, VectorStoreRecordDataField dataField, Map<Class<?>, String> supportedDataTypes) {
177188
if (supportedDataTypes.get(dataField.getFieldType()) == "JSON") {
178189
String dataFieldIndex = "CREATE MULTIVALUE INDEX IF NOT EXISTS %s ON %s t (t.%s.%s)";
179190
return String.format(dataFieldIndex,
180-
collectionTableName + "_" + dataField.getEffectiveStorageName(),
181-
collectionTableName,
182-
dataField.getEffectiveStorageName(),
191+
validateObjectNaming(collectionTableName + "_" + dataField.getEffectiveStorageName()),
192+
validateObjectNaming(collectionTableName),
193+
validateObjectNaming(dataField.getEffectiveStorageName()),
183194
getFunctionForType(supportedDataTypes.get(dataField.getFieldSubType())));
184195
} else {
185196
String dataFieldIndex = "CREATE INDEX IF NOT EXISTS %s ON %s (%s ASC)";
186197
return String.format(dataFieldIndex,
187-
collectionTableName + "_" + dataField.getEffectiveStorageName(),
188-
collectionTableName,
189-
dataField.getEffectiveStorageName()
198+
validateObjectNaming(collectionTableName + "_" + dataField.getEffectiveStorageName()),
199+
validateObjectNaming(collectionTableName),
200+
validateObjectNaming(dataField.getEffectiveStorageName())
190201
);
191202
}
192203
}
@@ -196,9 +207,9 @@ public static String createIndexForDataField(String collectionTableName, VectorS
196207
* @param fields list of vector record fields.
197208
* @return comma separated list of columns and types for CREATE TABLE statement.
198209
*/
199-
public static String getVectorColumnNamesAndTypes(List<VectorStoreRecordVectorField> fields) {
210+
static String getVectorColumnNamesAndTypes(List<VectorStoreRecordVectorField> fields) {
200211
List<String> columns = fields.stream()
201-
.map(field -> field.getEffectiveStorageName() + " " +
212+
.map(field -> validateObjectNaming(field.getEffectiveStorageName()) + " " +
202213
OracleVectorStoreFieldHelper.getTypeForVectorField(field)
203214
).collect(Collectors.toList());
204215

@@ -210,8 +221,8 @@ public static String getVectorColumnNamesAndTypes(List<VectorStoreRecordVectorFi
210221
* @param field the key field.
211222
* @return column name and type of the key field for CREATE TABLE statement.
212223
*/
213-
public static String getKeyColumnNameAndType(VectorStoreRecordKeyField field) {
214-
return field.getEffectiveStorageName() + " " + supportedKeyTypes.get(field.getFieldType());
224+
static String getKeyColumnNameAndType(VectorStoreRecordKeyField field) {
225+
return validateObjectNaming(field.getEffectiveStorageName()) + " " + supportedKeyTypes.get(field.getFieldType());
215226
}
216227

217228

@@ -220,7 +231,7 @@ public static String getKeyColumnNameAndType(VectorStoreRecordKeyField field) {
220231
* @param effectiveStorageName the field name.
221232
* @return the index name.
222233
*/
223-
private static String getIndexName(String effectiveStorageName) {
234+
static String getIndexName(String effectiveStorageName) {
224235
return effectiveStorageName + VECTOR_INDEX_SUFFIX;
225236
}
226237

@@ -261,4 +272,19 @@ private static String getFunctionForType(String jdbcType) {
261272
}
262273
}
263274

275+
276+
/**
277+
* Validates an SQL identifier.
278+
*
279+
* @param identifier the identifier
280+
* @return the identifier if it is valid
281+
* @throws SKException if the identifier is invalid
282+
*/
283+
static String validateObjectNaming(String identifier) {
284+
if (identifier.matches(OBJECT_NAMING_REGEXP)) {
285+
return identifier;
286+
}
287+
throw new SKException("Invalid SQL identifier: " + identifier);
288+
}
289+
264290
}

data/semantickernel-data-oracle/src/main/java/com/microsoft/semantickernel/data/jdbc/oracle/OracleVectorStoreQueryProvider.java

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -426,17 +426,21 @@ public <Record> VectorSearchResults<Record> search(String collectionName, List<F
426426
VectorSearchOptions options, VectorStoreRecordDefinition recordDefinition,
427427
VectorStoreRecordMapper<Record, ResultSet> mapper) {
428428

429+
430+
if (vector != null && recordDefinition.getVectorFields().isEmpty()) {
431+
throw new SKException("Record definition must contain at least one vector field"
432+
+ " to perform a vector search");
433+
}
434+
429435
// Gets the search vector field and its distance function. If not vector field was provided,
430436
// use the first one
431-
VectorStoreRecordVectorField vectorField = options.getVectorFieldName() == null
432-
? recordDefinition.getVectorFields().get(0)
433-
: (VectorStoreRecordVectorField) recordDefinition
434-
.getField(options.getVectorFieldName());
435-
DistanceFunction distanceFunction = vectorField == null ? null : vectorField.getDistanceFunction();
436-
if (options.getVectorFieldName() != null && vectorField == null) {
437-
throw new SKException("");
437+
VectorStoreRecordVectorField vectorField = null;
438+
if (vector != null) {
439+
vectorField = getVectorFieldByName(recordDefinition, options.getVectorFieldName());
438440
}
439441

442+
443+
440444
// get list of fields that should be returned by the query
441445
List<VectorStoreRecordField> fields = (options.isIncludeVectors())
442446
? recordDefinition.getAllFields()
@@ -449,7 +453,9 @@ public <Record> VectorSearchResults<Record> search(String collectionName, List<F
449453
// generate SQL statement
450454
String selectQuery = "SELECT "
451455
+ (vector == null ? "0 as distance, " :
452-
formatQuery("VECTOR_DISTANCE(%s, ?, %s) distance, ", vectorField.getEffectiveStorageName(), toOracleDistanceFunction(distanceFunction)))
456+
formatQuery("VECTOR_DISTANCE(%s, ?, %s) distance, ",
457+
OracleVectorStoreFieldHelper.validateObjectNaming(vectorField.getEffectiveStorageName()),
458+
toOracleDistanceFunction(vectorField.getDistanceFunction())))
453459
+ getQueryColumnsFromFields(fields)
454460
+ " FROM " + getCollectionTableName(collectionName)
455461
+ (filter != null && !filter.isEmpty() ? " WHERE " + filter : "")
@@ -504,7 +510,7 @@ public <Record> VectorSearchResults<Record> search(String collectionName, List<F
504510
while (rs.next()) {
505511
// Cosine distance function. 1 - cosine similarity.
506512
double score = Math.abs(rs.getDouble("distance"));
507-
if (distanceFunction == DistanceFunction.COSINE_SIMILARITY) {
513+
if (vector != null && vectorField.getDistanceFunction() == DistanceFunction.COSINE_SIMILARITY) {
508514
score = 1d - score;
509515
}
510516
// Use the mapper to convert to result set to records
@@ -518,6 +524,27 @@ public <Record> VectorSearchResults<Record> search(String collectionName, List<F
518524
return new VectorSearchResults<>(records);
519525
}
520526

527+
private VectorStoreRecordVectorField getVectorFieldByName(
528+
VectorStoreRecordDefinition recordDefinition,
529+
String name) {
530+
VectorStoreRecordField vectorField;
531+
if (name != null) {
532+
vectorField = recordDefinition.getField(name);
533+
if (vectorField == null) {
534+
throw new SKException("Vector field not found in record definition");
535+
}
536+
if (!(vectorField instanceof VectorStoreRecordVectorField)) {
537+
throw new SKException("Invalid type");
538+
}
539+
} else {
540+
if (recordDefinition.getVectorFields().isEmpty()) {
541+
throw new SKException("Record definition should contain at least one vector field");
542+
}
543+
vectorField = recordDefinition.getVectorFields().get(0);
544+
}
545+
return (VectorStoreRecordVectorField)vectorField;
546+
}
547+
521548
/**
522549
* Sets the parameter value
523550
* @param statement the statement

0 commit comments

Comments
 (0)