Skip to content

Commit c2b7f70

Browse files
authored
.Net: Improve mapper efficiency for Pinecone and Redis HashSets. (microsoft#9064)
### Motivation and Context

Originally we optimized for flexibility when mapping from storage to data models, but this comes at the cost of efficiency. microsoft#9025

### Description

- Updating the Redis HashSets and Pinecone mappers to not use JSON as an intermediary.
- Adding restrictions on the types of enumerables supported, and requiring that the data model has a public parameterless constructor.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [x] The code builds clean without any errors or warnings
- [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [x] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄
1 parent 6d5aa6e commit c2b7f70

File tree

7 files changed

+164
-95
lines changed

7 files changed

+164
-95
lines changed

dotnet/src/Connectors/Connectors.Memory.Pinecone/PineconeVectorStoreRecordMapper.cs

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
using System;
44
using System.Collections.Generic;
55
using System.Linq;
6-
using System.Text.Json;
7-
using System.Text.Json.Nodes;
86
using Microsoft.SemanticKernel.Data;
97
using Pinecone;
108

@@ -61,6 +59,7 @@ public PineconeVectorStoreRecordMapper(
6159
VectorStoreRecordPropertyReader propertyReader)
6260
{
6361
// Validate property types.
62+
propertyReader.VerifyHasParameterlessConstructor();
6463
propertyReader.VerifyKeyProperties(s_supportedKeyTypes);
6564
propertyReader.VerifyDataProperties(s_supportedDataTypes, s_supportedEnumerableDataElementTypes);
6665
propertyReader.VerifyVectorProperties(s_supportedVectorTypes);
@@ -110,52 +109,70 @@ public Vector MapFromDataToStorageModel(TRecord dataModel)
110109
/// <inheritdoc />
111110
public TRecord MapFromStorageToDataModel(Vector storageModel, StorageToDataModelMapperOptions options)
112111
{
113-
var keyJsonName = this._propertyReader.KeyPropertyJsonName;
114-
var outputJsonObject = new JsonObject
115-
{
116-
{ keyJsonName, JsonValue.Create(storageModel.Id) },
117-
};
112+
// Construct the output record.
113+
var outputRecord = (TRecord)this._propertyReader.ParameterLessConstructorInfo.Invoke(null);
118114

115+
// Set Key.
116+
this._propertyReader.KeyPropertyInfo.SetValue(outputRecord, storageModel.Id);
117+
118+
// Set Vector.
119119
if (options?.IncludeVectors is true)
120120
{
121-
var propertyName = this._propertyReader.GetStoragePropertyName(this._propertyReader.FirstVectorPropertyName!);
122-
var jsonName = this._propertyReader.GetJsonPropertyName(this._propertyReader.FirstVectorPropertyName!);
123-
outputJsonObject.Add(jsonName, new JsonArray(storageModel.Values.Select(x => JsonValue.Create(x)).ToArray()));
121+
this._propertyReader.FirstVectorPropertyInfo!.SetValue(
122+
outputRecord,
123+
new ReadOnlyMemory<float>(storageModel.Values));
124124
}
125125

126+
// Set Data.
126127
if (storageModel.Metadata != null)
127128
{
128-
foreach (var dataProperty in this._propertyReader.DataPropertiesInfo)
129-
{
130-
var propertyName = this._propertyReader.GetStoragePropertyName(dataProperty.Name);
131-
var jsonName = this._propertyReader.GetJsonPropertyName(dataProperty.Name);
132-
133-
if (storageModel.Metadata.TryGetValue(propertyName, out var value))
134-
{
135-
outputJsonObject.Add(jsonName, ConvertFromMetadataValueToJsonNode(value));
136-
}
137-
}
129+
VectorStoreRecordMapping.SetValuesOnProperties(
130+
outputRecord,
131+
this._propertyReader.DataPropertiesInfo,
132+
this._propertyReader.StoragePropertyNamesMap,
133+
storageModel.Metadata,
134+
ConvertFromMetadataValueToNativeType);
138135
}
139136

140-
return outputJsonObject.Deserialize<TRecord>()!;
137+
return outputRecord;
141138
}
142139

143-
private static JsonNode? ConvertFromMetadataValueToJsonNode(MetadataValue metadataValue)
140+
private static object? ConvertFromMetadataValueToNativeType(MetadataValue metadataValue, Type targetType)
144141
=> metadataValue.Inner switch
145142
{
146143
null => null,
147-
bool boolValue => JsonValue.Create(boolValue),
148-
string stringValue => JsonValue.Create(stringValue),
149-
int intValue => JsonValue.Create(intValue),
150-
long longValue => JsonValue.Create(longValue),
151-
float floatValue => JsonValue.Create(floatValue),
152-
double doubleValue => JsonValue.Create(doubleValue),
153-
decimal decimalValue => JsonValue.Create(decimalValue),
154-
MetadataValue[] array => new JsonArray(array.Select(ConvertFromMetadataValueToJsonNode).ToArray()),
155-
List<MetadataValue> list => new JsonArray(list.Select(ConvertFromMetadataValueToJsonNode).ToArray()),
144+
bool boolValue => boolValue,
145+
string stringValue => stringValue,
146+
// Numeric values are not always coming from the SDK in the desired type
147+
// that the data model requires, so we need to convert them.
148+
int intValue => ConvertToNumericValue(intValue, targetType),
149+
long longValue => ConvertToNumericValue(longValue, targetType),
150+
float floatValue => ConvertToNumericValue(floatValue, targetType),
151+
double doubleValue => ConvertToNumericValue(doubleValue, targetType),
152+
decimal decimalValue => ConvertToNumericValue(decimalValue, targetType),
153+
MetadataValue[] array => VectorStoreRecordMapping.CreateEnumerable(array.Select(x => ConvertFromMetadataValueToNativeType(x, VectorStoreRecordPropertyVerification.GetCollectionElementType(targetType))), targetType),
154+
List<MetadataValue> list => VectorStoreRecordMapping.CreateEnumerable(list.Select(x => ConvertFromMetadataValueToNativeType(x, VectorStoreRecordPropertyVerification.GetCollectionElementType(targetType))), targetType),
156155
_ => throw new VectorStoreRecordMappingException($"Unsupported metadata type: '{metadataValue.Inner?.GetType().FullName}'."),
157156
};
158157

158+
private static object? ConvertToNumericValue(object? number, Type targetType)
159+
{
160+
if (number is null)
161+
{
162+
return null;
163+
}
164+
165+
return targetType switch
166+
{
167+
Type intType when intType == typeof(int) || intType == typeof(int?) => Convert.ToInt32(number),
168+
Type longType when longType == typeof(long) || longType == typeof(long?) => Convert.ToInt64(number),
169+
Type floatType when floatType == typeof(float) || floatType == typeof(float?) => Convert.ToSingle(number),
170+
Type doubleType when doubleType == typeof(double) || doubleType == typeof(double?) => Convert.ToDouble(number),
171+
Type decimalType when decimalType == typeof(decimal) || decimalType == typeof(decimal?) => Convert.ToDecimal(number),
172+
_ => throw new VectorStoreRecordMappingException($"Unsupported target numeric type '{targetType.FullName}'."),
173+
};
174+
}
175+
159176
// TODO: take advantage of MetadataValue.TryCreate once we upgrade the version of Pinecone.NET
160177
private static MetadataValue ConvertToMetadataValue(object? sourceValue)
161178
=> sourceValue switch

dotnet/src/Connectors/Connectors.Memory.Qdrant/QdrantVectorStoreRecordMapper.cs

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
// Copyright (c) Microsoft. All rights reserved.
22

33
using System;
4-
using System.Collections.Generic;
54
using System.Linq;
6-
using System.Reflection;
75
using Microsoft.SemanticKernel.Data;
86
using Qdrant.Client.Grpc;
97

@@ -123,44 +121,35 @@ public TRecord MapFromStorageToDataModel(PointStruct storageModel, StorageToData
123121
var outputRecord = (TRecord)this._propertyReader.ParameterLessConstructorInfo.Invoke(null);
124122

125123
// Set Key.
126-
var keyPropertyInfoWithValue = new KeyValuePair<PropertyInfo, object?>(
127-
this._propertyReader.KeyPropertyInfo,
128-
keyPropertyValue);
129-
VectorStoreRecordMapping.SetPropertiesOnRecord(
130-
outputRecord,
131-
[keyPropertyInfoWithValue]);
124+
this._propertyReader.KeyPropertyInfo.SetValue(outputRecord, keyPropertyValue);
132125

133126
// Set each vector property if embeddings are included in the point.
134127
if (options?.IncludeVectors is true)
135128
{
136129
if (this._hasNamedVectors)
137130
{
138-
var propertiesInfoWithValues = VectorStoreRecordMapping.BuildPropertiesInfoWithValues(
131+
VectorStoreRecordMapping.SetValuesOnProperties(
132+
outputRecord,
139133
this._propertyReader.VectorPropertiesInfo,
140134
this._propertyReader.StoragePropertyNamesMap,
141135
storageModel.Vectors.Vectors_.Vectors,
142136
(Vector vector, Type targetType) => new ReadOnlyMemory<float>(vector.Data.ToArray()));
143-
VectorStoreRecordMapping.SetPropertiesOnRecord(outputRecord, propertiesInfoWithValues);
144137
}
145138
else
146139
{
147-
var propertyInfoWithValue = new KeyValuePair<PropertyInfo, object?>(
148-
this._propertyReader.FirstVectorPropertyInfo!,
149-
new ReadOnlyMemory<float>(storageModel.Vectors.Vector.Data.ToArray()));
150-
VectorStoreRecordMapping.SetPropertiesOnRecord(
140+
this._propertyReader.FirstVectorPropertyInfo!.SetValue(
151141
outputRecord,
152-
[propertyInfoWithValue]);
142+
new ReadOnlyMemory<float>(storageModel.Vectors.Vector.Data.ToArray()));
153143
}
154144
}
155145

156146
// Set each data property.
157-
var dataPropertiesInfoWithValues = VectorStoreRecordMapping.BuildPropertiesInfoWithValues(
147+
VectorStoreRecordMapping.SetValuesOnProperties(
148+
outputRecord,
158149
this._propertyReader.DataPropertiesInfo,
159150
this._propertyReader.StoragePropertyNamesMap,
160151
storageModel.Payload,
161-
(Value grpcValue, Type targetType) =>
162-
QdrantVectorStoreRecordFieldMapping.ConvertFromGrpcFieldValueToNativeType(grpcValue, targetType));
163-
VectorStoreRecordMapping.SetPropertiesOnRecord(outputRecord, dataPropertiesInfoWithValues);
152+
QdrantVectorStoreRecordFieldMapping.ConvertFromGrpcFieldValueToNativeType);
164153

165154
return outputRecord;
166155
}

dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetGenericDataModelMapper.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ public VectorStoreGenericDataModel<string> MapFromStorageToDataModel((string Key
107107
var convertedValue = Convert.ChangeType(hashEntry.Value, typeOrNullableType);
108108
dataModel.Data.Add(dataProperty.DataModelPropertyName, convertedValue);
109109
}
110+
110111
// Map vector properties
111112
else if (property is VectorStoreRecordVectorProperty vectorProperty)
112113
{

dotnet/src/Connectors/Connectors.Memory.Redis/RedisHashSetVectorStoreRecordMapper.cs

Lines changed: 35 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
using System.Collections.Generic;
55
using System.Linq;
66
using System.Runtime.InteropServices;
7-
using System.Text.Json;
8-
using System.Text.Json.Nodes;
97
using Microsoft.SemanticKernel.Data;
108
using StackExchange.Redis;
119

@@ -29,6 +27,7 @@ public RedisHashSetVectorStoreRecordMapper(
2927
VectorStoreRecordPropertyReader propertyReader)
3028
{
3129
Verify.NotNull(propertyReader);
30+
propertyReader.VerifyHasParameterlessConstructor();
3231
this._propertyReader = propertyReader;
3332
}
3433

@@ -72,58 +71,53 @@ public RedisHashSetVectorStoreRecordMapper(
7271
/// <inheritdoc />
7372
public TConsumerDataModel MapFromStorageToDataModel((string Key, HashEntry[] HashEntries) storageModel, StorageToDataModelMapperOptions options)
7473
{
75-
var jsonObject = new JsonObject();
74+
var hashEntriesDictionary = storageModel.HashEntries.ToDictionary(x => (string)x.Name!, x => x.Value);
7675

77-
foreach (var property in this._propertyReader.DataPropertiesInfo)
78-
{
79-
var storageName = this._propertyReader.GetStoragePropertyName(property.Name);
80-
var jsonName = this._propertyReader.GetJsonPropertyName(property.Name);
81-
var hashEntry = storageModel.HashEntries.FirstOrDefault(x => x.Name == storageName);
82-
if (hashEntry.Name.HasValue)
83-
{
84-
var typeOrNullableType = Nullable.GetUnderlyingType(property.PropertyType) ?? property.PropertyType;
85-
var convertedValue = Convert.ChangeType(hashEntry.Value, typeOrNullableType);
86-
jsonObject.Add(jsonName, JsonValue.Create(convertedValue));
87-
}
88-
}
76+
// Construct the output record.
77+
var outputRecord = (TConsumerDataModel)this._propertyReader.ParameterLessConstructorInfo.Invoke(null);
8978

90-
if (options.IncludeVectors)
91-
{
92-
foreach (var property in this._propertyReader.VectorPropertiesInfo)
93-
{
94-
var storageName = this._propertyReader.GetStoragePropertyName(property.Name);
95-
var jsonName = this._propertyReader.GetJsonPropertyName(property.Name);
79+
// Set Key.
80+
this._propertyReader.KeyPropertyInfo.SetValue(outputRecord, storageModel.Key);
9681

97-
var hashEntry = storageModel.HashEntries.FirstOrDefault(x => x.Name == storageName);
98-
if (hashEntry.Name.HasValue)
82+
// Set each vector property if embeddings should be returned.
83+
if (options?.IncludeVectors is true)
84+
{
85+
VectorStoreRecordMapping.SetValuesOnProperties(
86+
outputRecord,
87+
this._propertyReader.VectorPropertiesInfo,
88+
this._propertyReader.StoragePropertyNamesMap,
89+
hashEntriesDictionary,
90+
(RedisValue vector, Type targetType) =>
9991
{
100-
if (property.PropertyType == typeof(ReadOnlyMemory<float>) || property.PropertyType == typeof(ReadOnlyMemory<float>?))
92+
if (targetType == typeof(ReadOnlyMemory<float>) || targetType == typeof(ReadOnlyMemory<float>?))
10193
{
102-
var array = MemoryMarshal.Cast<byte, float>((byte[])hashEntry.Value!).ToArray();
103-
jsonObject.Add(jsonName, JsonValue.Create(array));
94+
var array = MemoryMarshal.Cast<byte, float>((byte[])vector!).ToArray();
95+
return new ReadOnlyMemory<float>(array);
10496
}
105-
else if (property.PropertyType == typeof(ReadOnlyMemory<double>) || property.PropertyType == typeof(ReadOnlyMemory<double>?))
97+
else if (targetType == typeof(ReadOnlyMemory<double>) || targetType == typeof(ReadOnlyMemory<double>?))
10698
{
107-
var array = MemoryMarshal.Cast<byte, double>((byte[])hashEntry.Value!).ToArray();
108-
jsonObject.Add(jsonName, JsonValue.Create(array));
99+
var array = MemoryMarshal.Cast<byte, double>((byte[])vector!).ToArray();
100+
return new ReadOnlyMemory<double>(array);
109101
}
110102
else
111103
{
112-
throw new VectorStoreRecordMappingException($"Invalid vector type '{property.PropertyType.Name}' found on property '{property.Name}' on provided record of type '{typeof(TConsumerDataModel).FullName}'. Only float and double vectors are supported.");
104+
throw new VectorStoreRecordMappingException($"Unsupported vector type '{targetType}'. Only float and double vectors are supported.");
113105
}
114-
}
115-
}
106+
});
116107
}
117108

118-
// Check that the key field is not already present in the redis value.
119-
if (jsonObject.ContainsKey(this._propertyReader.KeyPropertyJsonName))
120-
{
121-
throw new VectorStoreRecordMappingException($"Invalid data format for document with key '{storageModel.Key}'. Key property '{this._propertyReader.KeyPropertyJsonName}' is already present on retrieved object.");
122-
}
123-
124-
// Since the key is not stored in the redis value, add it back in before deserializing into the data model.
125-
jsonObject.Add(this._propertyReader.KeyPropertyJsonName, storageModel.Key);
109+
// Set each data property.
110+
VectorStoreRecordMapping.SetValuesOnProperties(
111+
outputRecord,
112+
this._propertyReader.DataPropertiesInfo,
113+
this._propertyReader.StoragePropertyNamesMap,
114+
hashEntriesDictionary,
115+
(RedisValue hashValue, Type targetType) =>
116+
{
117+
var typeOrNullableType = Nullable.GetUnderlyingType(targetType) ?? targetType;
118+
return Convert.ChangeType(hashValue, typeOrNullableType);
119+
});
126120

127-
return JsonSerializer.Deserialize<TConsumerDataModel>(jsonObject)!;
121+
return outputRecord;
128122
}
129123
}

dotnet/src/IntegrationTests/Connectors/Memory/Pinecone/PineconeVectorStoreRecordCollectionTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ public void UseRecordWithNoEmbeddingThrows()
410410
"Whatever"));
411411

412412
Assert.Equal(
413-
$"No vector property found on type {typeof(PineconeRecordNoEmbedding).FullName}.",
413+
$"No vector property found on type {nameof(PineconeRecordNoEmbedding)} or the provided VectorStoreRecordDefinition while at least one is required.",
414414
exception.Message);
415415
}
416416

@@ -434,7 +434,7 @@ public void UseRecordWithMultipleEmbeddingsThrows()
434434
"Whatever"));
435435

436436
Assert.Equal(
437-
$"Multiple vector properties found on type {typeof(PineconeRecordMultipleEmbeddings).FullName} while only one is supported.",
437+
$"Multiple vector properties found on type {nameof(PineconeRecordMultipleEmbeddings)} or the provided VectorStoreRecordDefinition while only one is supported.",
438438
exception.Message);
439439
}
440440

0 commit comments

Comments
 (0)