Skip to content

Commit d8f1c97

Browse files
authored
feat: Vector length definition moved to Feature View from Config (feast-dev#5289)
* No vector length - Postgres Signed-off-by: jyejare <[email protected]> * Field of Vector length Signed-off-by: jyejare <[email protected]> * SQLite vector length Signed-off-by: jyejare <[email protected]> * ElasticSearch vector length Signed-off-by: jyejare <[email protected]> * Qdrant Vector length Signed-off-by: jyejare <[email protected]> * Test vector_length updates and related Fixes Signed-off-by: jyejare <[email protected]> * Vector length cleanup for Store configs Signed-off-by: jyejare <[email protected]> * All Protos regenerated Signed-off-by: jyejare <[email protected]> * Vector len param renamed to Vector length Signed-off-by: jyejare <[email protected]> * On Demand feature view transformation updated Signed-off-by: jyejare <[email protected]> --------- Signed-off-by: jyejare <[email protected]>
1 parent bb1cbea commit d8f1c97

File tree

84 files changed

+1941
-401
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1941
-401
lines changed

docs/reference/online-stores/elasticsearch.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ online_store:
2121
port: ES_PORT
2222
user: ES_USERNAME
2323
password: ES_PASSWORD
24-
vector_len: 512
2524
write_batch_size: 1000
2625
```
2726
{% endcode %}
@@ -88,7 +87,7 @@ Currently, the indexing mapping in the ElasticSearch online store is configured
8887
"created_ts": {"type": "date"},
8988
"vector_value": {
9089
"type": "dense_vector",
91-
"dims": config.online_store.vector_len,
90+
"dims": vector_field_length,
9291
"index": "true",
9392
"similarity": config.online_store.similarity,
9493
},

docs/reference/online-stores/postgres.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ online_store:
3131
sslcert_path: /path/to/client-cert.pem
3232
sslrootcert_path: /path/to/server-ca.pem
3333
vector_enabled: false
34-
vector_len: 512
3534
```
3635
{% endcode %}
3736
@@ -67,7 +66,7 @@ To compare this set of functionality against other online stores, please see the
6766
The Postgres online store supports the use of [PGVector](https://github.com/pgvector/pgvector) for storing feature values.
6867
To enable PGVector, set `vector_enabled: true` in the online store configuration.
6968

70-
The `vector_len` parameter can be used to specify the length of the vector. The default value is 512.
69+
The `vector_length` parameter of a `Field` can be used to specify the length of the vector stored in that field.
7170

7271
Please make sure to follow the instructions in the repository, which, as of the time of this writing, requires you to
7372
run `CREATE EXTENSION vector;` in the database.

docs/reference/online-stores/qdrant.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ online_store:
2020
type: qdrant
2121
host: localhost
2222
port: 6333
23-
vector_len: 384
2423
write_batch_size: 100
2524
```
2625

protos/feast/core/Feature.proto

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,7 @@ message FeatureSpecV2 {
4242

4343
// Metric used for vector similarity search.
4444
string vector_search_metric = 6;
45+
46+
// Field indicating the vector length
47+
int32 vector_length = 7;
4548
}

sdk/python/feast/feature_store.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
from feast.feast_object import FeastObject
6363
from feast.feature_service import FeatureService
6464
from feast.feature_view import DUMMY_ENTITY, DUMMY_ENTITY_NAME, FeatureView
65-
from feast.field import Field
6665
from feast.inference import (
6766
update_data_sources_with_inferred_event_timestamp_col,
6867
update_feature_views_with_inferred_features_and_entities,
@@ -91,7 +90,7 @@
9190
from feast.stream_feature_view import StreamFeatureView
9291
from feast.transformation.pandas_transformation import PandasTransformation
9392
from feast.transformation.python_transformation import PythonTransformation
94-
from feast.utils import _utc_now
93+
from feast.utils import _get_feature_view_vector_field_metadata, _utc_now
9594

9695
warnings.simplefilter("once", DeprecationWarning)
9796

@@ -856,7 +855,6 @@ def apply(
856855
if not isinstance(objects, Iterable):
857856
objects = [objects]
858857
assert isinstance(objects, list)
859-
860858
if not objects_to_delete:
861859
objects_to_delete = []
862860

@@ -1555,6 +1553,18 @@ def _get_feature_view_and_df_for_online_write(
15551553
except Exception as _:
15561554
raise DataFrameSerializationError(df)
15571555

1556+
if feature_view.features[0].vector_index and df is not None:
1557+
fv_vector_feature_name = feature_view.features[0].name
1558+
df_vector_feature_index = df.columns.get_loc(fv_vector_feature_name)
1559+
if feature_view.features[0].vector_length != 0:
1560+
if (
1561+
df.shape[df_vector_feature_index]
1562+
> feature_view.features[0].vector_length
1563+
):
1564+
raise ValueError(
1565+
f"The dataframe for {fv_vector_feature_name} column has {df.shape[1]} vectors which is greater than expected (i.e {feature_view.features[0].vector_length}) by feature view {feature_view.name}."
1566+
)
1567+
15581568
# # Apply transformations if this is an OnDemandFeatureView with write_to_online_store=True
15591569
if (
15601570
isinstance(feature_view, OnDemandFeatureView)
@@ -2502,16 +2512,3 @@ def _validate_data_sources(data_sources: List[DataSource]):
25022512
raise DataSourceRepeatNamesException(case_insensitive_ds_name)
25032513
else:
25042514
ds_names.add(case_insensitive_ds_name)
2505-
2506-
2507-
def _get_feature_view_vector_field_metadata(
2508-
feature_view: FeatureView,
2509-
) -> Optional[Field]:
2510-
vector_fields = [field for field in feature_view.schema if field.vector_index]
2511-
if len(vector_fields) > 1:
2512-
raise ValueError(
2513-
f"Feature view {feature_view.name} has multiple vector fields. Only one vector field per feature view is supported."
2514-
)
2515-
if not vector_fields:
2516-
return None
2517-
return vector_fields[0]

sdk/python/feast/field.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class Field:
3333
description: A human-readable description.
3434
tags: User-defined metadata in dictionary form.
3535
vector_index: If set to True the field will be indexed for vector similarity search.
36+
vector_length: The length of the vector if the vector index is set to True.
3637
vector_search_metric: The metric used for vector similarity search.
3738
"""
3839

@@ -41,6 +42,7 @@ class Field:
4142
description: str
4243
tags: Dict[str, str]
4344
vector_index: bool
45+
vector_length: int
4446
vector_search_metric: Optional[str]
4547

4648
def __init__(
@@ -51,6 +53,7 @@ def __init__(
5153
description: str = "",
5254
tags: Optional[Dict[str, str]] = None,
5355
vector_index: bool = False,
56+
vector_length: int = 0,
5457
vector_search_metric: Optional[str] = None,
5558
):
5659
"""
@@ -69,6 +72,7 @@ def __init__(
6972
self.description = description
7073
self.tags = tags or {}
7174
self.vector_index = vector_index
75+
self.vector_length = vector_length
7276
self.vector_search_metric = vector_search_metric
7377

7478
def __eq__(self, other):
@@ -80,6 +84,7 @@ def __eq__(self, other):
8084
or self.dtype != other.dtype
8185
or self.description != other.description
8286
or self.tags != other.tags
87+
or self.vector_length != other.vector_length
8388
# or self.vector_index != other.vector_index
8489
# or self.vector_search_metric != other.vector_search_metric
8590
):
@@ -100,6 +105,7 @@ def __repr__(self):
100105
f" description={self.description!r},\n"
101106
f" tags={self.tags!r}\n"
102107
f" vector_index={self.vector_index!r}\n"
108+
f" vector_length={self.vector_length!r}\n"
103109
f" vector_search_metric={self.vector_search_metric!r}\n"
104110
f")"
105111
)
@@ -117,6 +123,7 @@ def to_proto(self) -> FieldProto:
117123
description=self.description,
118124
tags=self.tags,
119125
vector_index=self.vector_index,
126+
vector_length=self.vector_length,
120127
vector_search_metric=vector_search_metric,
121128
)
122129

@@ -131,12 +138,14 @@ def from_proto(cls, field_proto: FieldProto):
131138
value_type = ValueType(field_proto.value_type)
132139
vector_search_metric = getattr(field_proto, "vector_search_metric", "")
133140
vector_index = getattr(field_proto, "vector_index", False)
141+
vector_length = getattr(field_proto, "vector_length", 0)
134142
return cls(
135143
name=field_proto.name,
136144
dtype=from_value_type(value_type=value_type),
137145
tags=dict(field_proto.tags),
138146
description=field_proto.description,
139147
vector_index=vector_index,
148+
vector_length=vector_length,
140149
vector_search_metric=vector_search_metric,
141150
)
142151

sdk/python/feast/infra/online_stores/elasticsearch_online_store/elasticsearch.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
1919
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
2020
from feast.repo_config import FeastConfigBaseModel
21-
from feast.utils import _build_retrieve_online_document_record, to_naive_utc
21+
from feast.utils import (
22+
_build_retrieve_online_document_record,
23+
_get_feature_view_vector_field_metadata,
24+
to_naive_utc,
25+
)
2226

2327

2428
class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig):
@@ -161,6 +165,10 @@ def create_index(self, config: RepoConfig, table: FeatureView):
161165
config: Feast repo configuration object.
162166
table: FeatureView table for which the index needs to be created.
163167
"""
168+
vector_field_length = getattr(
169+
_get_feature_view_vector_field_metadata(table), "vector_length", 512
170+
)
171+
164172
index_mapping = {
165173
"properties": {
166174
"entity_key": {"type": "binary"},
@@ -170,7 +178,7 @@ def create_index(self, config: RepoConfig, table: FeatureView):
170178
"created_ts": {"type": "date"},
171179
"vector_value": {
172180
"type": "dense_vector",
173-
"dims": config.online_store.vector_len,
181+
"dims": vector_field_length,
174182
"index": "true",
175183
"similarity": config.online_store.similarity,
176184
},

sdk/python/feast/infra/online_stores/postgres_online_store/postgres.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def update(
311311
for table in tables_to_keep:
312312
table_name = _table_id(project, table)
313313
if config.online_store.vector_enabled:
314-
vector_value_type = f"vector({config.online_store.vector_len})"
314+
vector_value_type = "vector"
315315
else:
316316
# keep the vector_value_type as BYTEA if pgvector is not enabled, to maintain compatibility
317317
vector_value_type = "BYTEA"

sdk/python/feast/infra/online_stores/qdrant_online_store/qdrant.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919
from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
2020
from feast.protos.feast.types.Value_pb2 import Value as ValueProto
2121
from feast.repo_config import FeastConfigBaseModel
22-
from feast.utils import _build_retrieve_online_document_record, to_naive_utc
22+
from feast.utils import (
23+
_build_retrieve_online_document_record,
24+
_get_feature_view_vector_field_metadata,
25+
to_naive_utc,
26+
)
2327

2428
SCROLL_SIZE = 1000
2529

@@ -198,13 +202,17 @@ def create_collection(self, config: RepoConfig, table: FeatureView):
198202
table: FeatureView table for which the index needs to be created.
199203
"""
200204

205+
vector_field_length = getattr(
206+
_get_feature_view_vector_field_metadata(table), "vector_length", 512
207+
)
208+
201209
client: QdrantClient = self._get_client(config)
202210

203211
client.create_collection(
204212
collection_name=table.name,
205213
vectors_config={
206214
config.online_store.vector_name: models.VectorParams(
207-
size=config.online_store.vector_len,
215+
size=vector_field_length,
208216
distance=DISTANCE_MAPPING[config.online_store.similarity.lower()],
209217
)
210218
},

sdk/python/feast/infra/online_stores/sqlite.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from feast.types import FEAST_VECTOR_TYPES, PrimitiveFeastType
4444
from feast.utils import (
4545
_build_retrieve_online_document_record,
46+
_get_feature_view_vector_field_metadata,
4647
_serialize_vector_to_float_list,
4748
to_naive_utc,
4849
)
@@ -100,7 +101,6 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig):
100101
""" (optional) Path to sqlite db """
101102

102103
vector_enabled: bool = False
103-
vector_len: Optional[int] = None
104104
text_search_enabled: bool = False
105105

106106

@@ -171,8 +171,13 @@ def online_write_batch(
171171
feature_type_dict.get(feature_name, None)
172172
in FEAST_VECTOR_TYPES
173173
):
174+
vector_field_length = getattr(
175+
_get_feature_view_vector_field_metadata(table),
176+
"vector_length",
177+
512,
178+
)
174179
val_bin = serialize_f32(
175-
val.float_list_val.val, config.online_store.vector_len
180+
val.float_list_val.val, vector_field_length
176181
) # type: ignore
177182
else:
178183
val_bin = feast_value_type_to_python_type(val)
@@ -354,15 +359,19 @@ def retrieve_online_documents(
354359
conn = self._get_conn(config)
355360
cur = conn.cursor()
356361

362+
vector_field_length = getattr(
363+
_get_feature_view_vector_field_metadata(table), "vector_length", 512
364+
)
365+
357366
# Convert the embedding to a binary format instead of using SerializeToString()
358-
query_embedding_bin = serialize_f32(embedding, config.online_store.vector_len)
367+
query_embedding_bin = serialize_f32(embedding, vector_field_length)
359368
table_name = _table_id(project, table)
360369
vector_field = _get_vector_field(table)
361370

362371
cur.execute(
363372
f"""
364373
CREATE VIRTUAL TABLE vec_table using vec0(
365-
vector_value float[{config.online_store.vector_len}]
374+
vector_value float[{vector_field_length}]
366375
);
367376
"""
368377
)
@@ -378,7 +387,7 @@ def retrieve_online_documents(
378387
cur.execute(
379388
f"""
380389
CREATE VIRTUAL TABLE IF NOT EXISTS vec_table using vec0(
381-
vector_value float[{config.online_store.vector_len}]
390+
vector_value float[{vector_field_length}]
382391
);
383392
"""
384393
)
@@ -476,18 +485,19 @@ def retrieve_online_documents_v2(
476485
conn = self._get_conn(config)
477486
cur = conn.cursor()
478487

479-
if online_store.vector_enabled and not online_store.vector_len:
480-
raise ValueError("vector_len is not configured in the online store config")
488+
vector_field_length = getattr(
489+
_get_feature_view_vector_field_metadata(table), "vector_length", 512
490+
)
481491

482492
table_name = _table_id(config.project, table)
483493
vector_field = _get_vector_field(table)
484494

485495
if online_store.vector_enabled:
486-
query_embedding_bin = serialize_f32(query, online_store.vector_len) # type: ignore
496+
query_embedding_bin = serialize_f32(query, vector_field_length) # type: ignore
487497
cur.execute(
488498
f"""
489499
CREATE VIRTUAL TABLE IF NOT EXISTS vec_table using vec0(
490-
vector_value float[{online_store.vector_len}]
500+
vector_value float[{vector_field_length}]
491501
);
492502
"""
493503
)

0 commit comments

Comments
 (0)