Skip to content

Commit dd992d3

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 86810f7 + df942b9 commit dd992d3

File tree

11 files changed

+694
-43
lines changed

11 files changed

+694
-43
lines changed

docs/getting-started/faq.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@ Yes, this is possible. For example, you can use BigQuery as an offline store and
3939

4040
### How do I run `get_historical_features` without providing an entity dataframe?
4141

42-
Feast does not provide a way to do this right now. This is an area we're actively interested in contributions for. See [GitHub issue](https://github.com/feast-dev/feast/issues/1611)
42+
Feast supports fetching historical features without passing an entity dataframe with the request.
43+
- As of today, only the `postgres` offline store supports retrieval without an entity dataframe. The remaining offline stores will be updated gradually to support it, prioritized by user base and requests.
44+
- The retrieval is based on `start_date` and `end_date` parameters to the function. Here are some combinations supported.
45+
- Both params are given: returns data within the given start-to-end time range.
46+
- Only the `start_date` param is given: returns data from the start date up to the current time (`now`).
47+
- Only the `end_date` param is given: returns data from `end_date` minus the feature view's TTL up to `end_date`.
48+
- Neither param is given: returns data from the current time minus the feature view's TTL up to the current time.
49+
- When features are requested from multiple feature views, the entity IDs must be present in all of them so that the results can be combined with a `JOIN`.
50+
51+
This is an area we're actively interested in contributions for. See [GitHub issue](https://github.com/feast-dev/feast/issues/1611)
4352

4453
### Does Feast provide security or access control?
4554

sdk/python/feast/cli/features.py

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
from datetime import datetime
23
from typing import List
34

45
import click
@@ -140,37 +141,69 @@ def get_online_features(ctx: click.Context, entities: List[str], features: List[
140141
"--dataframe",
141142
"-d",
142143
type=str,
143-
required=True,
144144
help='JSON string containing entities and timestamps. Example: \'[{"event_timestamp": "2025-03-29T12:00:00", "driver_id": 1001}]\'',
145145
)
146146
@click.option(
147147
"--features",
148148
"-f",
149149
multiple=True,
150-
required=True,
151150
help="Features to retrieve. feature-view:feature-name ex: driver_hourly_stats:conv_rate",
152151
)
152+
@click.option(
153+
"--start-date",
154+
"-s",
155+
type=str,
156+
help="Start date for historical feature retrieval. Format: YYYY-MM-DD HH:MM:SS",
157+
)
158+
@click.option(
159+
"--end-date",
160+
"-e",
161+
type=str,
162+
help="End date for historical feature retrieval. Format: YYYY-MM-DD HH:MM:SS",
163+
)
153164
@click.pass_context
154-
def get_historical_features(ctx: click.Context, dataframe: str, features: List[str]):
165+
def get_historical_features(
166+
ctx: click.Context,
167+
dataframe: str,
168+
features: List[str],
169+
start_date: str,
170+
end_date: str,
171+
):
155172
"""
156173
Fetch historical feature values for a given entity ID
157174
"""
158175
store = create_feature_store(ctx)
159-
try:
160-
entity_list = json.loads(dataframe)
161-
if not isinstance(entity_list, list):
162-
raise ValueError("Entities must be a list of dictionaries.")
163-
164-
entity_df = pd.DataFrame(entity_list)
165-
entity_df["event_timestamp"] = pd.to_datetime(entity_df["event_timestamp"])
176+
if not dataframe and not start_date and not end_date:
177+
click.echo(
178+
"Either --dataframe or --start-date and/or --end-date must be provided."
179+
)
180+
return
166181

167-
except Exception as e:
168-
click.echo(f"Error parsing entities JSON: {e}", err=True)
182+
if dataframe and (start_date or end_date):
183+
click.echo("Cannot specify both --dataframe and --start-date/--end-date.")
169184
return
170185

186+
entity_df = None
187+
if dataframe:
188+
try:
189+
entity_list = json.loads(dataframe)
190+
if not isinstance(entity_list, list):
191+
raise ValueError("Entities must be a list of dictionaries.")
192+
193+
entity_df = pd.DataFrame(entity_list)
194+
entity_df["event_timestamp"] = pd.to_datetime(entity_df["event_timestamp"])
195+
196+
except Exception as e:
197+
click.echo(f"Error parsing entities JSON: {e}", err=True)
198+
return
199+
171200
feature_vector = store.get_historical_features(
172201
entity_df=entity_df,
173202
features=list(features),
203+
start_date=datetime.strptime(start_date, "%Y-%m-%d %H:%M:%S")
204+
if start_date
205+
else None,
206+
end_date=datetime.strptime(end_date, "%Y-%m-%d %H:%M:%S") if end_date else None,
174207
).to_df()
175208

176209
click.echo(feature_vector.to_json(orient="records", indent=4))

sdk/python/feast/feature_store.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,14 +1087,17 @@ def teardown(self):
10871087

10881088
def get_historical_features(
10891089
self,
1090-
entity_df: Union[pd.DataFrame, str],
1091-
features: Union[List[str], FeatureService],
1090+
entity_df: Optional[Union[pd.DataFrame, str]] = None,
1091+
features: Union[List[str], FeatureService] = [],
10921092
full_feature_names: bool = False,
1093+
start_date: Optional[datetime] = None,
1094+
end_date: Optional[datetime] = None,
10931095
) -> RetrievalJob:
10941096
"""Enrich an entity dataframe with historical feature values for either training or batch scoring.
10951097
10961098
This method joins historical feature data from one or more feature views to an entity dataframe by using a time
1097-
travel join.
1099+
travel join. Alternatively, features can be retrieved for a specific timestamp range without requiring an entity
1100+
dataframe.
10981101
10991102
Each feature view is joined to the entity dataframe using all entities configured for the respective feature
11001103
view. All configured entities must be available in the entity dataframe. Therefore, the entity dataframe must
@@ -1105,16 +1108,21 @@ def get_historical_features(
11051108
TTL may result in null values being returned.
11061109
11071110
Args:
1108-
entity_df (Union[pd.DataFrame, str]): An entity dataframe is a collection of rows containing all entity
1109-
columns (e.g., customer_id, driver_id) on which features need to be joined, as well as a event_timestamp
1110-
column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
1111-
SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery)
11121111
features: The list of features that should be retrieved from the offline store. These features can be
11131112
specified either as a list of string feature references or as a feature service. String feature
11141113
references must have format "feature_view:feature", e.g. "customer_fv:daily_transactions".
1114+
entity_df (Optional[Union[pd.DataFrame, str]]): An entity dataframe is a collection of rows containing all entity
1115+
columns (e.g., customer_id, driver_id) on which features need to be joined, as well as a event_timestamp
1116+
column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
1117+
SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery).
1118+
If not provided, features will be retrieved for the specified timestamp range without entity joins.
11151119
full_feature_names: If True, feature names will be prefixed with the corresponding feature view name,
11161120
changing them from the format "feature" to "feature_view__feature" (e.g. "daily_transactions"
11171121
changes to "customer_fv__daily_transactions").
1122+
start_date (Optional[datetime]): Start date for the timestamp range when retrieving features without entity_df.
1123+
Optional, and only valid when entity_df is not provided. If omitted, no start date is passed on to the provider.
1124+
end_date (Optional[datetime]): End date for the timestamp range when retrieving features without entity_df.
1125+
Optional, and only valid when entity_df is not provided. If omitted in that case, the current time is used.
11181126
11191127
Returns:
11201128
RetrievalJob which can be used to materialize the results.
@@ -1147,6 +1155,15 @@ def get_historical_features(
11471155
... )
11481156
>>> feature_data = retrieval_job.to_df()
11491157
"""
1158+
1159+
if entity_df is not None and (start_date is not None or end_date is not None):
1160+
raise ValueError(
1161+
"Cannot specify both entity_df and start_date/end_date. Use either entity_df for entity-based retrieval or start_date/end_date for timestamp range retrieval."
1162+
)
1163+
1164+
if entity_df is None and end_date is None:
1165+
end_date = datetime.now()
1166+
11501167
_feature_refs = utils._get_features(self._registry, self.project, features)
11511168
(
11521169
all_feature_views,
@@ -1180,6 +1197,13 @@ def get_historical_features(
11801197
utils._validate_feature_refs(_feature_refs, full_feature_names)
11811198
provider = self._get_provider()
11821199

1200+
# Optional kwargs
1201+
kwargs = {}
1202+
if start_date is not None:
1203+
kwargs["start_date"] = start_date
1204+
if end_date is not None:
1205+
kwargs["end_date"] = end_date
1206+
11831207
job = provider.get_historical_features(
11841208
self.config,
11851209
feature_views,
@@ -1188,6 +1212,7 @@ def get_historical_features(
11881212
self._registry,
11891213
self.project,
11901214
full_feature_names,
1215+
**kwargs,
11911216
)
11921217

11931218
return job

0 commit comments

Comments
 (0)