|
67 | 67 | update_feature_views_with_inferred_features_and_entities,
|
68 | 68 | )
|
69 | 69 | from feast.infra.infra_object import Infra
|
| 70 | +from feast.infra.offline_stores.offline_utils import ( |
| 71 | + DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL, |
| 72 | +) |
70 | 73 | from feast.infra.provider import Provider, RetrievalJob, get_provider
|
71 | 74 | from feast.infra.registry.base_registry import BaseRegistry
|
72 | 75 | from feast.infra.registry.registry import Registry
|
@@ -1287,6 +1290,115 @@ def get_saved_dataset(self, name: str) -> SavedDataset:
|
1287 | 1290 | )
|
1288 | 1291 | return dataset.with_retrieval_job(retrieval_job)
|
1289 | 1292 |
|
def _materialize_odfv(
    self,
    feature_view: OnDemandFeatureView,
    start_date: datetime,
    end_date: datetime,
) -> None:
    """Materialize a single OnDemandFeatureView into the online store.

    The steps are:

    1. Resolve the source feature views behind the ODFV's projections.
    2. Pull rows from each source's batch source for the requested window
       and use their entity columns plus the shared timestamp column to
       build an entity dataframe.
    3. Fetch the projected source features with ``get_historical_features``.
    4. Apply the ODFV transformation and write the result to the online
       store under the ODFV's name.

    Materialization is skipped with a printed message (nothing is raised)
    when there are no source projections, no join keys, no timestamp
    column or more than one distinct timestamp column across the sources,
    or no usable source rows in the window.

    Args:
        feature_view: The on demand feature view to materialize.
        start_date: Start of the window passed to the offline store.
        end_date: End of the window passed to the offline store.
    """
    if not feature_view.source_feature_view_projections:
        print(
            f"[WARNING] ODFV {feature_view.name} materialization: No source feature views found."
        )
        return
    start_date = utils.make_tzaware(start_date)
    end_date = utils.make_tzaware(end_date)

    projections = list(feature_view.source_feature_view_projections.values())

    # Resolve every distinct source view once, keeping projection order so
    # that the offline-store pulls below happen in a reproducible order
    # (iterating a set of feature views is hash-order dependent).
    source_fvs_by_name = {}
    for proj in projections:
        if proj.name not in source_fvs_by_name:
            source_fvs_by_name[proj.name] = self._get_feature_view(proj.name)
    source_fvs = list(source_fvs_by_name.values())

    # NOTE(review): `entities` is used here as the list of join key column
    # names; if an entity's join key can differ from its name this needs
    # to be resolved through the entity definitions instead - confirm.
    all_join_keys = set()
    entity_timestamp_col_names = set()
    for source_fv in source_fvs:
        all_join_keys.update(key for key in source_fv.entities if key)
        if source_fv.batch_source:
            entity_timestamp_col_names.add(source_fv.batch_source.timestamp_field)

    source_features_from_projections = [
        f"{proj.name}:{f.name}" for proj in projections for f in proj.features
    ]

    if not all_join_keys:
        print(
            f"[WARNING] ODFV {feature_view.name} materialization: No join keys found in source views. Cannot create entity_df. Skipping."
        )
        return

    if len(entity_timestamp_col_names) > 1:
        print(
            f"[WARNING] ODFV {feature_view.name} materialization: Found multiple timestamp columns in sources ({entity_timestamp_col_names}). This is not supported. Skipping."
        )
        return

    if not entity_timestamp_col_names:
        print(
            f"[WARNING] ODFV {feature_view.name} materialization: No batch sources with timestamp columns found for sources. Skipping."
        )
        return

    # Exactly one timestamp column name remains at this point.
    event_timestamp_col = next(iter(entity_timestamp_col_names))
    all_source_dfs = []
    provider = self._get_provider()

    for source_fv in source_fvs:
        if not source_fv.batch_source:
            continue

        job = provider.offline_store.pull_latest_from_table_or_query(
            config=self.config,
            data_source=source_fv.batch_source,
            join_key_columns=source_fv.entities,
            feature_name_columns=[f.name for f in source_fv.features],
            timestamp_field=source_fv.batch_source.timestamp_field,
            created_timestamp_column=getattr(
                source_fv.batch_source, "created_timestamp_column", None
            ),
            start_date=start_date,
            end_date=end_date,
        )
        df = job.to_df()
        if not df.empty:
            all_source_dfs.append(df)

    if not all_source_dfs:
        print(
            f"No source data found for ODFV {feature_view.name} in the given time range. Skipping materialization."
        )
        return

    # Sorted so the entity dataframe's column order does not depend on set
    # iteration order.
    entity_df_cols = sorted(all_join_keys) + [event_timestamp_col]
    all_sources_combined_df = pd.concat(all_source_dfs, ignore_index=True)

    # A source view without a batch source contributes join keys but no
    # rows, so a key column can be absent from every pulled frame; selecting
    # it would raise KeyError. Skip like the other unsupported cases.
    missing_cols = [
        col for col in entity_df_cols if col not in all_sources_combined_df.columns
    ]
    if missing_cols:
        print(
            f"[WARNING] ODFV {feature_view.name} materialization: Source data is missing entity columns {missing_cols}. Cannot create entity_df. Skipping."
        )
        return

    # Concatenating sources with different entity columns fills the gaps
    # with NaN; rows without a full set of join keys and a timestamp cannot
    # identify an entity row, so they are dropped before the lookup.
    entity_df = (
        all_sources_combined_df[entity_df_cols]
        .dropna(subset=entity_df_cols)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    if entity_df.empty:
        print(
            f"No source data found for ODFV {feature_view.name} in the given time range. Skipping materialization."
        )
        return

    # The entity dataframe handed to get_historical_features uses the
    # default event timestamp column name.
    if event_timestamp_col != DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL:
        entity_df = entity_df.rename(
            columns={event_timestamp_col: DEFAULT_ENTITY_DF_EVENT_TIMESTAMP_COL}
        )

    retrieval_job = self.get_historical_features(
        entity_df=entity_df,
        features=source_features_from_projections,
    )
    input_df = retrieval_job.to_df()
    transformed_df = self._transform_on_demand_feature_view_df(
        feature_view, input_df
    )
    self.write_to_online_store(feature_view.name, df=transformed_df)
| 1401 | + |
1290 | 1402 | def materialize_incremental(
|
1291 | 1403 | self,
|
1292 | 1404 | end_date: datetime,
|
@@ -1332,7 +1444,27 @@ def materialize_incremental(
|
1332 | 1444 | # TODO paging large loads
|
1333 | 1445 | for feature_view in feature_views_to_materialize:
|
1334 | 1446 | if isinstance(feature_view, OnDemandFeatureView):
|
| 1447 | + if feature_view.write_to_online_store: |
| 1448 | + source_fvs = { |
| 1449 | + self._get_feature_view(p.name) |
| 1450 | + for p in feature_view.source_feature_view_projections.values() |
| 1451 | + } |
| 1452 | + max_ttl = timedelta(0) |
| 1453 | + for fv in source_fvs: |
| 1454 | + if fv.ttl and fv.ttl > max_ttl: |
| 1455 | + max_ttl = fv.ttl |
| 1456 | + |
| 1457 | + if max_ttl.total_seconds() > 0: |
| 1458 | + odfv_start_date = end_date - max_ttl |
| 1459 | + else: |
| 1460 | + odfv_start_date = end_date - timedelta(weeks=52) |
| 1461 | + |
| 1462 | + print( |
| 1463 | + f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:" |
| 1464 | + ) |
| 1465 | + self._materialize_odfv(feature_view, odfv_start_date, end_date) |
1335 | 1466 | continue
|
| 1467 | + |
1336 | 1468 | start_date = feature_view.most_recent_end_time
|
1337 | 1469 | if start_date is None:
|
1338 | 1470 | if feature_view.ttl is None:
|
@@ -1428,6 +1560,13 @@ def materialize(
|
1428 | 1560 | )
|
1429 | 1561 | # TODO paging large loads
|
1430 | 1562 | for feature_view in feature_views_to_materialize:
|
| 1563 | + if isinstance(feature_view, OnDemandFeatureView): |
| 1564 | + if feature_view.write_to_online_store: |
| 1565 | + print( |
| 1566 | + f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:" |
| 1567 | + ) |
| 1568 | + self._materialize_odfv(feature_view, start_date, end_date) |
| 1569 | + continue |
1431 | 1570 | provider = self._get_provider()
|
1432 | 1571 | print(f"{Style.BRIGHT + Fore.GREEN}{feature_view.name}{Style.RESET_ALL}:")
|
1433 | 1572 |
|
|
0 commit comments