Skip to content

Commit cc2a46d

Browse files
committed
feat: Batch Embedding at scale for RAG with Ray
Signed-off-by: ntkathole <[email protected]>
1 parent 2390d2e commit cc2a46d

File tree

24 files changed

+1310
-65
lines changed

24 files changed

+1310
-65
lines changed

protos/feast/core/Transformation.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ message UserDefinedFunctionV2 {
1616
// The string representation of the udf
1717
string body_text = 3;
1818

19-
// The transformation mode (e.g., "python", "pandas", "spark", "sql")
19+
// The transformation mode (e.g., "python", "pandas", "ray", "spark", "sql")
2020
string mode = 4;
2121
}
2222

sdk/python/feast/batch_feature_view.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ class BatchFeatureView(FeatureView):
5757
"""
5858

5959
name: str
60-
mode: Union[TransformationMode, str]
6160
entities: List[str]
6261
ttl: Optional[timedelta]
6362
source: DataSource
@@ -146,7 +145,8 @@ def get_feature_transformation(self) -> Optional[Transformation]:
146145
TransformationMode.PANDAS,
147146
TransformationMode.PYTHON,
148147
TransformationMode.SQL,
149-
) or self.mode in ("pandas", "python", "sql"):
148+
TransformationMode.RAY,
149+
) or self.mode in ("pandas", "python", "sql", "ray"):
150150
return Transformation(
151151
mode=self.mode, udf=self.udf, udf_string=self.udf_string or ""
152152
)

sdk/python/feast/cli/cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,7 @@ def materialize_incremental_command(ctx: click.Context, end_ts: str, views: List
411411
"couchbase",
412412
"milvus",
413413
"ray",
414+
"ray_rag",
414415
],
415416
case_sensitive=False,
416417
),

sdk/python/feast/feature_view.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ class FeatureView(BaseFeatureView):
8787
tags: A dictionary of key-value pairs to store arbitrary metadata.
8888
owner: The owner of the feature view, typically the email of the primary
8989
maintainer.
90+
mode: The transformation mode for feature transformations. Only meaningful when
91+
transformations are applied. Choose from TransformationMode enum values
92+
(e.g., PYTHON, PANDAS, RAY, SQL, SPARK, SUBSTRAIT).
9093
"""
9194

9295
name: str
@@ -143,7 +146,8 @@ def __init__(
143146
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
144147
owner (optional): The owner of the feature view, typically the email of the
145148
primary maintainer.
146-
mode (optional): The transformation mode to use (e.g., python, pandas, spark, sql).
149+
mode (optional): The transformation mode for feature transformations. Only meaningful
150+
when transformations are applied. Choose from TransformationMode enum values.
147151
148152
Raises:
149153
ValueError: A field mapping conflicts with an Entity or a Feature.
@@ -152,6 +156,7 @@ def __init__(
152156
self.entities = [e.name for e in entities] if entities else [DUMMY_ENTITY_NAME]
153157
self.ttl = ttl
154158
schema = schema or []
159+
self.mode = mode
155160

156161
# Normalize source
157162
self.stream_source = None

sdk/python/feast/infra/compute_engines/feature_builder.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,15 @@ def get_column_info(
154154
)
155155
field_mapping = self.get_field_mapping(self.task.feature_view)
156156

157+
# For feature views with transformations that need access to all source columns,
158+
# we need to read ALL source columns, not just the output feature columns.
159+
# This is specifically for transformations that create new columns or need raw data.
160+
mode = getattr(getattr(view, "feature_transformation", None), "mode", None)
161+
if mode == "ray" or getattr(mode, "value", None) == "ray":
162+
# Signal to read all columns by passing empty list for feature_cols
163+
# The transformation will produce the output columns defined in the schema
164+
feature_cols = []
165+
157166
return ColumnInfo(
158167
join_keys=join_keys,
159168
feature_cols=feature_cols,

sdk/python/feast/infra/compute_engines/ray/feature_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ def build_output_nodes(self, view, final_node):
161161
name="output",
162162
feature_view=view,
163163
inputs=[final_node],
164+
config=self.config,
164165
)
165166

166167
self.nodes.append(node)
@@ -275,6 +276,7 @@ def _build_materialization_plan(self) -> ExecutionPlan:
275276
name=f"{view.name}:write",
276277
feature_view=view,
277278
inputs=[processing_node],
279+
config=self.config,
278280
)
279281

280282
view_to_write_node[view.name] = write_node

sdk/python/feast/infra/compute_engines/ray/nodes.py

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,9 @@ def join_with_aggregated_features(batch: pd.DataFrame) -> pd.DataFrame:
173173
return result
174174

175175
joined_dataset = entity_dataset.map_batches(
176-
join_with_aggregated_features, batch_format="pandas"
176+
join_with_aggregated_features,
177+
batch_format="pandas",
178+
concurrency=self.config.max_workers or 12,
177179
)
178180
else:
179181
if feature_size <= self.config.broadcast_join_threshold_mb * 1024 * 1024:
@@ -274,8 +276,8 @@ def apply_filters(batch: pd.DataFrame) -> pd.DataFrame:
274276
else:
275277
# Use current time for TTL calculation (real-time retrieval)
276278
# Check if timestamp column is timezone-aware
277-
if pd.api.types.is_datetime64tz_dtype(
278-
filtered_batch[timestamp_col]
279+
if isinstance(
280+
filtered_batch[timestamp_col].dtype, pd.DatetimeTZDtype
279281
):
280282
# Use timezone-aware current time
281283
current_time = datetime.now(timezone.utc)
@@ -517,31 +519,59 @@ def execute(self, context: ExecutionContext) -> DAGValue:
517519
input_value.assert_format(DAGFormat.RAY)
518520
dataset: Dataset = input_value.data
519521

520-
transformation_serialized = None
521-
if hasattr(self.transformation, "udf") and callable(self.transformation.udf):
522-
transformation_serialized = dill.dumps(self.transformation.udf)
523-
elif callable(self.transformation):
524-
transformation_serialized = dill.dumps(self.transformation)
522+
# Check transformation mode
523+
from feast.transformation.mode import TransformationMode
525524

526-
@safe_batch_processor
527-
def apply_transformation_with_serialized_udf(
528-
batch: pd.DataFrame,
529-
) -> pd.DataFrame:
530-
"""Apply the transformation using pre-serialized UDF."""
531-
if transformation_serialized:
532-
transformation_func = dill.loads(transformation_serialized)
533-
transformed_batch = transformation_func(batch)
525+
transformation_mode = getattr(
526+
self.transformation, "mode", TransformationMode.PYTHON
527+
)
528+
is_ray_native = transformation_mode in (TransformationMode.RAY, "ray")
529+
if is_ray_native:
530+
transformation_func = None
531+
if hasattr(self.transformation, "udf") and callable(
532+
self.transformation.udf
533+
):
534+
transformation_func = self.transformation.udf
535+
elif callable(self.transformation):
536+
transformation_func = self.transformation
537+
538+
if transformation_func:
539+
transformed_dataset = transformation_func(dataset)
534540
else:
535541
logger.warning(
536-
"No serialized transformation available, returning original batch"
542+
"No transformation function available in RAY mode, returning original dataset"
537543
)
538-
transformed_batch = batch
544+
transformed_dataset = dataset
545+
else:
546+
transformation_serialized = None
547+
if hasattr(self.transformation, "udf") and callable(
548+
self.transformation.udf
549+
):
550+
transformation_serialized = dill.dumps(self.transformation.udf)
551+
elif callable(self.transformation):
552+
transformation_serialized = dill.dumps(self.transformation)
539553

540-
return transformed_batch
554+
@safe_batch_processor
555+
def apply_transformation_with_serialized_udf(
556+
batch: pd.DataFrame,
557+
) -> pd.DataFrame:
558+
"""Apply the transformation using pre-serialized UDF."""
559+
if transformation_serialized:
560+
transformation_func = dill.loads(transformation_serialized)
561+
transformed_batch = transformation_func(batch)
562+
else:
563+
logger.warning(
564+
"No serialized transformation available, returning original batch"
565+
)
566+
transformed_batch = batch
541567

542-
transformed_dataset = dataset.map_batches(
543-
apply_transformation_with_serialized_udf, batch_format="pandas"
544-
)
568+
return transformed_batch
569+
570+
transformed_dataset = dataset.map_batches(
571+
apply_transformation_with_serialized_udf,
572+
batch_format="pandas",
573+
concurrency=self.config.max_workers or 12,
574+
)
545575

546576
return DAGValue(
547577
data=transformed_dataset,
@@ -598,7 +628,9 @@ def apply_transformation(batch: pd.DataFrame) -> pd.DataFrame:
598628
return transformation_func(batch)
599629

600630
transformed_dataset = parent_value.data.map_batches(
601-
apply_transformation
631+
apply_transformation,
632+
batch_format="pandas",
633+
concurrency=self.config.max_workers or 12,
602634
)
603635
return DAGValue(
604636
data=transformed_dataset,
@@ -630,9 +662,11 @@ def __init__(
630662
name: str,
631663
feature_view: Union[BatchFeatureView, StreamFeatureView, FeatureView],
632664
inputs=None,
665+
config: Optional[RayComputeEngineConfig] = None,
633666
):
634667
super().__init__(name, inputs=inputs)
635668
self.feature_view = feature_view
669+
self.config = config
636670

637671
def execute(self, context: ExecutionContext) -> DAGValue:
638672
"""Execute the write operation."""
@@ -676,7 +710,9 @@ def write_batch_with_serialized_artifacts(batch: pd.DataFrame) -> pd.DataFrame:
676710
return batch
677711

678712
written_dataset = dataset.map_batches(
679-
write_batch_with_serialized_artifacts, batch_format="pandas"
713+
write_batch_with_serialized_artifacts,
714+
batch_format="pandas",
715+
concurrency=self.config.max_workers if self.config else 12,
680716
)
681717
written_dataset = written_dataset.materialize()
682718

0 commit comments

Comments (0)