|
22 | 22 | from __future__ import annotations |
23 | 23 |
|
24 | 24 | import warnings |
| 25 | +from enum import Enum |
25 | 26 | from typing import ( |
26 | 27 | TYPE_CHECKING, |
27 | 28 | Any, |
|
33 | 34 | overload, |
34 | 35 | ) |
35 | 36 |
|
| 37 | +import pyarrow as pa |
36 | 38 | from typing_extensions import deprecated |
37 | 39 |
|
| 40 | +from datafusion import functions as f |
| 41 | +from datafusion._internal import DataFrame as DataFrameInternal |
| 42 | +from datafusion.expr import Expr, SortExpr, sort_or_default |
38 | 43 | from datafusion.plan import ExecutionPlan, LogicalPlan |
39 | 44 | from datafusion.record_batch import RecordBatchStream |
40 | 45 |
|
41 | | -import pyarrow as pa |
42 | | -from datafusion import functions as f |
43 | | - |
44 | 46 | if TYPE_CHECKING: |
45 | 47 | import pathlib |
46 | 48 | from typing import Callable, Sequence |
47 | 49 |
|
48 | 50 | import pandas as pd |
49 | 51 | import polars as pl |
50 | | - |
51 | | -from enum import Enum |
52 | | - |
53 | | -from datafusion._internal import DataFrame as DataFrameInternal |
54 | | -from datafusion.expr import Expr, SortExpr, sort_or_default |
55 | | - |
56 | | - |
57 | 52 | # excerpt from deltalake |
58 | 53 | # https://github.com/apache/datafusion-python/pull/981#discussion_r1905619163 |
59 | 54 | class Compression(Enum): |
@@ -868,14 +863,14 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame": |
868 | 863 |
|
869 | 864 | Examples: |
870 | 865 | >>> df = df.fill_null(0) # Fill all nulls with 0 where possible |
871 | | - >>> df = df.fill_null("missing", subset=["name", "category"]) # Fill string columns |
| 866 | + >>> # Fill nulls in specific string columns |
| 867 | + >>> df = df.fill_null("missing", subset=["name", "category"]) |
872 | 868 |
|
873 | 869 | Notes: |
874 | 870 | - Only fills nulls in columns where the value can be cast to the column type |
875 | 871 | - For columns where casting fails, the original column is kept unchanged |
876 | 872 | - For columns not in subset, the original column is kept unchanged |
877 | 873 | """ |
878 | | - |
879 | 874 | # Get columns to process |
880 | 875 | if subset is None: |
881 | 876 | subset = self.schema().names |
@@ -910,29 +905,28 @@ def fill_null(self, value: Any, subset: list[str] | None = None) -> "DataFrame": |
910 | 905 |
|
911 | 906 | return self.select(*exprs) |
912 | 907 |
|
913 | | - def fill_nan( |
914 | | - self, value: float | int, subset: list[str] | None = None |
915 | | - ) -> "DataFrame": |
| 908 | + def fill_nan(self, value: float | int, subset: list[str] | None = None) -> "DataFrame": |
916 | 909 | """Fill NaN values in specified numeric columns with a value. |
917 | 910 |
|
918 | 911 | Args: |
919 | | - value: Numeric value to replace NaN values with |
920 | | - subset: Optional list of column names to fill. If None, fills all numeric columns. |
| 912 | + value: Numeric value to replace NaN values with. |
| 913 | + subset: Optional list of column names to fill. If None, fills all numeric |
| 914 | + columns. |
921 | 915 |
|
922 | 916 | Returns: |
923 | | - DataFrame with NaN values replaced in numeric columns |
| 917 | + DataFrame with NaN values replaced in numeric columns. |
924 | 918 |
|
925 | 919 | Examples: |
926 | 920 | >>> df = df.fill_nan(0) # Fill all NaNs with 0 in numeric columns |
927 | | - >>> df = df.fill_nan(99.9, subset=["price", "score"]) # Fill specific columns |
| 921 | + >>> # Fill NaNs in specific numeric columns |
| 922 | + >>> df = df.fill_nan(99.9, subset=["price", "score"]) |
928 | 923 |
|
929 | 924 | Notes: |
930 | 925 | - Only fills NaN values in numeric columns (float32, float64) |
931 | 926 | - Non-numeric columns are kept unchanged |
932 | 927 | - For columns not in subset, the original column is kept unchanged |
933 | 928 | - Value must be numeric (int or float) |
934 | 929 | """ |
935 | | - |
936 | 930 | if not isinstance(value, (int, float)): |
937 | 931 | raise ValueError("Value must be numeric (int or float)") |
938 | 932 |
|
|
0 commit comments