@@ -30,6 +30,29 @@ def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column:
3030 return _invoke_function (name , * cols )
3131
3232
def _nan_constant() -> Expression:
    """Build an expression that evaluates to a floating-point NaN.

    ``ConstantExpression(float("nan"))`` cannot be used for this purpose: it
    produces NULL rather than NaN. Per the original author's note, this is
    because ``TransformPythonValue()`` in the C++ layer defaults to
    ``nan_as_null=true``, which is intentional for data-import paths (CSV,
    Pandas, etc.) where NaN stands for missing data.

    Mathematical functions that must yield NaN (not NULL) for out-of-range
    inputs, following PySpark / IEEE 754 semantics, therefore obtain the value
    through a SQL cast of the string ``'NaN'`` to ``DOUBLE`` instead.

    Returns
    -------
    Expression
        An expression that evaluates to NaN (not NULL).

    See Also
    --------
    NAN_ROOT_CAUSE_ANALYSIS.md : background write-up referenced by the
        original author.
    """
    # Casting the literal in SQL sidesteps the Python-value conversion that
    # would otherwise map NaN to NULL.
    nan_expr = SQLExpression("'NaN'::DOUBLE")
    return nan_expr
54+
55+
def col(column: str) -> Column:
    """Return a ``Column`` wrapping a ``ColumnExpression`` for ``column``."""
    column_expr = ColumnExpression(column)
    return Column(column_expr)
3558
@@ -617,11 +640,9 @@ def asin(col: "ColumnOrName") -> Column:
617640 +--------+
618641 """
619642 col = _to_column_expr (col )
620- # TODO: ConstantExpression(float("nan")) gives NULL and not NaN # noqa: TD002, TD003
643+ # asin domain is [-1, 1]; return NaN for out-of-range values per PySpark semantics
621644 return Column (
622- CaseExpression ((col < - 1.0 ) | (col > 1.0 ), ConstantExpression (float ("nan" ))).otherwise (
623- FunctionExpression ("asin" , col )
624- )
645+ CaseExpression ((col < - 1.0 ) | (col > 1.0 ), _nan_constant ()).otherwise (FunctionExpression ("asin" , col ))
625646 )
626647
627648
@@ -4177,7 +4198,11 @@ def acos(col: "ColumnOrName") -> Column:
41774198 | NaN|
41784199 +--------+
41794200 """
4180- return _invoke_function_over_columns ("acos" , col )
4201+ col = _to_column_expr (col )
4202+ # acos domain is [-1, 1]; return NaN for out-of-range values per PySpark semantics
4203+ return Column (
4204+ CaseExpression ((col < - 1.0 ) | (col > 1.0 ), _nan_constant ()).otherwise (FunctionExpression ("acos" , col ))
4205+ )
41814206
41824207
41834208def call_function (funcName : str , * cols : "ColumnOrName" ) -> Column :
0 commit comments