224224 StoredProcedureRegistration ,
225225)
226226from snowflake .snowpark .types import (
227+ ArrayType ,
227228 DataType ,
228229 FloatType ,
229230 PandasDataFrameType ,
@@ -3561,20 +3562,67 @@ def _concat_ws_ignore_nulls(sep: str, *cols: ColumnOrName) -> Column:
35613562 |Hello |
35623563 -----------------------------------------------------
35633564 <BLANKLINE>
3565+
3566+ >>> df = session.create_dataframe([
3567+ ... (['Hello', 'World', None], None, '!'),
3568+ ... (['Hi', 'World', "."], "I'm Dad", '.'),
3569+ ... ], schema=['a', 'b', 'c'])
3570+ >>> df.select(_concat_ws_ignore_nulls(", ", "a", "b", "c")).show()
3571+ -----------------------------------------------------
3572+ |"CONCAT_WS_IGNORE_NULLS(', ', ""A"",""B"",""C"")" |
3573+ -----------------------------------------------------
3574+ |Hello, World, ! |
3575+ |Hi, World, ., I'm Dad, . |
3576+ -----------------------------------------------------
3577+ <BLANKLINE>
35643578 """
35653579 # TODO: SNOW-1831917 create ast
35663580 columns = [_to_col_if_str (c , "_concat_ws_ignore_nulls" ) for c in cols ]
35673581 names = "," .join ([c .get_name () for c in columns ])
35683582
3569- input_column_array = array_construct_compact (* columns , _emit_ast = False )
3570- reduced_result = builtin ("reduce" , _emit_ast = False )(
3571- input_column_array ,
3572- lit ("" , _emit_ast = False ),
3573- sql_expr (f"(l, r) -> l || '{ sep } ' || r" ),
3574- )
3575- return substring (reduced_result , len (sep ) + 1 , _emit_ast = False ).alias (
3576- f"CONCAT_WS_IGNORE_NULLS('{ sep } ', { names } )" , _emit_ast = False
3577- )
3583+ # The implementation of this function is as follows with example input of
3584+ # sep = "," and row = [a, NULL], b, NULL, c:
3585+ # 1. Cast all columns to array.
3586+ # [a, NULL], [b], NULL, [c]
3587+ # 2. Combine all arrays into an array of arrays after removing nulls (array_construct_compact).
3588+ # [[a, NULL], [b], [c]]
3589+ # 3. Flatten the array of arrays into a single array (array_flatten).
3590+ # [a, NULL, b, c]
3591+ # 4. Filter out nulls (array_remove_nulls).
3592+ # [a, b, c]
3593+ # 5. Concatenate the non-null values into a single string (concat_strings_with_sep).
3594+ # "a,b,c"
3595+
def array_remove_nulls(col: Column) -> Column:
    """Take an array column and return it with the null elements dropped.

    Applies the ``filter`` builtin with a lambda that keeps only the
    elements for which ``IS_NULL_VALUE`` is false.
    """
    filter_fn = builtin("filter", _emit_ast=False)
    keep_non_null = sql_expr("x -> NOT IS_NULL_VALUE(x)", _emit_ast=False)
    return filter_fn(col, keep_non_null)
3601+
def concat_strings_with_sep(col: Column) -> Column:
    """
    Expects an array of strings and returns a single string
    with the values concatenated with the separator.

    The ``reduce`` lambda prepends the separator to every element, so the
    accumulated string starts with one unwanted separator; the final
    ``substring`` (1-based start at ``len(sep) + 1``) strips that prefix.
    """
    # Double any single quote so the separator remains a well-formed SQL
    # string literal inside the lambda text.
    # NOTE(review): backslashes in `sep` are passed through unchanged --
    # confirm whether they also need escaping for the target SQL dialect.
    quoted_sep = sep.replace("'", "''")
    joined = builtin("reduce", _emit_ast=False)(
        col,
        # Suppress AST emission for the seed literal as well, matching every
        # other expression built by this helper.
        lit("", _emit_ast=False),
        sql_expr(f"(l, r) -> l || '{quoted_sep}' || r", _emit_ast=False),
    )
    # The offset uses the length of the original (unescaped) separator,
    # because that is what the SQL literal evaluates to.
    return substring(joined, len(sep) + 1, _emit_ast=False)
3614+
3615+ return concat_strings_with_sep (
3616+ array_remove_nulls (
3617+ array_flatten (
3618+ array_construct_compact (
3619+ * [c .cast (ArrayType (), _emit_ast = False ) for c in columns ],
3620+ _emit_ast = False ,
3621+ ),
3622+ _emit_ast = False ,
3623+ )
3624+ )
3625+ ).alias (f"CONCAT_WS_IGNORE_NULLS('{ sep } ', { names } )" , _emit_ast = False )
35783626
35793627
35803628@publicapi
@@ -3828,6 +3876,19 @@ def date_format(
38283876 |2022/05/15 10:45:00 |
38293877 -----------------------
38303878 <BLANKLINE>
3879+
3880+ Example::
3881+ >>> df = session.sql("select '2023-10-10'::DATE as date_col, '2023-10-10 15:30:00'::TIMESTAMP as timestamp_col")
3882+ >>> df.select(
3883+ ... date_format('date_col', 'YYYY/MM/DD').as_('formatted_dt'),
3884+ ... date_format('timestamp_col', 'YYYY/MM/DD HH:mi:ss').as_('formatted_ts')
3885+ ... ).show()
3886+ ----------------------------------------
3887+ |"FORMATTED_DT" |"FORMATTED_TS" |
3888+ ----------------------------------------
3889+ |2023/10/10 |2023/10/10 15:30:00 |
3890+ ----------------------------------------
3891+ <BLANKLINE>
38313892 """
38323893
38333894 # AST.
@@ -3836,7 +3897,11 @@ def date_format(
38363897 ast = proto .Expr ()
38373898 build_builtin_fn_apply (ast , "date_format" , c , fmt )
38383899
3839- ans = to_char (try_cast (c , TimestampType (), _emit_ast = False ), fmt , _emit_ast = False )
3900+ ans = to_char (
3901+ try_cast (to_char (c , _emit_ast = False ), TimestampType (), _emit_ast = False ),
3902+ fmt ,
3903+ _emit_ast = False ,
3904+ )
38403905 ans ._ast = ast
38413906 return ans
38423907
0 commit comments