@@ -3971,3 +3971,227 @@ def try_to_geometry(
39713971 return builtin ("try_to_geometry" , _emit_ast = _emit_ast )(c , allow_invalid_col )
39723972 else :
39733973 return builtin ("try_to_geometry" , _emit_ast = _emit_ast )(c )
3974+
3975+
@publicapi
def strtok(
    string: ColumnOrName,
    delimiter: ColumnOrName = None,
    part_nr: ColumnOrName = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Tokenizes a string with the given set of delimiters and returns the requested part.

    Args:
        string (ColumnOrName): The string to be tokenized.
        delimiter (ColumnOrName, optional): A set of delimiters. Each character in the
            delimiter string is treated as a delimiter. If not specified, defaults to a
            single space character.
        part_nr (ColumnOrName, optional): The requested part number (1-based). If not
            specified, the first part is returned. (A string that contains none of the
            delimiters is a single part, so it is returned unchanged.)

    Returns:
        Column: The requested part of the tokenized string.

    Examples::
        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe([["a.b.c"]], schema=["string_col"])
        >>> df.select(strtok(col("string_col")).alias("result")).collect()
        [Row(RESULT='a.b.c')]
        >>> df.select(strtok(col("string_col"), lit(".")).alias("result")).collect()
        [Row(RESULT='a')]
        >>> df.select(strtok(col("string_col"), lit("."), lit(2)).alias("result")).collect()
        [Row(RESULT='b')]
        >>> df2 = session.create_dataframe([["user@example.com"]], schema=["string_col"])
        >>> df2.select(strtok(col("string_col"), lit("@."), lit(1)).alias("result")).collect()
        [Row(RESULT='user')]
        >>> df2.select(strtok(col("string_col"), lit("@."), lit(3)).alias("result")).collect()
        [Row(RESULT='com')]
    """
    args = [_to_col_if_str(string, "strtok")]

    if part_nr is not None:
        # The arguments are positional, so a part number can only be sent
        # together with a delimiter; fall back to the single-space default
        # when the caller did not supply one.
        args.append(
            _to_col_if_str(delimiter, "strtok") if delimiter is not None else lit(" ")
        )
        args.append(_to_col_if_str(part_nr, "strtok"))
    elif delimiter is not None:
        args.append(_to_col_if_str(delimiter, "strtok"))

    return builtin("strtok", _emit_ast=_emit_ast)(*args)
4024+
4025+
@publicapi
def try_base64_decode_binary(
    input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Converts a base64-encoded string into binary data, yielding None instead of
    failing when the input is not valid base64.

    Args:
        input_expr (ColumnOrName): The base64-encoded string to decode.
        alphabet (ColumnOrName, optional): The base64 alphabet to decode with. When
            omitted, the standard base64 alphabet is used.

    Returns:
        Column: A column holding the decoded binary data, or None for invalid input.

    Examples::
        >>> from snowflake.snowpark.functions import base64_encode
        >>> df = session.create_dataframe(["HELP", "TEST"], schema=["input"])
        >>> df.select(try_base64_decode_binary(base64_encode(df["input"]))).collect()
        [Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY(BASE64_ENCODE("INPUT"))=bytearray(b'TEST'))]

        >>> df2 = session.create_dataframe(["SEVMUA==", "VEVTVA=="], schema=["encoded"])
        >>> df2.select(try_base64_decode_binary(df2["encoded"])).collect()
        [Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'HELP')), Row(TRY_BASE64_DECODE_BINARY("ENCODED")=bytearray(b'TEST'))]

        >>> df3 = session.create_dataframe(["invalid_base64!"], schema=["bad_input"])
        >>> df3.select(try_base64_decode_binary(df3["bad_input"])).collect()
        [Row(TRY_BASE64_DECODE_BINARY("BAD_INPUT")=None)]
    """
    # Collect the positional arguments first; the alphabet is only forwarded
    # when the caller actually provided one.
    call_args = [_to_col_if_str(input_expr, "try_base64_decode_binary")]
    if alphabet is not None:
        call_args.append(_to_col_if_str(alphabet, "try_base64_decode_binary"))

    return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)(*call_args)
4063+
4064+
@publicapi
def try_base64_decode_string(
    input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Decodes a base64-encoded string back into a string. Input that is not valid
    base64 produces NULL rather than an error.

    Args:
        input_expr (ColumnOrName): A base64-encoded string to decode.
        alphabet (ColumnOrName, optional): The base64 alphabet to decode with. When
            omitted, the standard base64 alphabet is used.

    Returns:
        Column: The decoded string, or NULL if the input is not valid base64.

    Examples::
        >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect()
        [Row(RESULT='HELLO')]

        >>> df = session.create_dataframe([["invalid_base64"]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect()
        [Row(RESULT=None)]

        >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"], lit('$')).alias('result')).collect()
        [Row(RESULT='HELLO')]
    """
    encoded_col = _to_col_if_str(input_expr, "try_base64_decode_string")

    # Without a custom alphabet the single-argument form is used.
    if alphabet is None:
        return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(encoded_col)

    custom_alphabet = _to_col_if_str(alphabet, "try_base64_decode_string")
    return builtin("try_base64_decode_string", _emit_ast=_emit_ast)(
        encoded_col, custom_alphabet
    )
4098+
4099+
@publicapi
def try_hex_decode_binary(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Converts a hex-encoded string into binary data, yielding None when the input
    is not a valid hex string.

    Args:
        input_expr (ColumnOrName): A hex-encoded string to decode to binary data.

    Returns:
        Column: The decoded binary data as bytearray, or None if input is invalid.

    Examples::
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["41426162"], ["48656C6C6F"], ["576F726C64"]], schema=["hex_string"])
        >>> df.select(try_hex_decode_binary(col("hex_string")).alias("decoded_binary")).collect()
        [Row(DECODED_BINARY=bytearray(b'ABab')), Row(DECODED_BINARY=bytearray(b'Hello')), Row(DECODED_BINARY=bytearray(b'World'))]
    """
    hex_column = _to_col_if_str(input_expr, "try_hex_decode_binary")
    decode = builtin("try_hex_decode_binary", _emit_ast=_emit_ast)
    return decode(hex_column)
4119+
4120+
@publicapi
def try_hex_decode_string(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Decodes a hex-encoded string back to the string it represents, yielding None
    when the input is not a valid hex string.

    Args:
        input_expr (ColumnOrName): The hex-encoded string to decode.

    Returns:
        Column: The decoded string, or None if the input is not valid hex.

    Examples::
        >>> df = session.create_dataframe([["41614262"], ["127"], ["invalid_hex"]], schema=["hex_input"])
        >>> df.select(try_hex_decode_string(df["hex_input"]).alias("decoded")).collect()
        [Row(DECODED='AaBb'), Row(DECODED=None), Row(DECODED=None)]
    """
    hex_column = _to_col_if_str(input_expr, "try_hex_decode_string")
    decode = builtin("try_hex_decode_string", _emit_ast=_emit_ast)
    return decode(hex_column)
4139+
4140+
@publicapi
def unicode(input_str: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns the Unicode code point of the first character of a string.

    Args:
        input_str (ColumnOrName): The string column (or column name) whose first
            character is inspected.

    Returns:
        Column: The Unicode code point of the first character. Returns 0 for empty strings.

    Examples::
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([['a'], ['❄'], ['cde'], ['']], schema=["input_str"])
        >>> df.select(unicode(col("input_str")).alias("unicode_result")).collect()
        [Row(UNICODE_RESULT=97), Row(UNICODE_RESULT=10052), Row(UNICODE_RESULT=99), Row(UNICODE_RESULT=0)]
    """
    text_column = _to_col_if_str(input_str, "unicode")
    code_point_of = builtin("unicode", _emit_ast=_emit_ast)
    return code_point_of(text_column)
4160+
4161+
@publicapi
def uuid_string(
    uuid: ColumnOrName = None, name: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Returns a universally unique identifier (UUID) as a string.

    Args:
        uuid (ColumnOrName, optional): The namespace UUID as a string. If provided,
            generates a UUID based on this namespace.
        name (ColumnOrName, optional): The name to use for UUID generation. Used in
            combination with uuid parameter.

    Whichever of ``uuid`` and ``name`` are not None are forwarded, in that order and
    without conversion, as the positional arguments of the ``uuid_string`` builtin.
    When only ``name`` is given it is therefore sent as the sole argument.

    Returns:
        Column: The UUID string.

    Examples::
        >>> df = session.create_dataframe([["test"]], schema=["a"])
        >>> df.select(uuid_string().alias("random_uuid")).collect()  # doctest: +SKIP
        [Row(RANDOM_UUID='...')]

        >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d", "foo").alias("named_uuid")).collect()  # doctest: +SKIP
        [Row(NAMED_UUID='...')]

        >>> df.select(uuid_string("fe971b24-9572-4005-b22f-351e9c09274d").alias("uuid_with_namespace")).collect()  # doctest: +SKIP
        [Row(UUID_WITH_NAMESPACE='...')]

        >>> df.select(uuid_string(name="foo").alias("uuid_with_name")).collect()  # doctest: +SKIP
        [Row(UUID_WITH_NAME='...')]
    """
    # NOTE(review): Snowflake documents UUID_STRING with either zero or two
    # arguments -- confirm that the single-argument forms are accepted server-side.
    #
    # A single returning call covers all four combinations. The previous
    # name-only branch built the Column but never returned it, so callers
    # received None.
    args = [arg for arg in (uuid, name) if arg is not None]
    return builtin("uuid_string", _emit_ast=_emit_ast)(*args)
0 commit comments