@@ -4230,3 +4230,232 @@ def soundex_p123(varchar_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
42304230 """
42314231 c = _to_col_if_str (varchar_expr , "soundex_p123" )
42324232 return builtin ("soundex_p123" , _emit_ast = _emit_ast )(c )
4233+
4234+
@publicapi
def strtok(
    string: ColumnOrName,
    delimiter: ColumnOrName = None,
    part_nr: ColumnOrName = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Tokenizes a string with the given set of delimiters and returns the requested part.

    Args:
        string (ColumnOrName): The string to be tokenized.
        delimiter (ColumnOrName, optional): A set of delimiters. Each character in the
            delimiter string is treated as a delimiter. If not specified, defaults to a
            single space character. Wrap literal delimiters in ``lit`` (as the examples
            below do) rather than passing a bare ``str``.
        part_nr (ColumnOrName, optional): The requested part number (1-based). If not
            specified, the first part is returned. A string that contains none of the
            delimiters therefore comes back unchanged, because it is a single token.

    Returns:
        Column: The requested part of the tokenized string.

    Examples::
        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe([["a.b.c"]], schema=["string_col"])
        >>> df.select(strtok(col("string_col")).alias("result")).collect()
        [Row(RESULT='a.b.c')]
        >>> df.select(strtok(col("string_col"), lit(".")).alias("result")).collect()
        [Row(RESULT='a')]
        >>> df.select(strtok(col("string_col"), lit("."), lit(2)).alias("result")).collect()
        [Row(RESULT='b')]
        >>> df2 = session.create_dataframe([["user@example.com"]], schema=["string_col"])
        >>> df2.select(strtok(col("string_col"), lit("@."), lit(1)).alias("result")).collect()
        [Row(RESULT='user')]
        >>> df2.select(strtok(col("string_col"), lit("@."), lit(3)).alias("result")).collect()
        [Row(RESULT='com')]
    """
    # Collect the positional arguments once instead of repeating the builtin
    # call in every branch; the resulting call shapes are unchanged:
    #   strtok(string), strtok(string, delimiter), strtok(string, delimiter, part_nr)
    args = [_to_col_if_str(string, "strtok")]

    if part_nr is not None:
        # part_nr is the third positional argument, so a delimiter must be
        # supplied as well; fall back to the single-space default when the
        # caller did not give one.
        args.append(
            _to_col_if_str(delimiter, "strtok") if delimiter is not None else lit(" ")
        )
        args.append(_to_col_if_str(part_nr, "strtok"))
    elif delimiter is not None:
        args.append(_to_col_if_str(delimiter, "strtok"))

    return builtin("strtok", _emit_ast=_emit_ast)(*args)
4283+
4284+
@publicapi
def try_base64_decode_binary(
    input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Converts a base64-encoded string into binary data, yielding None instead of
    failing when the input is not valid base64.

    Args:
        input_expr (ColumnOrName): Column holding the base64-encoded text.
        alphabet (ColumnOrName, optional): Alternative base64 alphabet to decode with.
            The standard base64 alphabet is used when this is omitted.

    Returns:
        Column: The decoded binary value, or None for input that cannot be decoded.

    Examples::
        >>> from snowflake.snowpark.functions import base64_encode
        >>> df = session.create_dataframe(["HELP", "TEST"], schema=["input"])
        >>> df.select(try_base64_decode_binary(base64_encode(df["input"])).alias("result")).collect()
        [Row(RESULT=bytearray(b'HELP')), Row(RESULT=bytearray(b'TEST'))]

        >>> df2 = session.create_dataframe(["SEVMUA==", "VEVTVA=="], schema=["encoded"])
        >>> df2.select(try_base64_decode_binary(df2["encoded"]).alias("result")).collect()
        [Row(RESULT=bytearray(b'HELP')), Row(RESULT=bytearray(b'TEST'))]

        >>> df3 = session.create_dataframe(["invalid_base64!"], schema=["bad_input"])
        >>> df3.select(try_base64_decode_binary(df3["bad_input"]).alias("result")).collect()
        [Row(RESULT=None)]

        >>> df4 = session.create_dataframe(["SEVMUA=="], schema=["encoded"])
        >>> df4.select(try_base64_decode_binary(df4["encoded"], lit("$")).alias("result")).collect()
        [Row(RESULT=bytearray(b'HELP'))]
    """
    encoded = _to_col_if_str(input_expr, "try_base64_decode_binary")

    # Common case first: no custom alphabet means the one-argument SQL form.
    if alphabet is None:
        return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)(encoded)

    custom_alphabet = _to_col_if_str(alphabet, "try_base64_decode_binary")
    return builtin("try_base64_decode_binary", _emit_ast=_emit_ast)(
        encoded, custom_alphabet
    )
4326+
4327+
@publicapi
def try_base64_decode_string(
    input_expr: ColumnOrName, alphabet: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Converts a base64-encoded string back into text. Input that is not valid
    base64 produces NULL rather than an error.

    Args:
        input_expr (ColumnOrName): Column holding the base64-encoded text.
        alphabet (ColumnOrName, optional): Alternative base64 alphabet to decode with.
            The standard base64 alphabet is used when this is omitted.

    Returns:
        Column: The decoded string, or NULL when the input cannot be decoded.

    Examples::
        >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect()
        [Row(RESULT='HELLO')]

        >>> df = session.create_dataframe([["invalid_base64"]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"]).alias('result')).collect()
        [Row(RESULT=None)]

        >>> df = session.create_dataframe([["SEVMTE8="]], schema=["encoded"])
        >>> df.select(try_base64_decode_string(df["encoded"], lit('$')).alias('result')).collect()
        [Row(RESULT='HELLO')]
    """
    fn_name = "try_base64_decode_string"

    # The alphabet is appended only when supplied, so the generated call is
    # either the one- or the two-argument form.
    operands = [_to_col_if_str(input_expr, fn_name)]
    if alphabet is not None:
        operands.append(_to_col_if_str(alphabet, fn_name))

    return builtin(fn_name, _emit_ast=_emit_ast)(*operands)
4361+
4362+
@publicapi
def try_hex_decode_binary(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Converts a hex-encoded string into binary data, yielding None when the
    input is not a valid hex string.

    Args:
        input_expr (ColumnOrName): Column holding the hex-encoded text.

    Returns:
        Column: The decoded bytes (surfaced as ``bytearray`` in collected rows),
        or None for invalid input.

    Examples::
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["41426162"], ["48656C6C6F"], ["576F726C64"]], schema=["hex_string"])
        >>> df.select(try_hex_decode_binary(col("hex_string")).alias("decoded_binary")).collect()
        [Row(DECODED_BINARY=bytearray(b'ABab')), Row(DECODED_BINARY=bytearray(b'Hello')), Row(DECODED_BINARY=bytearray(b'World'))]
    """
    fn_name = "try_hex_decode_binary"
    hex_col = _to_col_if_str(input_expr, fn_name)
    decoder = builtin(fn_name, _emit_ast=_emit_ast)
    return decoder(hex_col)
4382+
4383+
@publicapi
def try_hex_decode_string(input_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Converts a hex-encoded string back into the text it represents, yielding
    None when the input is not a valid hex string.

    Args:
        input_expr (ColumnOrName): Column holding the hex-encoded text.

    Returns:
        Column: The decoded string, or None for input that is not valid hex.

    Examples::
        >>> df = session.create_dataframe([["41614262"], ["127"], ["invalid_hex"]], schema=["hex_input"])
        >>> df.select(try_hex_decode_string(df["hex_input"]).alias("decoded")).collect()
        [Row(DECODED='AaBb'), Row(DECODED=None), Row(DECODED=None)]
    """
    fn_name = "try_hex_decode_string"
    hex_col = _to_col_if_str(input_expr, fn_name)
    decoder = builtin(fn_name, _emit_ast=_emit_ast)
    return decoder(hex_col)
4402+
4403+
@publicapi
def unicode(input_str: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Gives the Unicode code point of the first character of a string.

    Args:
        input_str (ColumnOrName): Column holding the string whose first character
            is inspected.

    Returns:
        Column: The code point of the first character; 0 when the string is empty.

    Examples::
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([['a'], ['❄'], ['cde'], ['']], schema=["input_str"])
        >>> df.select(unicode(col("input_str")).alias("unicode_result")).collect()
        [Row(UNICODE_RESULT=97), Row(UNICODE_RESULT=10052), Row(UNICODE_RESULT=99), Row(UNICODE_RESULT=0)]
    """
    fn_name = "unicode"
    text_col = _to_col_if_str(input_str, fn_name)
    code_point_of = builtin(fn_name, _emit_ast=_emit_ast)
    return code_point_of(text_col)
4423+
4424+
@publicapi
def uuid_string(
    uuid: ColumnOrName = None, name: ColumnOrName = None, _emit_ast: bool = True
) -> Column:
    """
    Returns a universally unique identifier (UUID) as a string.

    Called without arguments, a random UUID is generated. Called with both a
    namespace ``uuid`` and a ``name``, the UUID is derived from that pair, so the
    same inputs always give the same result. The two arguments must be supplied
    together.

    Args:
        uuid (ColumnOrName, optional): The namespace UUID as a string. Requires ``name``.
        name (ColumnOrName, optional): The name to use for UUID generation. Requires ``uuid``.

    Returns:
        Column: The UUID string.

    Raises:
        TypeError: If only one of ``uuid`` and ``name`` is provided.

    Examples::
        >>> from snowflake.snowpark.functions import col

        >>> df = session.create_dataframe(
        ...     [["fe971b24-9572-4005-b22f-351e9c09274d", "foo"]], schema=["uuid", "name"]
        ... )

        >>> df.select(uuid_string().alias("random_uuid")).collect()
        [Row(RANDOM_UUID='...')]

        >>> result = df.select(
        ...     uuid_string(col("uuid"), col("name")).alias("NAMED_UUID")
        ... ).collect()
        >>> expected_uuid = "dc0b6f65-fca6-5b4b-9d37-ccc3fde1f3e2"
        >>> result[0]["NAMED_UUID"] == expected_uuid
        True
    """
    if uuid is None and name is None:
        return builtin("uuid_string", _emit_ast=_emit_ast)()

    # Fail early with a message that states the pairing rule, instead of
    # letting a lone None reach the column-conversion helper.
    # NOTE(review): TypeError is used on the assumption that it matches what
    # `_to_col_if_str` raises for a None argument — confirm against the helper.
    if uuid is None or name is None:
        raise TypeError(
            "uuid_string() requires both 'uuid' and 'name' when either one is provided."
        )

    uuid_col = _to_col_if_str(uuid, "uuid_string")
    name_col = _to_col_if_str(name, "uuid_string")
    return builtin("uuid_string", _emit_ast=_emit_ast)(uuid_col, name_col)
0 commit comments