@@ -3973,6 +3973,265 @@ def try_to_geometry(
39733973 return builtin ("try_to_geometry" , _emit_ast = _emit_ast )(c )
39743974
39753975
@publicapi
def base64_decode_binary(
    input_expr: ColumnOrName,
    alphabet: Optional[ColumnOrName] = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Decodes a base64-encoded string and returns the result as a binary value.

    Args:
        input_expr (ColumnOrName): A base64-encoded string to decode.
        alphabet (ColumnOrName, optional): The base64 alphabet to use for decoding.
            If not specified, uses the standard base64 alphabet.

    Returns:
        Column: A binary value containing the decoded result.

    Examples::

        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe(["SEVMUA=="], schema=["input"])
        >>> df.select(base64_decode_binary(col("input")).alias("result")).collect()
        [Row(RESULT=bytearray(b'HELP'))]

        >>> df.select(base64_decode_binary(col('input'), lit('$')).alias('result')).collect()
        [Row(RESULT=bytearray(b'HELP'))]
    """
    # `builtin` is already available at module scope (sibling functions call it
    # directly), so no function-local import is needed here.
    fn_name = "base64_decode_binary"
    call_args = [_to_col_if_str(input_expr, fn_name)]
    # Only forward the alphabet when the caller supplied one, so the builtin is
    # invoked with a single argument otherwise.
    if alphabet is not None:
        call_args.append(_to_col_if_str(alphabet, fn_name))
    return builtin(fn_name, _emit_ast=_emit_ast)(*call_args)
4012+
4013+
@publicapi
def compress(
    input_val: ColumnOrName, method: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Compresses the input value with the given compression method.

    Args:
        input_val (ColumnOrName): The input string to be compressed.
        method (ColumnOrName): The compression method (e.g., "SNAPPY").

    Returns:
        Column: The compressed binary data.

    Example::

        >>> from snowflake.snowpark.functions import lit
        >>> df = session.create_dataframe([['Snowflake'], ['Hello World']], schema=["input"])
        >>> df.select(compress(df["input"], lit("SNAPPY")).alias("compressed")).collect()
        [Row(COMPRESSED=bytearray(b'\\t Snowflake')), Row(COMPRESSED=bytearray(b'\\x0b(Hello World'))]
    """
    fn_name = "compress"
    # Convert both arguments, in positional order, before dispatching to the builtin.
    converted = [_to_col_if_str(arg, fn_name) for arg in (input_val, method)]
    return builtin(fn_name, _emit_ast=_emit_ast)(*converted)
4036+
4037+
@publicapi
def decompress_binary(
    input_data: ColumnOrName, method: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Decompresses binary data with the given compression method and returns
    the result as binary.

    Args:
        input_data (ColumnOrName): The binary data to decompress.
        method (ColumnOrName): The compression method used to decompress the data.

    Returns:
        Column: The decompressed binary data.

    Examples::

        >>> from snowflake.snowpark.functions import to_binary, lit
        >>> df = session.create_dataframe([['0920536E6F77666C616B65']], schema=["compressed_hex"])
        >>> df.select(decompress_binary(to_binary(df["compressed_hex"]), lit("SNAPPY")).alias("decompressed")).collect()
        [Row(DECOMPRESSED=bytearray(b'Snowflake'))]
    """
    fn_name = "decompress_binary"
    # Tuple-unpacking the generator converts the data first, then the method.
    data_col, method_col = (
        _to_col_if_str(arg, fn_name) for arg in (input_data, method)
    )
    return builtin(fn_name, _emit_ast=_emit_ast)(data_col, method_col)
4062+
4063+
@publicapi
def decompress_string(
    input_data: ColumnOrName, method: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Decompresses a BINARY value with the given compression method and returns
    the result as a string.

    Args:
        input_data (ColumnOrName): The compressed binary data to decompress.
        method (ColumnOrName): The compression method that was used, e.g. 'SNAPPY'.

    Returns:
        Column: The decompressed string.

    Example::

        >>> from snowflake.snowpark.functions import to_binary
        >>> df = session.create_dataframe([['0920536E6F77666C616B65', 'SNAPPY']], schema=["compressed_hex", "method"])
        >>> df.select(decompress_string(to_binary(df["compressed_hex"], 'HEX'), df["method"]).alias("decompressed")).collect()
        [Row(DECOMPRESSED='Snowflake')]
    """
    fn_name = "decompress_string"
    compressed = _to_col_if_str(input_data, fn_name)
    codec = _to_col_if_str(method, fn_name)
    decompress = builtin(fn_name, _emit_ast=_emit_ast)
    return decompress(compressed, codec)
4088+
4089+
@publicapi
def md5_binary(msg: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the MD5 hash of the input message and returns it as a binary value.

    Args:
        msg (ColumnOrName): The input message to compute the MD5 hash for.

    Returns:
        Column: The MD5 hash as a binary value (bytearray).

    Examples::

        >>> from snowflake.snowpark import Row
        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["Snowflake"], ["test"], [""]], schema=["msg"])
        >>> result = df.select(md5_binary(col("msg")).alias("md5_result")).collect()

        >>> expected = [
        ...     Row(MD5_RESULT=bytearray(b'\\xed\\xf1C\\x90u\\xa8:D\\x7f\\xb8\\xb60\\xdd\\xc9\\xc8\\xde')),  # "Snowflake"
        ...     Row(MD5_RESULT=bytearray(b"\\t\\x8fk\\xcdF!\\xd3s\\xca\\xdeN\\x83&'\\xb4\\xf6")),  # "test"
        ...     Row(MD5_RESULT=bytearray(b'\\xd4\\x1d\\x8c\\xd9\\x8f\\x00\\xb2\\x04\\xe9\\x80\\t\\x98\\xec\\xf8B~'))  # "" (empty)
        ... ]

        >>> assert result == expected
    """
    fn_name = "md5_binary"
    msg_col = _to_col_if_str(msg, fn_name)
    return builtin(fn_name, _emit_ast=_emit_ast)(msg_col)
4117+
4118+
@publicapi
def md5_number_lower64(msg: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the MD5 hash of the input message and returns its lower 64 bits
    as a number.

    Args:
        msg (ColumnOrName): The input message to hash.

    Returns:
        Column: A 64-bit number representing the lower 64 bits of the MD5 hash.

    Examples::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["Snowflake"], ["test"], ["hello"]], schema=["msg"])
        >>> df.select(md5_number_lower64(col("msg")).alias("result")).collect()
        [Row(RESULT=9203306159527282910), Row(RESULT=14618207765679027446), Row(RESULT=13362634815750784402)]
    """
    message = _to_col_if_str(msg, "md5_number_lower64")
    lower64_fn = builtin("md5_number_lower64", _emit_ast=_emit_ast)
    return lower64_fn(message)
4138+
4139+
@publicapi
def md5_number_upper64(msg: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the MD5 hash of the input message and returns its upper 64 bits
    as a number.

    Args:
        msg (ColumnOrName): The input message to hash.

    Returns:
        Column: A column containing the upper 64 bits of the MD5 hash as a number.

    Examples::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["Snowflake"], ["test"], ["hello"]], schema=["msg"])
        >>> df.select(md5_number_upper64(col("msg")).alias("result")).collect()
        [Row(RESULT=17145559544104499780), Row(RESULT=688887797400064883), Row(RESULT=6719722671305337462)]
    """
    fn_name = "md5_number_upper64"
    (message,) = [_to_col_if_str(msg, fn_name)]
    return builtin(fn_name, _emit_ast=_emit_ast)(message)
4159+
4160+
@publicapi
def sha1_binary(msg: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Computes the SHA-1 hash of the input message and returns it as a binary value.

    Args:
        msg (ColumnOrName): The input message to hash.

    Returns:
        Column: The SHA-1 hash as a binary value.

    Examples::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["Snowflake"], ["test"], ["hello"]], schema=["msg"])
        >>> df.select(sha1_binary(col("msg")).alias("sha1_result")).collect()
        [Row(SHA1_RESULT=bytearray(b'\\xfd\\xa7k\\x0b\\xcc\\x1e\\x87\\xcf%\\x9b\\x1d\\x1e2q\\xd7oY\\x0f\\xb5\\xdd')), Row(SHA1_RESULT=bytearray(b'\\xa9J\\x8f\\xe5\\xcc\\xb1\\x9b\\xa6\\x1cL\\x08s\\xd3\\x91\\xe9\\x87\\x98/\\xbb\\xd3')), Row(SHA1_RESULT=bytearray(b'\\xaa\\xf4\\xc6\\x1d\\xdc\\xc5\\xe8\\xa2\\xda\\xbe\\xde\\x0f;H,\\xd9\\xae\\xa9CM'))]
    """
    fn_name = "sha1_binary"
    message = _to_col_if_str(msg, fn_name)
    sha1_fn = builtin(fn_name, _emit_ast=_emit_ast)
    return sha1_fn(message)
4180+
4181+
@publicapi
def sha2_binary(
    msg: ColumnOrName,
    digest_size: Optional[ColumnOrName] = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Returns a binary SHA-2 hash of the input message. The digest size determines the hash algorithm used.

    Args:
        msg (ColumnOrName): The input message to hash.
        digest_size (ColumnOrName, optional): The digest size in bits. Valid values are 224, 256, 384, and 512.
            Defaults to 256 if not specified.

    Returns:
        Column: A binary representation of the SHA-2 hash.

    Examples::

        >>> from snowflake.snowpark.functions import col, lit
        >>> df = session.create_dataframe([["Snowflake"], ["test"], ["hello"]], schema=["msg"])
        >>> df.select(sha2_binary(col("msg")).alias("result")).collect()
        [Row(RESULT=bytearray(b'\\x1d\\xbdY\\xf6a\\xd6\\x8b\\x90rO!\\x08C\\x96\\xb8eIqs\\xe4\\xd2qOM\\x91\\xcf\\x05\\xfa_\\xc5\\xe1\\x8d')), Row(RESULT=bytearray(b'\\x9f\\x86\\xd0\\x81\\x88L}e\\x9a/\\xea\\xa0\\xc5Z\\xd0\\x15\\xa3\\xbfO\\x1b+\\x0b\\x82,\\xd1]l\\x15\\xb0\\xf0\\n\\x08')), Row(RESULT=bytearray(b',\\xf2M\\xba_\\xb0\\xa3\\x0e&\\xe8;*\\xc5\\xb9\\xe2\\x9e\\x1b\\x16\\x1e\\\\\\x1f\\xa7B^s\\x043b\\x93\\x8b\\x98$'))]
        >>> df.select(sha2_binary(col("msg"), lit(224)).alias("result")).collect()
        [Row(RESULT=bytearray(b'bg\\xd3\\xd7\\xa5\\x99)\\xe6\\x86M\\xd4\\xb77\\xd9\\x8e>\\xf8V\\x9d\\x9f\\x88\\xa7FfG\\x83\\x852')), Row(RESULT=bytearray(b'\\x90\\xa3\\xed\\x9e2\\xb2\\xaa\\xf4\\xc6\\x1cA\\x0e\\xb9%Ba\\x19\\xe1\\xa9\\xdcS\\xd4(j\\xde\\x99\\xa8\\t')), Row(RESULT=bytearray(b'\\xea\\t\\xae\\x9c\\xc6v\\x8cP\\xfc\\xee\\x90>\\xd0TUn[\\xfc\\x83G\\x90\\x7f\\x12Y\\x8a\\xa2A\\x93'))]
    """
    fn_name = "sha2_binary"
    call_args = [_to_col_if_str(msg, fn_name)]
    # Pass the digest size through only when given; otherwise the builtin is
    # called with the message alone and the server-side default applies.
    if digest_size is not None:
        call_args.append(_to_col_if_str(digest_size, fn_name))
    return builtin(fn_name, _emit_ast=_emit_ast)(*call_args)
4210+
4211+
@publicapi
def soundex_p123(varchar_expr: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns a phonetic representation of a string using the Soundex algorithm with P123 encoding.
    Names or words that sound alike are mapped to the same code, which makes the
    function useful for fuzzy matching and searching.

    Args:
        varchar_expr (ColumnOrName): The string expression to convert to Soundex P123 format.

    Returns:
        Column: The Soundex P123 encoded string.

    Examples::

        >>> from snowflake.snowpark.functions import col
        >>> df = session.create_dataframe([["Pfister"], ["Lloyd"], ["Smith"], ["Johnson"]], schema=["name"])
        >>> df.select(soundex_p123(col("name")).alias("soundex_result")).collect()
        [Row(SOUNDEX_RESULT='P123'), Row(SOUNDEX_RESULT='L430'), Row(SOUNDEX_RESULT='S530'), Row(SOUNDEX_RESULT='J525')]
    """
    fn_name = "soundex_p123"
    text_col = _to_col_if_str(varchar_expr, fn_name)
    return builtin(fn_name, _emit_ast=_emit_ast)(text_col)
4233+
4234+
39764235@publicapi
39774236def booland_agg (expr : ColumnOrName , _emit_ast : bool = True ) -> Column :
39784237 """
0 commit comments