1+ """Unit tests for cleaning transforms."""
2+ import pytest
3+ from airbyte_cdk .utils .transforms .cleaning import (
4+ to_lower ,
5+ strip_whitespace ,
6+ squash_whitespace ,
7+ normalize_unicode ,
8+ remove_punctuation ,
9+ map_values ,
10+ cast_numeric ,
11+ )
12+
def test_to_lower():
    """Verify ``to_lower`` against mixed-case, punctuated, empty and ``None`` input."""
    # (input, expected) pairs, checked in order: plain words first, then text
    # containing spaces, punctuation and digits, and finally the empty string.
    expectations = (
        ("Hello", "hello"),
        ("HELLO", "hello"),
        ("HeLLo", "hello"),
        ("Hello World!", "hello world!"),
        ("Hello123", "hello123"),
        ("", ""),
    )
    for raw, lowered in expectations:
        assert to_lower(raw) == lowered

    # None is expected back as the very same singleton, so this is an
    # identity check rather than an equality check.
    assert to_lower(None) is None
27+
def test_strip_whitespace():
    """Verify ``strip_whitespace`` trims both ends and leaves interior text alone."""
    # (input, expected) pairs: padded and unpadded words, tab/newline padding,
    # interior whitespace that must survive, and whitespace-only / empty input.
    expectations = (
        (" hello ", "hello"),
        ("hello", "hello"),
        ("\t hello\n ", "hello"),
        (" hello\n world ", "hello\n world"),
        (" ", ""),
        ("", ""),
    )
    for padded, trimmed in expectations:
        assert strip_whitespace(padded) == trimmed

    # None is expected back unchanged (identity, not equality).
    assert strip_whitespace(None) is None
42+
def test_squash_whitespace():
    """Verify ``squash_whitespace`` collapses whitespace runs and trims the ends."""
    # (input, expected) pairs: spaces only, then runs mixing newlines and tabs,
    # then whitespace-only and empty input, which are expected to become "".
    expectations = (
        ("hello world", "hello world"),
        (" hello world ", "hello world"),
        ("hello\n \n world", "hello world"),
        ("hello\t \t world", "hello world"),
        ("\n hello \t world \n ", "hello world"),
        (" ", ""),
        ("", ""),
    )
    for messy, squashed in expectations:
        assert squash_whitespace(messy) == squashed

    # None is expected back unchanged (identity, not equality).
    assert squash_whitespace(None) is None
58+
def test_normalize_unicode():
    """Verify ``normalize_unicode`` for ASCII, composed, decomposed, empty and ``None`` input.

    The accented fixtures are written with explicit escapes so that the
    composed/decomposed distinction does not depend on how an editor or
    tool happened to store the literal in this file.
    """
    composed = "caf\u00e9"  # single precomposed code point U+00E9
    # 'e' followed by U+0301 COMBINING ACUTE ACCENT. There must be no trailing
    # whitespace here: normalization does not remove it, so the comparison
    # against the composed form below would fail.
    decomposed = "cafe\u0301"

    # Plain ASCII is expected to pass through untouched.
    assert normalize_unicode("hello") == "hello"

    # Already-composed text is expected to stay composed.
    assert normalize_unicode(composed) == composed

    # Decomposed text is expected to come back in composed form when no
    # explicit form is requested.
    assert normalize_unicode(decomposed) == composed

    # Requesting different normalization forms must yield different strings
    # for text containing an accented character.
    assert normalize_unicode(composed, form="NFD") != normalize_unicode(composed, form="NFC")

    # Empty string and None are expected back unchanged.
    assert normalize_unicode("") == ""
    assert normalize_unicode(None) is None
77+
def test_remove_punctuation():
    """Verify ``remove_punctuation`` drops ASCII and Unicode punctuation marks."""
    # (input, expected) pairs: single marks, repeated marks and symbols,
    # non-ASCII punctuation (em dash, guillemets), and the empty string.
    expectations = (
        ("hello, world!", "hello world"),
        ("hello.world", "helloworld"),
        ("hello!!! world???", "hello world"),
        ("hello@#$%world", "helloworld"),
        ("hello—world", "helloworld"),
        ("«hello»", "hello"),
        ("", ""),
    )
    for punctuated, bare in expectations:
        assert remove_punctuation(punctuated) == bare

    # None is expected back unchanged (identity, not equality).
    assert remove_punctuation(None) is None
95+
def test_map_values():
    """Verify ``map_values`` for hits, misses with/without a default, and mixed key types."""
    letter_codes = {"a": 1, "b": 2, "c": 3}

    # Keys present in the mapping resolve to their mapped value.
    for key, code in (("a", 1), ("b", 2)):
        assert map_values(key, letter_codes) == code

    # A missing key is expected to give None unless a default is supplied.
    assert map_values("x", letter_codes) is None
    assert map_values("x", letter_codes, default=0) == 0

    # Keys need not be strings: an int key and a None key are both looked up.
    heterogeneous = {1: "one", "two": 2, None: "null"}
    for key, label in ((1, "one"), (None, "null")):
        assert map_values(key, heterogeneous) == label
112+
def test_cast_numeric():
    """Verify ``cast_numeric`` conversions, blank handling and each ``on_error`` mode."""
    # Numeric strings and values that are already numbers compare equal to
    # the expected number.
    conversions = (
        ("123", 123),
        ("123.45", 123.45),
        (123, 123),
        (123.45, 123.45),
    )
    for raw, number in conversions:
        assert cast_numeric(raw) == number

    # Integer-looking text must come back as int, decimal text as float.
    for raw, numeric_type in (("123", int), ("123.45", float)):
        assert isinstance(cast_numeric(raw), numeric_type)

    # None is expected back unchanged.
    assert cast_numeric(None) is None

    # Blank strings only become None when on_error="none" is requested ...
    for blank in ("", " "):
        assert cast_numeric(blank, on_error="none") is None

    # ... whereas with no on_error argument they are expected back as-is
    # (the original author's note says the default mode is "ignore").
    for blank in ("", " "):
        assert cast_numeric(blank) == blank

    # One non-numeric input exercised under each error-handling mode.
    junk = "abc"
    assert cast_numeric(junk, on_error="ignore") == junk
    assert cast_numeric(junk, on_error="none") is None
    assert cast_numeric(junk, on_error="default", default=0) == 0

    # "raise" mode must propagate an error; the concrete exception type is
    # not pinned down by this test.
    with pytest.raises(Exception):
        cast_numeric(junk, on_error="raise")
0 commit comments