Skip to content

Commit bc3e2b6

Browse files
Copilotjoocer
andcommitted
Add comprehensive tests for Cython JSONL decoder
Co-authored-by: joocer <[email protected]>
1 parent b88bfbc commit bc3e2b6

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
"""
2+
Test Cython-based fast JSONL decoder
3+
4+
This test can only run after the Cython extension is built.
5+
Run with: python -m pytest tests/unit/utils/test_cython_jsonl_decoder.py
6+
"""
7+
8+
import os
9+
import sys
10+
11+
sys.path.insert(1, os.path.join(sys.path[0], "../../.."))
12+
13+
import pytest
14+
15+
16+
def test_cython_jsonl_decoder_import():
    """Check that the compiled decoder module imports and exposes its entry point.

    The test is skipped when the import raises ImportError, which is taken to
    mean the Cython extension has not been built.
    """
    try:
        from opteryx.compiled.structures import jsonl_decoder
    except ImportError:
        pytest.skip("Cython extension not built")
    else:
        # Only reached when the import succeeded.
        assert hasattr(jsonl_decoder, 'fast_jsonl_decode_columnar')
23+
24+
25+
def test_cython_jsonl_decoder_basic():
    """Decode three rows covering int, str, bool and float columns.

    The test is skipped when the Cython extension cannot be imported.
    """
    try:
        from opteryx.compiled.structures import jsonl_decoder
    except ImportError:
        pytest.skip("Cython extension not built")

    # One JSON object per line; the trailing backslash on the opening quote
    # keeps the payload from starting with an empty line.
    data = b'''\
{"id": 1, "name": "Alice", "active": true, "score": 95.5}
{"id": 2, "name": "Bob", "active": false, "score": 87.3}
{"id": 3, "name": "Charlie", "active": true, "score": 92.1}
'''

    column_types = {
        'id': 'int',
        'name': 'str',
        'active': 'bool',
        'score': 'float',
    }
    # Dict insertion order gives the same column order as the mapping above.
    column_names = list(column_types)

    decoded = jsonl_decoder.fast_jsonl_decode_columnar(data, column_names, column_types)
    num_rows, num_cols, column_data = decoded

    assert num_rows == 3
    assert num_cols == 4

    # Columns whose values must match exactly.
    exact_columns = {
        'id': [1, 2, 3],
        'name': ['Alice', 'Bob', 'Charlie'],
        'active': [True, False, True],
    }
    for column, expected_values in exact_columns.items():
        assert column_data[column] == expected_values

    # Floats might have minor precision differences, so compare with a tolerance.
    for row, expected_score in enumerate((95.5, 87.3, 92.1)):
        assert abs(column_data['score'][row] - expected_score) < 0.01
60+
61+
62+
def test_cython_jsonl_decoder_with_nulls():
    """Decode rows where string columns contain JSON ``null``.

    Null entries are expected to come back as ``None`` in the column lists.
    The test is skipped when the Cython extension cannot be imported.
    """
    try:
        from opteryx.compiled.structures import jsonl_decoder
    except ImportError:
        pytest.skip("Cython extension not built")

    data = b'''\
{"id": 1, "name": "Alice", "city": "NYC"}
{"id": 2, "name": "Bob", "city": null}
{"id": 3, "name": null, "city": "LA"}
'''

    column_names = ['id', 'name', 'city']
    # 'id' is an integer column; the remaining columns are strings.
    column_types = {
        column: ('int' if column == 'id' else 'str') for column in column_names
    }

    num_rows, _num_cols, column_data = jsonl_decoder.fast_jsonl_decode_columnar(
        data, column_names, column_types
    )

    assert num_rows == 3

    # Expected per-row values; None marks a JSON null.
    expected_by_column = {
        'name': ("Alice", "Bob", None),
        'city': ("NYC", None, "LA"),
    }
    for column, expected_values in expected_by_column.items():
        actual_values = column_data[column]
        for row, expected in enumerate(expected_values):
            if expected is None:
                assert actual_values[row] is None
            else:
                assert actual_values[row] == expected
93+
94+
95+
def test_cython_jsonl_decoder_negative_numbers():
    """Decode negative integers and floats, checking every decoded value.

    Every balance is verified, including ``-0.99`` (a negative value whose
    integer part is zero) and the positive ``100.00`` row, so a sign-handling
    error on any row is caught. The test is skipped when the Cython extension
    cannot be imported.
    """
    try:
        from opteryx.compiled.structures import jsonl_decoder
    except ImportError:
        pytest.skip("Cython extension not built")

    data = b'''\
{"id": -1, "balance": -123.45}
{"id": -2, "balance": -0.99}
{"id": 3, "balance": 100.00}
'''

    column_names = ['id', 'balance']
    column_types = {
        'id': 'int',
        'balance': 'float',
    }

    # The column count is not needed by this test.
    num_rows, _, column_data = jsonl_decoder.fast_jsonl_decode_columnar(
        data, column_names, column_types
    )

    assert num_rows == 3
    assert column_data['id'] == [-1, -2, 3]

    # Floats might have minor precision differences, so compare with the same
    # tolerance used by the other float checks in this module.
    expected_balances = (-123.45, -0.99, 100.00)
    balances = column_data['balance']
    assert len(balances) == len(expected_balances)
    for row, expected in enumerate(expected_balances):
        assert abs(balances[row] - expected) < 0.01
121+
122+
123+
def test_jsonl_decoder_integration():
    """Run ``file_decoders.jsonl_decoder`` with ``use_fast_decoder=True``.

    Checks the reported row count and the row count and column names of the
    returned table.
    """
    from opteryx.utils.file_decoders import jsonl_decoder

    data = b'''\
{"id": 1, "name": "Alice", "active": true}
{"id": 2, "name": "Bob", "active": false}
{"id": 3, "name": "Charlie", "active": true}
'''

    # This should work with or without the Cython extension
    result = jsonl_decoder(data, use_fast_decoder=True)
    num_rows, _num_cols, _, table = result

    expected_columns = {"id", "name", "active"}

    assert num_rows == 3
    assert table.num_rows == 3
    assert set(table.column_names) == expected_columns
139+
140+
141+
def test_jsonl_decoder_fallback():
    """Decode a single-row payload using the decoder's default arguments.

    NOTE(review): intended to cover the path taken when the Cython decoder is
    not used; this test does not itself disable the extension - confirm
    against ``jsonl_decoder``.
    """
    from opteryx.utils.file_decoders import jsonl_decoder

    # Small data; presumably below whatever threshold routes work to the fast
    # decoder - verify against the decoder implementation.
    single_row = b'{"id": 1, "name": "Alice"}\n'

    decoded = jsonl_decoder(single_row)
    num_rows, _num_cols, _, table = decoded

    assert num_rows == 1
    assert table.num_rows == 1
152+
153+
154+
if __name__ == "__main__":  # pragma: no cover
    # Allows this file to be executed directly as a script: the project's
    # test-runner helper is imported lazily so a normal pytest run never
    # needs it.
    from tests import run_tests

    run_tests()

0 commit comments

Comments
 (0)