Skip to content

Commit fc4b33e

Browse files
committed
Add tests on arrow
1 parent 3ac2b09 commit fc4b33e

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

tests/test_query_py.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#!python3
22

33
import io
4+
import json
45
import random
56
import unittest
67
import numpy as np
78
import pandas as pd
89
import pyarrow as pa
910
from pyarrow import csv
11+
import pyarrow.json
12+
import pyarrow.parquet
1013
import chdb
1114

1215

@@ -42,6 +45,11 @@
4245
639860,win,1989-06-30
4346
"""
4447

48+
ARROW_DATA_JSONL = """{"match_id": 3943077, "match_date": "2024-07-15", "kick_off": "04:15:00.000", "competition": {"competition_id": 223, "country_name": "South America", "competition_name": "Copa America"}, "season": {"season_id": 282, "season_name": "2024"}, "home_team": {"home_team_id": 779, "home_team_name": "Argentina", "home_team_gender": "male", "home_team_group": null, "country": {"id": 11, "name": "Argentina"}, "managers": [{"id": 5677, "name": "Lionel Sebasti\u00e1n Scaloni", "nickname": null, "dob": "1978-05-16", "country": {"id": 11, "name": "Argentina"}}]}, "away_team": {"away_team_id": 769, "away_team_name": "Colombia", "away_team_gender": "male", "away_team_group": null, "country": {"id": 49, "name": "Colombia"}, "managers": [{"id": 5905, "name": "N\u00e9stor Gabriel Lorenzo", "nickname": null, "dob": "1966-02-28", "country": {"id": 11, "name": "Argentina"}}]}, "home_score": 1, "away_score": 0, "match_status": "available", "match_status_360": "unscheduled", "last_updated": "2024-07-15T15:50:08.671355", "last_updated_360": null, "metadata": {"data_version": "1.1.0", "shot_fidelity_version": "2", "xy_fidelity_version": "2"}, "match_week": 6, "competition_stage": {"id": 26, "name": "Final"}, "stadium": {"id": 5337, "name": "Hard Rock Stadium", "country": {"id": 241, "name": "United States of America"}}, "referee": {"id": 2638, "name": "Raphael Claus", "country": {"id": 31, "name": "Brazil"}}}
49+
{"match_id": 3943076, "match_date": "2024-07-14", "kick_off": "03:00:00.000", "competition": {"competition_id": 223, "country_name": "South America", "competition_name": "Copa America"}, "season": {"season_id": 282, "season_name": "2024"}, "home_team": {"home_team_id": 1833, "home_team_name": "Canada", "home_team_gender": "male", "home_team_group": null, "country": {"id": 40, "name": "Canada"}, "managers": [{"id": 165, "name": "Jesse Marsch", "nickname": null, "dob": "1973-11-08", "country": {"id": 241, "name": "United States of America"}}]}, "away_team": {"away_team_id": 783, "away_team_name": "Uruguay", "away_team_gender": "male", "away_team_group": null, "country": {"id": 242, "name": "Uruguay"}, "managers": [{"id": 269, "name": "Marcelo Alberto Bielsa Caldera", "nickname": "Marcelo Bielsa", "dob": "1955-07-21", "country": {"id": 11, "name": "Argentina"}}]}, "home_score": 2, "away_score": 2, "match_status": "available", "match_status_360": "unscheduled", "last_updated": "2024-07-15T07:57:02.660641", "last_updated_360": null, "metadata": {"data_version": "1.1.0", "shot_fidelity_version": "2", "xy_fidelity_version": "2"}, "match_week": 6, "competition_stage": {"id": 25, "name": "3rd Place Final"}, "stadium": {"id": 52985, "name": "Bank of America Stadium", "country": {"id": 241, "name": "United States of America"}}, "referee": {"id": 1849, "name": "Alexis Herrera", "country": {"id": 246, "name": "Venezuela\u00a0(Bolivarian Republic)"}}}
50+
"""
51+
52+
4553
class myReader(chdb.PyReader):
4654
def __init__(self, data):
4755
self.data = data
@@ -58,6 +66,7 @@ def read(self, col_names, count):
5866

5967

6068
class TestQueryPy(unittest.TestCase):
69+
6170
# def test_query_np(self):
6271
# t3 = {
6372
# "a": np.array([1, 2, 3, 4, 5, 6]),
@@ -135,6 +144,72 @@ def test_query_arrow3(self):
135144
"5872873,587287.3,553446.5,470878.25,3,0,7,10\n",
136145
)
137146

147+
def test_query_arrow4(self):
    """Query an Arrow table parsed from JSON-lines text through Python().

    The rows in ARROW_DATA_JSONL are read into an Arrow table, selected
    back out as JSONEachRow, and the test passes when the result carries
    an empty error message.
    """
    jsonl_buffer = io.BytesIO(ARROW_DATA_JSONL.encode())
    # NOTE(review): the SQL text refers to ``arrow_table`` by name;
    # presumably chdb resolves it from this scope, so keep the local's
    # name in sync with the query string.
    arrow_table = pa.json.read_json(jsonl_buffer)
    result = chdb.query(
        "SELECT * FROM Python(arrow_table) LIMIT 10",
        "JSONEachRow",
    )
    self.assertEqual("", result.error_message())
153+
154+
def test_query_arrow5(self):
    """Check the column types reported for a Parquet-backed Arrow table.

    Loads the sample Parquet file into an Arrow table and inspects the
    "meta" section of two JSONCompact results:

    1. a plain ``SELECT *``, whose column names and types must match the
       expected schema;
    2. an aggregate query applying ``max`` and a rounded ``median`` to the
       columns left after excluding ``quadkey`` and those matching
       ``'tile.*'``.
    """
    # NOTE(review): both SQL strings refer to ``arrow_table`` by name;
    # presumably chdb resolves it from this scope, so keep the local's
    # name in sync with the query text.
    arrow_table = pa.parquet.read_table(
        "data/sample_2021-04-01_performance_mobile_tiles.parquet"
    )

    first_row = chdb.query("SELECT * FROM Python(arrow_table) LIMIT 1", "JSONCompact")
    # "meta" is an array like:
    # [{"name":"quadkey","type":"String"},{"name":"tile","type":"String"}]
    meta = json.loads(str(first_row)).get("meta")
    actual_types = {}
    for column in meta:
        actual_types[column["name"]] = column["type"]
    expected_types = {
        "quadkey": "String",
        "tile": "String",
        "tile_x": "Float64",
        "tile_y": "Float64",
        "avg_d_kbps": "Int64",
        "avg_u_kbps": "Int64",
        "avg_lat_ms": "Int64",
        "avg_lat_down_ms": "Float64",
        "avg_lat_up_ms": "Float64",
        "tests": "Int64",
        "devices": "Int64",
    }
    self.assertDictEqual(actual_types, expected_types)

    aggregate_sql = """
WITH numericColumns AS (
SELECT * EXCEPT ('tile.*') EXCEPT(quadkey)
FROM Python(arrow_table)
)
SELECT * APPLY(max), * APPLY(median) APPLY(x -> round(x, 2))
FROM numericColumns
"""
    aggregates = chdb.query(aggregate_sql, "JSONCompact")
    aggregate_meta = json.loads(str(aggregates)).get("meta")
    aggregate_types = {}
    for column in aggregate_meta:
        aggregate_types[column["name"]] = column["type"]
    expected_aggregate_types = {
        "max(avg_d_kbps)": "Int64",
        "max(avg_lat_down_ms)": "Float64",
        "max(avg_lat_ms)": "Int64",
        "max(avg_lat_up_ms)": "Float64",
        "max(avg_u_kbps)": "Int64",
        "max(devices)": "Int64",
        "max(tests)": "Int64",
        "round(median(avg_d_kbps), 2)": "Float64",
        "round(median(avg_lat_down_ms), 2)": "Float64",
        "round(median(avg_lat_ms), 2)": "Float64",
        "round(median(avg_lat_up_ms), 2)": "Float64",
        "round(median(avg_u_kbps), 2)": "Float64",
        "round(median(devices), 2)": "Float64",
        "round(median(tests), 2)": "Float64",
    }
    self.assertDictEqual(aggregate_types, expected_aggregate_types)
212+
138213
def test_random_float(self):
139214
x = {"col1": [random.uniform(0, 1) for _ in range(0, 100000)]}
140215
ret = chdb.sql(

0 commit comments

Comments
 (0)