11#!python3
22
33import io
4+ import json
45import random
56import unittest
67import numpy as np
78import pandas as pd
89import pyarrow as pa
910from pyarrow import csv
11+ import pyarrow .json
12+ import pyarrow .parquet
1013import chdb
1114
1215
4245639860,win,1989-06-30
4346"""
4447
48+ ARROW_DATA_JSONL = """{"match_id": 3943077, "match_date": "2024-07-15", "kick_off": "04:15:00.000", "competition": {"competition_id": 223, "country_name": "South America", "competition_name": "Copa America"}, "season": {"season_id": 282, "season_name": "2024"}, "home_team": {"home_team_id": 779, "home_team_name": "Argentina", "home_team_gender": "male", "home_team_group": null, "country": {"id": 11, "name": "Argentina"}, "managers": [{"id": 5677, "name": "Lionel Sebasti\u00e1 n Scaloni", "nickname": null, "dob": "1978-05-16", "country": {"id": 11, "name": "Argentina"}}]}, "away_team": {"away_team_id": 769, "away_team_name": "Colombia", "away_team_gender": "male", "away_team_group": null, "country": {"id": 49, "name": "Colombia"}, "managers": [{"id": 5905, "name": "N\u00e9 stor Gabriel Lorenzo", "nickname": null, "dob": "1966-02-28", "country": {"id": 11, "name": "Argentina"}}]}, "home_score": 1, "away_score": 0, "match_status": "available", "match_status_360": "unscheduled", "last_updated": "2024-07-15T15:50:08.671355", "last_updated_360": null, "metadata": {"data_version": "1.1.0", "shot_fidelity_version": "2", "xy_fidelity_version": "2"}, "match_week": 6, "competition_stage": {"id": 26, "name": "Final"}, "stadium": {"id": 5337, "name": "Hard Rock Stadium", "country": {"id": 241, "name": "United States of America"}}, "referee": {"id": 2638, "name": "Raphael Claus", "country": {"id": 31, "name": "Brazil"}}}
49+ {"match_id": 3943076, "match_date": "2024-07-14", "kick_off": "03:00:00.000", "competition": {"competition_id": 223, "country_name": "South America", "competition_name": "Copa America"}, "season": {"season_id": 282, "season_name": "2024"}, "home_team": {"home_team_id": 1833, "home_team_name": "Canada", "home_team_gender": "male", "home_team_group": null, "country": {"id": 40, "name": "Canada"}, "managers": [{"id": 165, "name": "Jesse Marsch", "nickname": null, "dob": "1973-11-08", "country": {"id": 241, "name": "United States of America"}}]}, "away_team": {"away_team_id": 783, "away_team_name": "Uruguay", "away_team_gender": "male", "away_team_group": null, "country": {"id": 242, "name": "Uruguay"}, "managers": [{"id": 269, "name": "Marcelo Alberto Bielsa Caldera", "nickname": "Marcelo Bielsa", "dob": "1955-07-21", "country": {"id": 11, "name": "Argentina"}}]}, "home_score": 2, "away_score": 2, "match_status": "available", "match_status_360": "unscheduled", "last_updated": "2024-07-15T07:57:02.660641", "last_updated_360": null, "metadata": {"data_version": "1.1.0", "shot_fidelity_version": "2", "xy_fidelity_version": "2"}, "match_week": 6, "competition_stage": {"id": 25, "name": "3rd Place Final"}, "stadium": {"id": 52985, "name": "Bank of America Stadium", "country": {"id": 241, "name": "United States of America"}}, "referee": {"id": 1849, "name": "Alexis Herrera", "country": {"id": 246, "name": "Venezuela\u00a0 (Bolivarian Republic)"}}}
50+ """
51+
52+
4553class myReader (chdb .PyReader ):
4654 def __init__ (self , data ):
4755 self .data = data
@@ -58,6 +66,7 @@ def read(self, col_names, count):
5866
5967
6068class TestQueryPy (unittest .TestCase ):
69+
6170 # def test_query_np(self):
6271 # t3 = {
6372 # "a": np.array([1, 2, 3, 4, 5, 6]),
@@ -135,6 +144,72 @@ def test_query_arrow3(self):
135144 "5872873,587287.3,553446.5,470878.25,3,0,7,10\n " ,
136145 )
137146
def test_query_arrow4(self):
    """Query an Arrow table built from nested JSON lines without errors.

    The module-level ``ARROW_DATA_JSONL`` text is parsed in memory by
    pyarrow's JSON reader, exposed to chdb through the ``Python()`` table
    function, and the query result must carry an empty error message.
    """
    # NOTE: this local has to stay named ``arrow_table`` -- the SQL text
    # below refers to it by that exact name via ``Python(arrow_table)``.
    jsonl_bytes = ARROW_DATA_JSONL.encode()
    arrow_table = pa.json.read_json(io.BytesIO(jsonl_bytes))

    result = chdb.query(
        "SELECT * FROM Python(arrow_table) LIMIT 10",
        "JSONEachRow",
    )

    self.assertEqual("", result.error_message())
153+
def test_query_arrow5(self):
    """Check the column types chdb reports for a Parquet-backed Arrow table.

    Two queries are run against the same table, both in ``JSONCompact``
    format, and only the ``meta`` section (column name -> type) of each
    result is compared:

    1. a plain ``SELECT *`` to verify how the Arrow columns are typed;
    2. ``max`` and rounded ``median`` applied to every numeric column to
       verify the names and types of the aggregated columns.
    """
    # NOTE: this local has to stay named ``arrow_table`` -- both SQL
    # statements refer to it by that exact name via ``Python(arrow_table)``.
    arrow_table = pa.parquet.read_table(
        "data/sample_2021-04-01_performance_mobile_tiles.parquet"
    )

    # --- 1. types of the raw columns ---------------------------------
    result = chdb.query(
        "SELECT * FROM Python(arrow_table) LIMIT 1",
        "JSONCompact",
    )
    # "meta" is a list of column descriptors such as:
    # [{"name":"quadkey","type":"String"},{"name":"tile","type":"String"}]
    meta = json.loads(str(result)).get("meta")
    column_types = {}
    for column in meta:
        column_types[column["name"]] = column["type"]

    expected_column_types = {
        "quadkey": "String",
        "tile": "String",
        "tile_x": "Float64",
        "tile_y": "Float64",
        "avg_d_kbps": "Int64",
        "avg_u_kbps": "Int64",
        "avg_lat_ms": "Int64",
        "avg_lat_down_ms": "Float64",
        "avg_lat_up_ms": "Float64",
        "tests": "Int64",
        "devices": "Int64",
    }
    self.assertDictEqual(column_types, expected_column_types)

    # --- 2. types of the aggregated numeric columns ------------------
    result = chdb.query(
        """
        WITH numericColumns AS (
        SELECT * EXCEPT ('tile.*') EXCEPT(quadkey)
        FROM Python(arrow_table)
        )
        SELECT * APPLY(max), * APPLY(median) APPLY(x -> round(x, 2))
        FROM numericColumns
        """,
        "JSONCompact",
    )
    aggregate_meta = json.loads(str(result)).get("meta")
    aggregate_types = dict(
        (column["name"], column["type"]) for column in aggregate_meta
    )

    expected_aggregate_types = {
        "max(avg_d_kbps)": "Int64",
        "max(avg_lat_down_ms)": "Float64",
        "max(avg_lat_ms)": "Int64",
        "max(avg_lat_up_ms)": "Float64",
        "max(avg_u_kbps)": "Int64",
        "max(devices)": "Int64",
        "max(tests)": "Int64",
        "round(median(avg_d_kbps), 2)": "Float64",
        "round(median(avg_lat_down_ms), 2)": "Float64",
        "round(median(avg_lat_ms), 2)": "Float64",
        "round(median(avg_lat_up_ms), 2)": "Float64",
        "round(median(avg_u_kbps), 2)": "Float64",
        "round(median(devices), 2)": "Float64",
        "round(median(tests), 2)": "Float64",
    }
    self.assertDictEqual(aggregate_types, expected_aggregate_types)
212+
138213 def test_random_float (self ):
139214 x = {"col1" : [random .uniform (0 , 1 ) for _ in range (0 , 100000 )]}
140215 ret = chdb .sql (
0 commit comments