Skip to content

Commit 44a024a

Browse files
committed
fix: change statistics to use pyarrow result type
1 parent 2a4f524 commit 44a024a

File tree

4 files changed

+117
-19
lines changed

4 files changed

+117
-19
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ authors = [
88
dependencies = [
99
"pyarrow>=20.0.0",
1010
"query-farm-flight-server",
11-
# "query-farm-flight-server @ git+https://github.com/Query-farm/server_base.git@master",
1211
"duckdb>=1.3.1",
1312
"query-farm-duckdb-json-serialization>=0.1.1",
1413
]

requirements-dev.lock

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,20 @@
1212
-e file:.
1313
annotated-types==0.7.0
1414
# via pydantic
15-
boto3==1.39.0
15+
boto3==1.39.4
1616
# via query-farm-flight-server
17-
botocore==1.39.0
17+
botocore==1.39.4
1818
# via boto3
1919
# via s3transfer
2020
cache3==0.4.3
2121
# via query-farm-flight-server
22-
certifi==2025.6.15
22+
certifi==2025.7.9
2323
# via sentry-sdk
2424
click==8.2.1
2525
# via query-farm-flight-server
26-
coverage==7.9.1
26+
coverage==7.9.2
2727
# via pytest-cov
28-
duckdb==1.3.1
28+
duckdb==1.3.2
2929
# via query-farm-airport-test-server
3030
execnet==2.1.1
3131
# via pytest-xdist
@@ -46,7 +46,7 @@ mypy==1.16.1
4646
# via pytest-mypy
4747
mypy-boto3-dynamodb==1.39.0
4848
# via query-farm-flight-server
49-
mypy-boto3-s3==1.39.0
49+
mypy-boto3-s3==1.39.2
5050
# via query-farm-flight-server
5151
mypy-extensions==1.1.0
5252
# via mypy
@@ -79,14 +79,14 @@ pytest-cov==6.2.1
7979
pytest-env==1.1.5
8080
pytest-mypy==1.0.1
8181
pytest-parallel==0.1.1
82-
pytest-xdist==3.7.0
82+
pytest-xdist==3.8.0
8383
python-dateutil==2.9.0.post0
8484
# via botocore
8585
python-levenshtein==0.27.1
8686
# via query-farm-flight-server
87-
query-farm-duckdb-json-serialization==0.1.1
87+
query-farm-duckdb-json-serialization==0.1.2
8888
# via query-farm-airport-test-server
89-
query-farm-flight-server==0.1.2
89+
query-farm-flight-server==0.1.4
9090
# via query-farm-airport-test-server
9191
rapidfuzz==3.13.0
9292
# via levenshtein
@@ -101,7 +101,7 @@ structlog==25.4.0
101101
# via query-farm-flight-server
102102
tblib==3.1.0
103103
# via pytest-parallel
104-
typing-extensions==4.14.0
104+
typing-extensions==4.14.1
105105
# via mypy
106106
# via pydantic
107107
# via pydantic-core

requirements.lock

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212
-e file:.
1313
annotated-types==0.7.0
1414
# via pydantic
15-
boto3==1.39.0
15+
boto3==1.39.4
1616
# via query-farm-flight-server
17-
botocore==1.39.0
17+
botocore==1.39.4
1818
# via boto3
1919
# via s3transfer
2020
cache3==0.4.3
2121
# via query-farm-flight-server
22-
certifi==2025.6.15
22+
certifi==2025.7.9
2323
# via sentry-sdk
2424
click==8.2.1
2525
# via query-farm-flight-server
26-
duckdb==1.3.1
26+
duckdb==1.3.2
2727
# via query-farm-airport-test-server
2828
fuzzywuzzy==0.18.0
2929
# via query-farm-flight-server
@@ -36,7 +36,7 @@ msgpack==1.1.1
3636
# via query-farm-flight-server
3737
mypy-boto3-dynamodb==1.39.0
3838
# via query-farm-flight-server
39-
mypy-boto3-s3==1.39.0
39+
mypy-boto3-s3==1.39.2
4040
# via query-farm-flight-server
4141
prettytable==3.16.0
4242
# via query-farm-flight-server
@@ -52,9 +52,9 @@ python-dateutil==2.9.0.post0
5252
# via botocore
5353
python-levenshtein==0.27.1
5454
# via query-farm-flight-server
55-
query-farm-duckdb-json-serialization==0.1.1
55+
query-farm-duckdb-json-serialization==0.1.2
5656
# via query-farm-airport-test-server
57-
query-farm-flight-server==0.1.2
57+
query-farm-flight-server==0.1.4
5858
# via query-farm-airport-test-server
5959
rapidfuzz==3.13.0
6060
# via levenshtein
@@ -66,7 +66,7 @@ six==1.17.0
6666
# via python-dateutil
6767
structlog==25.4.0
6868
# via query-farm-flight-server
69-
typing-extensions==4.14.0
69+
typing-extensions==4.14.1
7070
# via pydantic
7171
# via pydantic-core
7272
# via typing-inspection

src/query_farm_airport_test_server/server.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1+
import datetime
12
import hashlib
23
import json
34
import re
5+
import uuid
46
from collections.abc import Callable, Generator, Iterator
57
from dataclasses import dataclass, field
8+
from decimal import Decimal
69
from typing import Any, Literal, TypeVar, overload
710

811
import click
@@ -889,6 +892,49 @@ def in_out_handler(
889892

890893
return pa.RecordBatch.from_arrays([["last"], ["row"]], schema=output_schema)
891894

895+
# Sample columns for the "employees" table.  Each entry pairs a column's field
# definition with its two sample values, so a column's type and its data are
# declared side by side instead of in two parallel lists.
_employee_columns = [
    (pa.field("name", pa.string()), ["Emily", "Amy"]),
    (pa.field("age", pa.int32()), [30, 32]),
    (
        pa.field("start_date", pa.timestamp("ms")),
        [datetime.datetime(2023, 10, 1), datetime.datetime(2024, 10, 2)],
    ),
    (pa.field("json_data", pa.json_(pa.string())), ["{}", "[1,2,3]"]),
    # The ids are random, so they differ every time this is evaluated.
    (pa.field("id", pa.uuid()), [uuid.uuid4().bytes, uuid.uuid4().bytes]),
    (
        pa.field("birthdate", pa.date32()),
        [datetime.date(2023, 10, 1), datetime.date(2024, 10, 2)],
    ),
    (pa.field("is_active", pa.bool_()), [True, False]),
    # One NULL value so the column has both null and non-null entries.
    (pa.field("nickname", pa.string()), ["Ann", None]),
    (pa.field("salary", pa.float64()), [1234.123, 5678.123]),
    (
        pa.field("balance", pa.decimal128(12, 6)),
        [Decimal("12345.678790"), Decimal("67890.123456")],
    ),
]

# Schema collection holding a single pre-populated "employees" table and no
# scalar or table functions.
static_data_schema = SchemaCollection(
    scalar_functions_by_name=CaseInsensitiveDict(),
    table_functions_by_name=CaseInsensitiveDict(),
    tables_by_name=CaseInsensitiveDict(
        {
            "employees": TableInfo(
                table_versions=[
                    pa.Table.from_arrays(
                        [values for _, values in _employee_columns],
                        schema=pa.schema(
                            [column for column, _ in _employee_columns],
                            # NOTE(review): presumably advertises that column
                            # statistics can be requested for this table --
                            # confirm against the consumer of this metadata.
                            metadata={"can_produce_statistics": "1"},
                        ),
                    )
                ],
                # NOTE(review): matches the two seeded rows; presumably the
                # next row id to hand out -- confirm against TableInfo.
                row_id_counter=2,
            )
        }
    ),
)
937+
892938
util_schema = SchemaCollection(
893939
scalar_functions_by_name=CaseInsensitiveDict(
894940
{
@@ -1065,6 +1111,7 @@ def in_out_handler(
10651111
)
10661112

10671113
library.databases_by_name[database_name].schemas_by_name["utils"] = util_schema
1114+
library.databases_by_name[database_name].schemas_by_name["static_data"] = static_data_schema
10681115

10691116
return iter([])
10701117
elif action.type == "drop_database":
@@ -1611,6 +1658,58 @@ def action_change_column_type(
16111658
schema_name=parameters.schema_name,
16121659
)[0]
16131660

1661+
def action_column_statistics(
    self,
    *,
    context: base_server.CallContext[auth.Account, auth.AccountToken],
    parameters: parameter_types.ColumnStatistics,
) -> pa.Table:
    """Compute basic statistics for one column of a stored table.

    The table is located from ``parameters.flight_descriptor`` inside the
    calling token's library, and the column is selected by
    ``parameters.column_name`` from the table's current version.

    Returns:
        A single-row ``pa.Table`` with the columns ``has_not_null``,
        ``has_null``, ``distinct_count``, ``min`` and ``max``.  ``min`` and
        ``max`` use the column's own Arrow type and are NULL when the
        column holds no non-null values.

    Raises:
        AssertionError: if the context has no caller or the descriptor does
            not refer to a table.
    """
    assert context.caller is not None

    descriptor_parts = descriptor_unpack_(parameters.flight_descriptor)
    library = self.contents[context.caller.token.token]
    database = library.by_name(descriptor_parts.catalog_name)
    schema = database.by_name(descriptor_parts.schema_name)

    assert descriptor_parts.type == "table"
    table = schema.by_name("table", descriptor_parts.name)

    contents = table.version().column(parameters.column_name)

    # The table is a PyArrow table, so the statistics are derived here.
    # Materialise the column once and reuse it for every statistic.
    values = contents.to_pylist()
    non_null_values = [value for value in values if value is not None]

    not_null_count = pc.count(contents, "only_valid").as_py()
    null_count = pc.count(contents, "only_null").as_py()
    # NOTE(review): NULL is counted as one distinct value here (original
    # behaviour, kept as-is) -- confirm that this is what clients expect.
    distinct_count = len(set(values))

    # ``default=None`` keeps an all-NULL or empty column from raising; the
    # result row then carries NULL for both min and max.
    min_value = min(non_null_values, default=None)
    max_value = max(non_null_values, default=None)

    if non_null_values and contents.type == pa.uuid():
        # UUID values come back from to_pylist() as objects exposing
        # ``.bytes``; hand from_pylist() the raw bytes for the uuid-typed
        # min/max fields.
        min_value = min_value.bytes
        max_value = max_value.bytes

    result_table = pa.Table.from_pylist(
        [
            {
                "has_not_null": not_null_count > 0,
                "has_null": null_count > 0,
                "distinct_count": distinct_count,
                "min": min_value,
                "max": max_value,
            }
        ],
        schema=pa.schema(
            [
                pa.field("has_not_null", pa.bool_()),
                pa.field("has_null", pa.bool_()),
                pa.field("distinct_count", pa.uint64()),
                pa.field("min", contents.type),
                pa.field("max", contents.type),
            ]
        ),
    )
    return result_table
1712+
16141713
def impl_do_get(
16151714
self,
16161715
*,

0 commit comments

Comments
 (0)