Skip to content

Commit f95ea63

Browse files
committed
histogram sql api working
1 parent 77180a6 commit f95ea63

File tree

4 files changed

+71
-15
lines changed

4 files changed

+71
-15
lines changed

ckanext/datastore/backend/postgres.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,14 +1649,15 @@ def search_data_buckets(context: Context, data_dict: dict[str, Any]):
16491649
edges_{index} AS (
16501650
SELECT DISTINCT
16511651
(data_stats_{index}.min_val + (
1652-
generate_series(0, {num_buckets}-1)
1652+
generate_series(0, {num_buckets})
16531653
* (data_stats_{index}.max_val - data_stats_{index}.min_val)
16541654
/ {num_buckets}
1655-
))::{ftype}
1655+
))::{ftype} e
16561656
FROM data_stats_{index}
1657+
ORDER BY e
16571658
),
16581659
data_{index} AS (
1659-
SELECT val, freq
1660+
SELECT val, coalesce(freq, 0) freq
16601661
FROM
16611662
unnest(array(select * from edges_{index})) with ordinality as val
16621663
FULL JOIN
@@ -1705,7 +1706,7 @@ def search_data_buckets(context: Context, data_dict: dict[str, Any]):
17051706
resource=identifier(resource_id),
17061707
ts_query=ts_query,
17071708
where=where_clause,
1708-
num_buckets=MAX_BUCKETS,
1709+
num_buckets=data_dict['buckets'],
17091710
ftype=ftype,
17101711
))
17111712

@@ -1730,8 +1731,19 @@ def search_data_buckets(context: Context, data_dict: dict[str, Any]):
17301731
).mappings().one()
17311732

17321733
for i, rf in enumerate(rfields, 1):
1733-
rf['buckets'] = result[f'freq_{i}']
1734-
rf['edges'] = result[f'edge_{i}']
1734+
buckets = result[f'freq_{i}']
1735+
edges = result[f'edge_{i}']
1736+
rf['nulls'] = 0
1737+
if result[f'edge_{i}'][-1:] == [None]:
1738+
edges.pop()
1739+
rf['nulls'] = buckets.pop()
1740+
if edges == [None]: # all nulls returns two null edges
1741+
edges = []
1742+
buckets = []
1743+
# the final count returned covers only rows exactly equal to the max value;
1744+
# fold it into the preceding bucket so the top bucket includes the max
1745+
rf['buckets'] = buckets[:-2] + ([sum(buckets[-2:])] if buckets else [])
1746+
rf['edges'] = edges
17351747

17361748
return {'fields': rfields}
17371749

ckanext/datastore/logic/action.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -777,11 +777,6 @@ def datastore_search_buckets(context: Context, data_dict: dict[str, Any]):
777777
:rtype: A dictionary with the following keys
778778
:param fields: fields/columns and their extra metadata
779779
:type fields: list of dictionaries
780-
:param filters: query filters
781-
:type filters: list of dictionaries
782-
:param buckets: dict of matching results
783-
:type buckets: dict of field ids and bucketed data
784-
785780
'''
786781
backend = DatastoreBackend.get_active_backend()
787782
schema = context.get('schema', dsschema.datastore_search_buckets_schema())

ckanext/datastore/logic/schema.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,11 @@ def datastore_search_buckets_schema() -> Schema:
224224
'resource_id': [not_missing, not_empty, unicode_safe],
225225
'id': [ignore_missing],
226226
'q': [ignore_missing, unicode_or_json_validator],
227-
'buckets': [default(12), int_validator],
227+
'buckets': [
228+
default(12),
229+
natural_number_validator,
230+
limit_to_configured_maximum('ckan.datastore.search.buckets_max', 300),
231+
],
228232
'plain': [ignore_missing, boolean_validator],
229233
'filters': [ignore_missing, json_validator],
230234
'language': [ignore_missing, unicode_safe],

ckanext/datastore/tests/test_histogram.py

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import pytest
44
import sqlalchemy as sa
55
import sqlalchemy.orm as orm
6-
import decimal
6+
from datetime import date, datetime
77
from unittest import mock
88

99
import ckan.logic as logic
@@ -37,11 +37,56 @@ def test_histograms():
3737
{"one": 1, "short": 2, "nums": 10, "days": "2026-02-01", "ts": "2026-02-01"},
3838
{"one": 1, "short": 8, "nums": 15, "days": "2026-01-06", "ts": "2026-01-06"},
3939
{"one": 1, "short": 9, "nums": 15, "days": "2026-02-01", "ts": "2026-02-01"},
40-
{"one": 2, "short": 3, "nums": -1, "days": "2026-01-06", "ts": "2026-01-06"},
40+
{"one": 1, "short": 3, "nums": -1, "days": "2026-01-18", "ts": "2026-01-18"},
4141
],
4242
)
4343
results = helpers.call_action(
4444
"datastore_search_buckets",
4545
resource_id=resource["id"],
46+
buckets=4,
4647
)
47-
assert results["fields"] == []
48+
assert results["fields"] == [
49+
{"id": "all_null", "buckets": [], "edges": [], "nulls": 6, "type": "int4"},
50+
{"id": "one", "buckets": [6], "edges": [1], "nulls": 0, "type": "numeric"},
51+
{
52+
"id": "short",
53+
"buckets": [3, 1, 0, 2],
54+
"edges": [2, 3, 5, 7, 9],
55+
"nulls": 0,
56+
"type": "int4",
57+
},
58+
{
59+
"id": "nums",
60+
"buckets": [2, 0, 1, 3],
61+
"edges": [-4, 2, 8, 14, 20],
62+
"nulls": 0,
63+
"type": "numeric",
64+
},
65+
{
66+
"id": "days",
67+
"buckets": [3, 0, 1, 2],
68+
"edges": [
69+
date(2026, 1, 1),
70+
date(2026, 1, 8),
71+
date(2026, 1, 16),
72+
date(2026, 1, 24),
73+
date(2026, 2, 1),
74+
],
75+
"nulls": 0,
76+
"type": "date",
77+
},
78+
{
79+
"id": "ts",
80+
"buckets": [3, 0, 1, 2],
81+
"edges": [
82+
datetime(2026, 1, 1, 0),
83+
datetime(2026, 1, 8, 18),
84+
datetime(2026, 1, 16, 12),
85+
datetime(2026, 1, 24, 6),
86+
datetime(2026, 2, 1, 0),
87+
88+
],
89+
"nulls": 0,
90+
"type": "timestamp",
91+
}
92+
]

0 commit comments

Comments
 (0)