Skip to content

Commit 4a53e01

Browse files
committed
add range_collection to pydough top
1 parent 721eecc commit 4a53e01

File tree

8 files changed

+144
-28
lines changed

8 files changed

+144
-28
lines changed
Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
"""A user-defined collection of integers in a specified range.
22
Usage:
3-
`pydough.range_collection(start, end, step, name, column_name)`
3+
`pydough.range_collection(name, column, *args)`
4+
where `*args` is `end`, `start, end`, or `start, end, step`
45
56
This module defines a collection that generates integers from `start` (inclusive) to `end` (exclusive)
67
with a specified `step`. The user must specify the name of the collection and the
78
name of the column that will hold the integer values.
89
"""
910

10-
from pydough.qdag import PyDoughQDAG
1111
from pydough.types import NumericType
1212
from pydough.types.pydough_type import PyDoughType
1313

@@ -19,13 +19,14 @@
1919
class RangeGeneratedCollection(PyDoughUserGeneratedCollection):
2020
"""Integer range-based collection."""
2121

22+
# HA_Q: should start/end/step be int or PyDoughQDAG? Why?
2223
def __init__(
2324
self,
2425
name: str,
2526
column_name: str,
26-
start: PyDoughQDAG,
27-
end: PyDoughQDAG,
28-
step: PyDoughQDAG,
27+
start: int,
28+
end: int,
29+
step: int,
2930
) -> None:
3031
super().__init__(name=name, columns=[column_name])
3132
self.start = start
@@ -36,9 +37,14 @@ def __init__(
3637
def column_names_and_types(self) -> list[tuple[str, PyDoughType]]:
3738
return [(self.columns[0], NumericType())]
3839

40+
def __len__(self) -> int:
    """
    Returns the number of integers generated by this range collection.

    The count follows the semantics of the builtin `range` applied to
    `self.start`, `self.end` and `self.step`: `end` is exclusive, a negative
    step counts downward, and a range whose step cannot move `start` towards
    `end` is empty. Delegating to `range` guarantees a non-negative result,
    which a hand-written ceiling division does not when the step is negative.

    Raises:
        `ValueError`: if `self.step` is zero.
    """
    return len(range(self.start, self.end, self.step))
44+
3945
def is_empty(self) -> bool:
40-
pass # return self.start < self.end
41-
return False
46+
"""Check if the range collection is empty."""
47+
return len(self) == 0
4248

4349
def to_string(self) -> str:
4450
return f"RangeCollection({self.name}: {self.columns[0]} from {self.start} to {self.end} step {self.step})"

pydough/qdag/node_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def build_generated_collection(
404404
preceding_context: PyDoughCollectionQDAG,
405405
name: str,
406406
column_name: list[str],
407-
args: list[PyDoughQDAG],
407+
args: list[int],
408408
) -> PyDoughUserGeneratedCollectionQDag:
409409
"""
410410
Creates a new user-defined collection.

pydough/unqualified/qualification.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,22 +1287,22 @@ def qualify_generated_collection(
12871287
The PyDough QDAG object for the qualified collection node.
12881288
12891289
"""
1290-
qualified_args: list[PyDoughQDAG] = []
1290+
# qualified_args: list[PyDoughQDAG] = []
12911291

1292-
# TODO: how to handle dataframe case.
1293-
for arg in unqualified._parcel[2]:
1294-
assert isinstance(arg, UnqualifiedNode)
1295-
qualified_arg: PyDoughQDAG = self.qualify_node(
1296-
arg, context, [], is_child, is_cross
1297-
)
1298-
qualified_args.append(qualified_arg)
1292+
# # TODO: how to handle dataframe case.
1293+
# for arg in unqualified._parcel[2]:
1294+
# assert isinstance(arg, UnqualifiedNode)
1295+
# qualified_arg: PyDoughQDAG = self.qualify_node(
1296+
# arg, context, [], is_child, is_cross
1297+
# )
1298+
# qualified_args.append(qualified_arg)
12991299

13001300
generated_collection_qdag: PyDoughUserGeneratedCollectionQDag = (
13011301
self.builder.build_generated_collection(
13021302
context,
13031303
unqualified._parcel[0],
13041304
unqualified._parcel[1],
1305-
qualified_args,
1305+
unqualified._parcel[2],
13061306
)
13071307
)
13081308
return generated_collection_qdag # .collection

pydough/unqualified/unqualified_node.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -450,11 +450,12 @@ def range_collection(
450450
"""
451451
Method used to create a user-generated range collection node.
452452
"""
453-
range_args: list[UnqualifiedNode] = [
454-
self.coerce_to_unqualified(start),
455-
self.coerce_to_unqualified(stop),
456-
self.coerce_to_unqualified(step),
457-
]
453+
# range_args: list[UnqualifiedNode] = [
454+
# self.coerce_to_unqualified(start),
455+
# self.coerce_to_unqualified(stop),
456+
# self.coerce_to_unqualified(step),
457+
# ]
458+
range_args: list[int] = [start, stop, step]
458459
return UnqualifiedGeneratedCollection(name, column, range_args)
459460

460461

@@ -857,11 +858,12 @@ def __init__(
857858
class UnqualifiedGeneratedCollection(UnqualifiedNode):
858859
"""Represents a user-generated collection of values."""
859860

860-
def __init__(self, name: str, column: list[str], args: list[UnqualifiedNode]):
861-
self._parcel: tuple[str, list[str], list[UnqualifiedNode]] = (
861+
def __init__(self, name: str, column: list[str], args: list[int]):
862+
self._parcel: tuple[str, list[str], list[int]] = (
862863
name,
863864
column,
864-
[UnqualifiedLiteral(arg, NumericType()) for arg in args],
865+
# [UnqualifiedLiteral(arg, NumericType()) for arg in args],
866+
args,
865867
)
866868

867869

@@ -964,10 +966,10 @@ def display_raw(unqualified: UnqualifiedNode) -> str:
964966
return result + ")"
965967
case UnqualifiedGeneratedCollection():
966968
result = "generated_collection("
967-
result += f"name={unqualified._parcel[1]!r}, "
968-
result += f"columns=[{', '.join(display_raw(column) for column in unqualified._parcel[2])}],"
969+
result += f"name={unqualified._parcel[0]!r}, "
970+
result += f"columns=[{', '.join(unqualified._parcel[1])}],"
969971
result += (
970-
f"data=({', '.join(display_raw(arg) for arg in unqualified._parcel[3])})"
972+
f"data=({', '.join(unqualified._parcel[2])})"
971973
if unqualified._parcel[3:]
972974
else ""
973975
)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"""
2+
Module of PyDough dealing with APIs used for user generated collections.
3+
"""
4+
5+
__all__ = ["range_collection"]
6+
7+
from .range_collection import range_collection
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""
2+
Implementation of the `pydough.range_collection` function, which provides
3+
a way to create a single-column collection of the integers in a specified range in PyDough.
4+
"""
5+
6+
__all__ = ["range_collection"]
7+
8+
from pydough.qdag.collections.range_collection import RangeGeneratedCollection
9+
10+
11+
def range_collection(name: str, column: str, *args: int) -> RangeGeneratedCollection:
    """
    Creates a single-column collection of the integers in a range.

    Args:
        `name`: The name of the collection.
        `column`: The name of the column that holds the generated integers.
        `*args`: One to three integers describing the range.
        Supported formats:
            - `range_collection(name, column, end)`: generates the integers
              from 0 up to (but excluding) `end`, with a step of 1.
            - `range_collection(name, column, start, end)`: generates the
              integers from `start` up to (but excluding) `end`, with a step
              of 1.
            - `range_collection(name, column, start, end, step)`: generates
              the integers from `start` up to (but excluding) `end`,
              advancing by `step`.

    Returns:
        The `RangeGeneratedCollection` built from `name`, `column`, `start`,
        `end` and `step`.

    Raises:
        `TypeError`: if `name` or `column` is not a string, or if `start`,
        `end` or `step` is not an integer (booleans are rejected).
        `ValueError`: if the number of range arguments is not between 1 and
        3, if `start` or `end` is negative, if `step` is not positive, or if
        `start` is not less than `end`.
    """
    if not isinstance(name, str):
        raise TypeError(f"Expected 'name' to be a string, got {type(name).__name__}")
    if not isinstance(column, str):
        raise TypeError(
            f"Expected 'column' to be a string, got {type(column).__name__}"
        )

    # Fill in the defaults for whichever range arguments were omitted.
    start: int = 0
    step: int = 1
    if len(args) == 1:
        (end,) = args
    elif len(args) == 2:
        start, end = args
    elif len(args) == 3:
        start, end, step = args
    else:
        raise ValueError(f"Expected 1 to 3 arguments, got {len(args)}")

    for label, value in (("start", start), ("end", end), ("step", step)):
        # `bool` is a subclass of `int`, so it has to be excluded explicitly;
        # otherwise `True`/`False` would silently be treated as 1/0.
        if isinstance(value, bool) or not isinstance(value, int):
            raise TypeError(
                f"Expected {label!r} to be an integer, got {type(value).__name__}"
            )

    # The sign checks run before the ordering check so that a negative bound
    # is reported as such instead of being masked by the `start >= end` error.
    if start < 0:
        raise ValueError(f"Expected 'start' to be a non-negative integer, got {start}")
    if end < 0:
        raise ValueError(f"Expected 'end' to be a non-negative integer, got {end}")
    if step == 0:
        raise ValueError("Expected 'step' to be a non-zero integer")
    # TODO: support negative step values
    if step <= 0:
        raise ValueError(f"Expected 'step' to be a positive integer, got {step}")
    if start >= end:
        raise ValueError(f"Expected 'start' ({start}) to be less than 'end' ({end})")

    return RangeGeneratedCollection(
        name=name,
        column_name=column,
        start=start,
        end=end,
        step=step,
    )
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
"""
2+
Various functions containing user generated collections as
3+
PyDough code snippets for testing purposes.
4+
"""
5+
# ruff: noqa
6+
# mypy: ignore-errors
7+
# ruff & mypy should not try to typecheck or verify any of this
8+
9+
import pandas as pd
10+
import datetime
11+
12+
import pydough
13+
14+
15+
def simple_range():
    """
    PyDough snippet: the range collection named "simple_range" with a single
    column "value" and an end value of 10, ordered by that column ascending.
    """
    # Only the end value is passed, so start and step take their defaults.
    numbers = pydough.range_collection("simple_range", "value", 10)
    return numbers.ORDER_BY(value.ASC())

tests/test_qualification.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
impl_tpch_q21,
6464
impl_tpch_q22,
6565
)
66+
from tests.test_pydough_functions.user_collections import (
67+
simple_range,
68+
)
6669
from tests.testing_utilities import (
6770
graph_fetcher,
6871
)
@@ -942,6 +945,12 @@
942945
""",
943946
id="simple_cross_6",
944947
),
948+
pytest.param(
949+
simple_range,
950+
"""
951+
""",
952+
id="simple_range",
953+
),
945954
],
946955
)
947956
def test_qualify_node_to_ast_string(

0 commit comments

Comments
 (0)