Skip to content

Commit 069d895

Browse files
committed
range collections base, initial try
1 parent f122cdb commit 069d895

File tree

9 files changed

+355
-1
lines changed

9 files changed

+355
-1
lines changed

pydough/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"get_logger",
1212
"init_pydough_context",
1313
"parse_json_metadata_from_file",
14+
"range_collection",
1415
"to_df",
1516
"to_sql",
1617
]
@@ -20,6 +21,7 @@
2021
from .exploration import explain, explain_structure, explain_term
2122
from .logger import get_logger
2223
from .metadata import parse_json_metadata_from_file
24+
from .qdag.collections import range_collection
2325
from .unqualified import display_raw, init_pydough_context
2426

2527
# Create a default session for the user to interact with.

pydough/qdag/collections/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"TableCollection",
2222
"TopK",
2323
"Where",
24+
"range_collection",
2425
]
2526

2627
from .augmenting_child_operator import AugmentingChildOperator
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""A user-defined collection of integers in a specified range.
2+
Usage:
3+
`pydough.range_collection(start, end, step, name, column_name)`
4+
5+
This module defines a collection that generates integers from `start` to `end`
6+
with a specified `step`. The user must specify the name of the collection and the
7+
name of the column that will hold the integer values.
8+
"""
9+
10+
from pydough.qdag import PyDoughQDAG
11+
from pydough.types import NumericType
12+
from pydough.types.pydough_type import PyDoughType
13+
14+
from .user_collections import PyDoughUserGeneratedCollection
15+
16+
__all__ = ["RangeGeneratedCollection"]


class RangeGeneratedCollection(PyDoughUserGeneratedCollection):
    """
    User-generated collection with a single column whose values are described
    by a `start`, an `end` and a `step`.

    The three bounds are stored as QDAG nodes exactly as received; this class
    does not evaluate them.
    """

    def __init__(
        self,
        name: str,
        column_name: str,
        start: PyDoughQDAG,
        end: PyDoughQDAG,
        step: PyDoughQDAG,
    ) -> None:
        """
        Args:
            `name`: the name of the collection.
            `column_name`: the name of the only column of the collection.
            `start`: QDAG node describing where the range starts.
            `end`: QDAG node describing where the range ends.
            `step`: QDAG node describing the increment between values.
        """
        super().__init__(name=name, columns=[column_name])
        self.start = start
        self.end = end
        self.step = step

    @property
    def column_names_and_types(self) -> list[tuple[str, PyDoughType]]:
        """
        A one-element list pairing the collection's column name with a
        `NumericType` instance.
        """
        return [(self.columns[0], NumericType())]

    def is_empty(self) -> bool:
        """
        Reports whether the collection has no rows. Currently this is a
        placeholder that always answers False.
        """
        # TODO: compute emptiness from the bounds (an earlier draft considered
        # comparing `self.start` and `self.end`). The bounds are QDAG nodes,
        # so until that is worked out the range is reported as non-empty.
        return False

    def to_string(self) -> str:
        """
        Returns a string showing the collection name, its column name and the
        three range bounds.
        """
        return (
            f"RangeCollection({self.name}: {self.columns[0]} "
            f"from {self.start} to {self.end} step {self.step})"
        )
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
from pydough.qdag import PyDoughCollectionQDAG
2+
from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG
3+
from pydough.qdag.errors import PyDoughQDAGException
4+
from pydough.qdag.expressions.collation_expression import CollationExpression
5+
from pydough.qdag.expressions.reference import Reference
6+
7+
from .child_access import ChildAccess
8+
from .collection_tree_form import CollectionTreeForm
9+
from .user_collections import PyDoughUserGeneratedCollection
10+
11+
12+
# TODO: decide whether this class should derive from CollectionAccess instead of ChildAccess.
13+
class PyDoughUserGeneratedCollectionQDag(ChildAccess):
    """
    QDAG collection node that wraps a `PyDoughUserGeneratedCollection` and is
    attached to an ancestor collection node.
    """

    def __init__(
        self,
        ancestor: PyDoughCollectionQDAG,
        collection: PyDoughUserGeneratedCollection,
    ):
        """
        Args:
            `ancestor`: the ancestor node; must not be None.
            `collection`: the user-generated collection wrapped by this node.
        """
        assert ancestor is not None
        super().__init__(ancestor)
        self._collection = collection

    def clone_with_parent(
        self, new_ancestor: PyDoughCollectionQDAG
    ) -> "PyDoughUserGeneratedCollectionQDag":
        """
        Builds a new node wrapping the same collection object as `self`, but
        attached to a different ancestor.

        Args:
            `new_ancestor`: the node to use as the ancestor of the clone.

        Returns:
            The newly built node.
        """
        return PyDoughUserGeneratedCollectionQDag(new_ancestor, self._collection)

    def to_tree_form(self, is_last: bool) -> CollectionTreeForm:
        """
        Not implemented for this node type; always raises
        `NotImplementedError`.
        """
        raise NotImplementedError

    @property
    def collection(self) -> PyDoughUserGeneratedCollection:
        """
        The user-generated collection object wrapped by this node.
        """
        return self._collection

    @property
    def name(self) -> str:
        """
        The name of the wrapped collection.
        """
        return self.collection.name

    @property
    def preceding_context(self) -> PyDoughCollectionQDAG | None:
        """
        Always None for this node type.
        """
        return None

    @property
    def ordering(self) -> list[CollationExpression] | None:
        """
        Always None for this node type.
        """
        return None

    @property
    def calc_terms(self) -> set[str]:
        """
        The column names of the wrapped collection, as a new set.
        """
        return set(self.collection.columns)

    @property
    def all_terms(self) -> set[str]:
        """
        The set of term names accessible from this node; identical to
        `calc_terms`.
        """
        return self.calc_terms

    @property
    def ancestral_mapping(self) -> dict[str, int]:
        """
        The ancestral mapping of the ancestor node, returned as-is.
        """
        return self._ancestor.ancestral_mapping

    @property
    def inherited_downstreamed_terms(self) -> set[str]:
        """
        The ancestor's inherited downstreamed terms, or an empty set when the
        ancestor is falsy.
        """
        if not self._ancestor:
            return set()
        return self._ancestor.inherited_downstreamed_terms

    def is_singular(self, context: "PyDoughCollectionQDAG") -> bool:
        """
        Always False, regardless of `context`.
        """
        return False

    def get_term(self, term_name: str) -> PyDoughQDAG:
        """
        Looks up one of the wrapped collection's columns by name.

        Args:
            `term_name`: the name of the requested term.

        Returns:
            A `Reference` to `term_name`.

        Raises:
            `PyDoughQDAGException`: if `term_name` is not one of the columns
            of the wrapped collection.
        """
        if term_name in self.collection.columns:
            # NOTE(review): the reference is built against the ancestor, not
            # against this node -- confirm that this is intended.
            return Reference(self._ancestor, term_name)
        raise PyDoughQDAGException(self.name_mismatch_error(term_name))

    def get_expression_position(self, expr_name: str) -> int:
        """
        Unsupported for this node type; always raises
        `PyDoughQDAGException`.
        """
        raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}")

    @property
    def unique_terms(self) -> list[str]:
        """
        The column list of the wrapped collection (the same list object, not
        a copy).
        """
        return self.collection.columns

    @property
    def standalone_string(self) -> str:
        """
        String form of the node on its own: the collection name followed by
        its comma-separated column names.
        """
        column_list: str = ", ".join(self.collection.columns)
        return f"UserGeneratedCollection({self.name}, {column_list})"

    @property
    def key(self) -> str:
        """
        A string key built from a fixed prefix and the collection name.
        """
        return f"USER_GENERATED_COLLECTION-{self.name}"

    def to_string(self) -> str:
        """
        Stringifies the node as `name(col1, col2, ...)`.
        """
        column_list: str = ", ".join(self.collection.columns)
        return f"{self.name}({column_list})"

    @property
    def tree_item_string(self) -> str:
        """
        The single-line label for this node, containing only the collection
        name.
        """
        return f"UserGeneratedCollection({self.name})"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
Base definition of PyDough QDAG collection type for accesses to a user defined
3+
collection of the current context.
4+
"""
5+
6+
from abc import ABC, abstractmethod
7+
8+
from pydough.types.pydough_type import PyDoughType
9+
10+
__all__ = ["PyDoughUserGeneratedCollection"]
11+
12+
13+
class PyDoughUserGeneratedCollection(ABC):
    """
    Abstract base class for a user-defined collection.

    It stores the collection's name and column names and declares the
    interface (`column_names_and_types`, `is_empty`, `to_string`) that each
    concrete kind of user-defined collection must implement. Equality, hashing
    and both string conversions are all derived from `to_string`.
    """

    def __init__(self, name: str, columns: list[str]) -> None:
        """
        Args:
            `name`: the name used for the collection.
            `columns`: the names of the collection's columns; the list is
            stored as-is, without copying.
        """
        self._columns = columns
        self._name = name

    @property
    def name(self) -> str:
        """The name used for the collection."""
        return self._name

    @property
    def columns(self) -> list[str]:
        """The column names given at construction (same list object)."""
        return self._columns

    @property
    @abstractmethod
    def column_names_and_types(self) -> list[tuple[str, PyDoughType]]:
        """The column names paired with their PyDough types."""

    @abstractmethod
    def is_empty(self) -> bool:
        """Whether the collection is empty."""

    @abstractmethod
    def to_string(self) -> str:
        """A string representation of the collection."""

    def __repr__(self) -> str:
        return self.to_string()

    def __str__(self) -> str:
        return self.to_string()

    def __eq__(self, other) -> bool:
        # Two user-generated collections are equal exactly when their repr
        # strings match; anything else is never equal.
        if not isinstance(other, PyDoughUserGeneratedCollection):
            return False
        return repr(self) == repr(other)

    def __hash__(self) -> int:
        # Hash on the same string that __eq__ compares.
        return hash(repr(self))

pydough/qdag/node_builder.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
PyDoughOperator,
1919
builtin_registered_operators,
2020
)
21+
from pydough.qdag.collections.range_collection import RangeGeneratedCollection
22+
from pydough.qdag.collections.user_collection_qdag import (
23+
PyDoughUserGeneratedCollectionQDag,
24+
)
25+
from pydough.qdag.collections.user_collections import PyDoughUserGeneratedCollection
2126
from pydough.types import PyDoughType
2227

2328
from .abstract_pydough_qdag import PyDoughQDAG
@@ -392,3 +397,37 @@ def build_singular(
392397
The newly created PyDough SINGULAR instance.
393398
"""
394399
return Singular(preceding_context)
400+
401+
def build_generated_collection(
    self,
    preceding_context: PyDoughCollectionQDAG,
    name: str,
    column_name: list[str],
    args: list[PyDoughQDAG],
) -> PyDoughUserGeneratedCollectionQDag:
    """
    Creates a new user-defined collection node. Only range collections are
    handled at the moment.

    Args:
        `preceding_context`: the collection node used as the ancestor of the
        new node.
        `name`: the name of the collection.
        `column_name`: the column names of the collection; only the first
        entry is used, as the name of the range column.
        `args`: the arguments that define the collection. For a range these
        are exactly three QDAG nodes, passed on in order as start, end and
        step.

    Returns:
        The newly created user-defined collection node.

    Raises:
        `PyDoughQDAGException`: if `args` does not contain exactly 3 entries,
        or if `column_name` is empty.
    """
    # TODO: case range vs. dataframe
    if len(args) != 3:
        raise PyDoughQDAGException(
            f"Expected 3 arguments for range collection, got {len(args)}"
        )
    # Fail with a descriptive error instead of a bare IndexError below.
    # NOTE(review): `column_name` is annotated as list[str]; if a caller
    # passes a plain string, `column_name[0]` is its first character --
    # confirm against the caller.
    if not column_name:
        raise PyDoughQDAGException(
            "Expected at least 1 column name for range collection, got 0"
        )
    start, end, step = args
    collection: PyDoughUserGeneratedCollection = RangeGeneratedCollection(
        name, column_name[0], start, end, step
    )
    return PyDoughUserGeneratedCollectionQDag(
        ancestor=preceding_context, collection=collection
    )

pydough/unqualified/qualification.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
Where,
3939
WindowCall,
4040
)
41+
from pydough.qdag.collections.user_collection_qdag import (
42+
PyDoughUserGeneratedCollectionQDag,
43+
)
4144
from pydough.types import PyDoughType
4245

4346
from .errors import PyDoughUnqualifiedException
@@ -48,6 +51,7 @@
4851
UnqualifiedCalculate,
4952
UnqualifiedCollation,
5053
UnqualifiedCross,
54+
UnqualifiedGeneratedCollection,
5155
UnqualifiedLiteral,
5256
UnqualifiedNode,
5357
UnqualifiedOperation,
@@ -1260,6 +1264,48 @@ def qualify_cross(
12601264
)
12611265
return qualified_child
12621266

1267+
def qualify_generated_collection(
    self,
    unqualified: UnqualifiedGeneratedCollection,
    context: PyDoughCollectionQDAG,
    is_child: bool,
    is_cross: bool,
) -> PyDoughUserGeneratedCollectionQDag:
    """
    Transforms an `UnqualifiedGeneratedCollection` into its qualified QDAG
    node.

    Every entry of the third parcel slot is qualified as a node within
    `context`; the first two parcel slots are forwarded untouched to the
    builder as the collection name and column name(s).

    Args:
        `unqualified`: the UnqualifiedGeneratedCollection instance to be
        transformed.
        `context`: the collection QDAG whose context the collection is being
        evaluated within.
        `is_child`: whether the collection is being qualified as a child
        of a child operator context, such as CALCULATE or PARTITION.
        `is_cross`: whether the collection being qualified is a CROSS JOIN
        operation.

    Returns:
        The PyDough QDAG object for the qualified collection node.
    """
    parcel = unqualified._parcel

    # TODO: how to handle dataframe case.
    qualified_args: list[PyDoughQDAG] = []
    for raw_arg in parcel[2]:
        assert isinstance(raw_arg, UnqualifiedNode)
        qualified_args.append(
            self.qualify_node(raw_arg, context, [], is_child, is_cross)
        )

    return self.builder.build_generated_collection(
        context,
        parcel[0],
        parcel[1],
        qualified_args,
    )
1308+
12631309
def qualify_node(
12641310
self,
12651311
unqualified: UnqualifiedNode,
@@ -1332,6 +1378,10 @@ def qualify_node(
13321378
answer = self.qualify_best(unqualified, context, is_child, is_cross)
13331379
case UnqualifiedCross():
13341380
answer = self.qualify_cross(unqualified, context, is_child, is_cross)
1381+
case UnqualifiedGeneratedCollection():
1382+
answer = self.qualify_generated_collection(
1383+
unqualified, context, is_child, is_cross
1384+
)
13351385
case _:
13361386
raise PyDoughUnqualifiedException(
13371387
f"Cannot qualify {unqualified.__class__.__name__}: {unqualified!r}"

0 commit comments

Comments
 (0)