Skip to content

Commit 721eecc

Browse files
committed
range collections base, initial try
1 parent 77f63d1 commit 721eecc

File tree

9 files changed

+355
-1
lines changed

9 files changed

+355
-1
lines changed

pydough/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
"get_logger",
1313
"init_pydough_context",
1414
"parse_json_metadata_from_file",
15+
"range_collection",
1516
"to_df",
1617
"to_sql",
1718
]
@@ -22,6 +23,7 @@
2223
from .logger import get_logger
2324
from .metadata import parse_json_metadata_from_file
2425
from .unqualified import display_raw, from_string, init_pydough_context
26+
from .qdag.collections import range_collection
2527

2628
# Create a default session for the user to interact with.
2729
# In most situations users will just use this session and

pydough/qdag/collections/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
"TableCollection",
2222
"TopK",
2323
"Where",
24+
"range_collection",
2425
]
2526

2627
from .augmenting_child_operator import AugmentingChildOperator
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""A user-defined collection of integers in a specified range.
2+
Usage:
3+
`pydough.range_collection(start, end, step, name, column_name)`
4+
5+
This module defines a collection that generates integers from `start` to `end`
6+
with a specified `step`. The user must specify the name of the collection and the
7+
name of the column that will hold the integer values.
8+
"""
9+
10+
from pydough.qdag import PyDoughQDAG
11+
from pydough.types import NumericType
12+
from pydough.types.pydough_type import PyDoughType
13+
14+
from .user_collections import PyDoughUserGeneratedCollection
15+
16+
__all__ = ["RangeGeneratedCollection"]


class RangeGeneratedCollection(PyDoughUserGeneratedCollection):
    """
    User-generated collection exposing a single numeric column whose values
    are described by a `start`, an `end` and a `step`.
    """

    def __init__(
        self,
        name: str,
        column_name: str,
        start: PyDoughQDAG,
        end: PyDoughQDAG,
        step: PyDoughQDAG,
    ) -> None:
        """
        Args:
            `name`: the name of the collection.
            `column_name`: the name of the only column of the collection.
            `start`: the QDAG node describing where the range starts.
            `end`: the QDAG node describing where the range ends.
            `step`: the QDAG node describing the increment of the range.
        """
        # The base class stores the column names as a list; a range always
        # has exactly one column.
        super().__init__(name=name, columns=[column_name])
        self.start = start
        self.end = end
        self.step = step

    @property
    def column_names_and_types(self) -> list[tuple[str, PyDoughType]]:
        """
        A one-element list pairing the range column with a numeric type.
        """
        return [(self.columns[0], NumericType())]

    def is_empty(self) -> bool:
        """
        Always reports the collection as non-empty.
        """
        # TODO: derive emptiness from `start`/`end`/`step`. They are stored as
        # QDAG nodes rather than plain integers, so no comparison is attempted
        # here yet.
        return False

    def to_string(self) -> str:
        """
        The string form of the collection: its name, its column and the
        three range bounds.
        """
        return f"RangeCollection({self.name}: {self.columns[0]} from {self.start} to {self.end} step {self.step})"
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
from pydough.qdag import PyDoughCollectionQDAG
2+
from pydough.qdag.abstract_pydough_qdag import PyDoughQDAG
3+
from pydough.qdag.errors import PyDoughQDAGException
4+
from pydough.qdag.expressions.collation_expression import CollationExpression
5+
from pydough.qdag.expressions.reference import Reference
6+
7+
from .child_access import ChildAccess
8+
from .collection_tree_form import CollectionTreeForm
9+
from .user_collections import PyDoughUserGeneratedCollection
10+
11+
12+
# or should it be CollectionAccess?
13+
class PyDoughUserGeneratedCollectionQDag(ChildAccess):
    """
    QDAG collection node that wraps a `PyDoughUserGeneratedCollection`
    accessed from an ancestor context.
    """

    def __init__(
        self,
        ancestor: PyDoughCollectionQDAG,
        collection: PyDoughUserGeneratedCollection,
    ):
        """
        Args:
            `ancestor`: the context the collection is accessed from; must not
            be None.
            `collection`: the user-generated collection being accessed.
        """
        assert ancestor is not None
        super().__init__(ancestor)
        self._collection = collection

    def clone_with_parent(
        self, new_ancestor: PyDoughCollectionQDAG
    ) -> "PyDoughUserGeneratedCollectionQDag":
        """
        Builds a copy of `self` that wraps the same collection but hangs off
        a different ancestor.

        Args:
            `new_ancestor`: the node to use as the ancestor of the copy.

        Returns:
            The newly built node.
        """
        return PyDoughUserGeneratedCollectionQDag(
            ancestor=new_ancestor, collection=self._collection
        )

    def to_tree_form(self, is_last: bool) -> CollectionTreeForm:
        """
        Not supported yet for user-generated collections.
        """
        raise NotImplementedError

    @property
    def collection(self) -> PyDoughUserGeneratedCollection:
        """
        The user-generated collection wrapped by this node.
        """
        return self._collection

    @property
    def name(self) -> str:
        """
        The name of the wrapped collection.
        """
        return self.collection.name

    @property
    def preceding_context(self) -> PyDoughCollectionQDAG | None:
        """
        Always None for this node.
        """
        return None

    @property
    def ordering(self) -> list[CollationExpression] | None:
        """
        Always None for this node.
        """
        return None

    @property
    def calc_terms(self) -> set[str]:
        """
        The column names of the wrapped collection, as a set.
        """
        return set(self.collection.columns)

    @property
    def all_terms(self) -> set[str]:
        """
        The names accessible from this node; identical to `calc_terms`.
        """
        return self.calc_terms

    @property
    def ancestral_mapping(self) -> dict[str, int]:
        """
        The ancestral mapping of the ancestor, returned as-is.
        """
        return self._ancestor.ancestral_mapping

    @property
    def inherited_downstreamed_terms(self) -> set[str]:
        """
        The downstreamed terms of the ancestor, or an empty set when the
        ancestor is falsy.
        """
        ancestor = self._ancestor
        return ancestor.inherited_downstreamed_terms if ancestor else set()

    def is_singular(self, context: "PyDoughCollectionQDAG") -> bool:
        """
        Always False, whatever `context` is.
        """
        return False

    def get_term(self, term_name: str) -> PyDoughQDAG:
        """
        Looks up one of the columns of the wrapped collection.

        Args:
            `term_name`: the name of the column being requested.

        Returns:
            A `Reference` to `term_name`.

        Raises:
            `PyDoughQDAGException`: if `term_name` is not a column of the
            wrapped collection.
        """
        if term_name in self.collection.columns:
            # NOTE(review): the reference is built against the ancestor, not
            # against this node - confirm that this is intended.
            return Reference(self._ancestor, term_name)
        raise PyDoughQDAGException(self.name_mismatch_error(term_name))

    def get_expression_position(self, expr_name: str) -> int:
        """
        Not supported for this node: always raises `PyDoughQDAGException`.
        """
        raise PyDoughQDAGException(f"Cannot call get_expression_position on {self!r}")

    @property
    def unique_terms(self) -> list[str]:
        """
        The column names of the wrapped collection.
        """
        return self.collection.columns

    @property
    def standalone_string(self) -> str:
        """
        String form of the node on its own: the collection name followed by
        its comma-separated column names.
        """
        column_list = ", ".join(self.collection.columns)
        return f"UserGeneratedCollection({self.name}, {column_list})"

    @property
    def key(self) -> str:
        """
        The key string of the node, derived from the collection name.
        """
        return f"USER_GENERATED_COLLECTION-{self.name}"

    def to_string(self) -> str:
        """
        String form of the node, shaped like `name(col1, col2)`.
        """
        column_list = ", ".join(self.collection.columns)
        return f"{self.name}({column_list})"

    @property
    def tree_item_string(self) -> str:
        """
        The label used for this node inside a tree rendering.
        """
        return f"UserGeneratedCollection({self.name})"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
Base definition of PyDough QDAG collection type for accesses to a user defined
3+
collection of the current context.
4+
"""
5+
6+
from abc import ABC, abstractmethod
7+
8+
from pydough.types.pydough_type import PyDoughType
9+
10+
__all__ = ["PyDoughUserGeneratedCollection"]
11+
12+
13+
class PyDoughUserGeneratedCollection(ABC):
    """
    Abstract base class for a user defined table collection.
    This class defines the interface for accessing a user defined table
    collection directly, without any specific implementation details.
    It is intended to be subclassed by specific implementations that provide
    the actual behavior and properties of the collection.

    Equality and hashing are both derived from `to_string()`, so two
    collections with the same string form are interchangeable.
    """

    def __init__(self, name: str, columns: list[str]) -> None:
        """
        Args:
            `name`: the name used for the collection.
            `columns`: the names of the columns of the collection.
        """
        self._name = name
        # Copy the list: the hash is derived from the string form, which
        # subclasses may build from the columns, so a caller mutating its own
        # list afterwards must not be able to change the hash of an instance
        # that is already stored in a set or dict.
        self._columns = list(columns)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, PyDoughUserGeneratedCollection) and repr(self) == repr(
            other
        )

    def __repr__(self) -> str:
        return self.to_string()

    def __hash__(self) -> int:
        # Kept consistent with `__eq__`, which also compares `repr()`.
        return hash(repr(self))

    def __str__(self) -> str:
        return self.to_string()

    @property
    def name(self) -> str:
        """Return the name used for the collection."""
        return self._name

    @property
    def columns(self) -> list[str]:
        """Return column names."""
        return self._columns

    @property
    @abstractmethod
    def column_names_and_types(self) -> list[tuple[str, PyDoughType]]:
        """Return column names and their types."""

    @abstractmethod
    def is_empty(self) -> bool:
        """Check if the collection is empty."""

    @abstractmethod
    def to_string(self) -> str:
        """Return a string representation of the collection."""

pydough/qdag/node_builder.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
PyDoughOperator,
1919
builtin_registered_operators,
2020
)
21+
from pydough.qdag.collections.range_collection import RangeGeneratedCollection
22+
from pydough.qdag.collections.user_collection_qdag import (
23+
PyDoughUserGeneratedCollectionQDag,
24+
)
25+
from pydough.qdag.collections.user_collections import PyDoughUserGeneratedCollection
2126
from pydough.types import PyDoughType
2227

2328
from .abstract_pydough_qdag import PyDoughQDAG
@@ -393,3 +398,37 @@ def build_singular(
393398
The newly created PyDough SINGULAR instance.
394399
"""
395400
return Singular(preceding_context)
401+
402+
def build_generated_collection(
    self,
    preceding_context: PyDoughCollectionQDAG,
    name: str,
    column_name: list[str],
    args: list[PyDoughQDAG],
) -> PyDoughUserGeneratedCollectionQDag:
    """
    Creates a new user-defined collection.

    Args:
        `preceding_context`: the collection node used as the ancestor of
        the new collection node.
        `name`: the name of the collection.
        `column_name`: the column names of the collection; only the first
        one is used, as the column that will hold the integer values.
        `args`: the arguments that define the collection, such as a range of
        integers or a dataframe. Only the range form (start, end, step) is
        currently handled.

    Returns:
        The newly created user-defined collection.

    Raises:
        `PyDoughQDAGException`: if `args` does not hold exactly 3 elements
        or `column_name` is empty.
    """
    # TODO: case range vs. dataframe
    if len(args) != 3:
        raise PyDoughQDAGException(
            f"Expected 3 arguments for range collection, got {len(args)}"
        )
    # Reject an empty list explicitly so the caller gets the same exception
    # type as for malformed `args` rather than a bare IndexError.
    if not column_name:
        raise PyDoughQDAGException(
            "Expected at least 1 column name for range collection, got 0"
        )
    start, end, step = args
    collection: PyDoughUserGeneratedCollection = RangeGeneratedCollection(
        name, column_name[0], start, end, step
    )
    return PyDoughUserGeneratedCollectionQDag(
        ancestor=preceding_context, collection=collection
    )

pydough/unqualified/qualification.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
Where,
3939
WindowCall,
4040
)
41+
from pydough.qdag.collections.user_collection_qdag import (
42+
PyDoughUserGeneratedCollectionQDag,
43+
)
4144
from pydough.types import PyDoughType
4245

4346
from .errors import PyDoughUnqualifiedException
@@ -48,6 +51,7 @@
4851
UnqualifiedCalculate,
4952
UnqualifiedCollation,
5053
UnqualifiedCross,
54+
UnqualifiedGeneratedCollection,
5155
UnqualifiedLiteral,
5256
UnqualifiedNode,
5357
UnqualifiedOperation,
@@ -1261,6 +1265,48 @@ def qualify_cross(
12611265
)
12621266
return qualified_child
12631267

1268+
def qualify_generated_collection(
    self,
    unqualified: UnqualifiedGeneratedCollection,
    context: PyDoughCollectionQDAG,
    is_child: bool,
    is_cross: bool,
) -> PyDoughUserGeneratedCollectionQDag:
    """
    Qualifies an `UnqualifiedGeneratedCollection` into its QDAG collection
    node.

    The parcel of the unqualified node is read positionally: index 0 is
    forwarded to the builder as the collection name, index 1 as the column
    names, and index 2 holds the unqualified arguments, each of which is
    qualified first.

    Args:
        `unqualified`: the UnqualifiedGeneratedCollection instance to be
        transformed.
        `context`: the collection QDAG whose context the collection is being
        evaluated within.
        `is_child`: whether the collection is being qualified as a child
        of a child operator context, such as CALCULATE or PARTITION.
        `is_cross`: whether the collection being qualified is a CROSS JOIN
        operation.

    Returns:
        The PyDough QDAG object for the qualified collection node.
    """
    # TODO: how to handle dataframe case.
    qualified_args: list[PyDoughQDAG] = []
    for raw_arg in unqualified._parcel[2]:
        assert isinstance(raw_arg, UnqualifiedNode)
        qualified_args.append(
            self.qualify_node(raw_arg, context, [], is_child, is_cross)
        )

    return self.builder.build_generated_collection(
        context,
        unqualified._parcel[0],
        unqualified._parcel[1],
        qualified_args,
    )
1309+
12641310
def qualify_node(
12651311
self,
12661312
unqualified: UnqualifiedNode,
@@ -1333,6 +1379,10 @@ def qualify_node(
13331379
answer = self.qualify_best(unqualified, context, is_child, is_cross)
13341380
case UnqualifiedCross():
13351381
answer = self.qualify_cross(unqualified, context, is_child, is_cross)
1382+
case UnqualifiedGeneratedCollection():
1383+
answer = self.qualify_generated_collection(
1384+
unqualified, context, is_child, is_cross
1385+
)
13361386
case _:
13371387
raise PyDoughUnqualifiedException(
13381388
f"Cannot qualify {unqualified.__class__.__name__}: {unqualified!r}"

0 commit comments

Comments
 (0)