Skip to content

Commit 67044fb

Browse files
authored
Merge branch 'main' into copilot/create-fast-json-lines-decoder
2 parents 3833757 + 5b8f4b8 commit 67044fb

File tree

7 files changed

+228
-155
lines changed

7 files changed

+228
-155
lines changed

opteryx/compiled/joins/cross_join.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,11 @@ cpdef tuple numpy_build_filtered_rows_indices_and_column(numpy.ndarray column_da
161161

162162
# Handle set initialization based on element dtype
163163
if numpy.issubdtype(element_dtype, numpy.integer):
164-
valid_values_typed = set([int(v) for v in valid_values])
164+
valid_values_typed = {int(v) for v in valid_values}
165165
elif numpy.issubdtype(element_dtype, numpy.floating):
166-
valid_values_typed = set([parse_fast_float(v) for v in valid_values])
166+
valid_values_typed = {parse_fast_float(v) for v in valid_values}
167167
elif numpy.issubdtype(element_dtype, numpy.str_):
168-
valid_values_typed = set([unicode(v) for v in valid_values])
168+
valid_values_typed = {unicode(v) for v in valid_values}
169169
else:
170170
valid_values_typed = valid_values # Fallback to generic Python set
171171

opteryx/cursor.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,25 @@ def execute_to_arrow(
336336
if isinstance(result_data, pyarrow.Table):
337337
return result_data
338338
try:
339-
return pyarrow.concat_tables(result_data, promote_options="permissive")
339+
# arrow allows duplicate column names, but not when concatting
340+
from itertools import chain
341+
342+
first_table = next(result_data, None)
343+
if first_table is not None:
344+
column_names = first_table.column_names
345+
if len(column_names) != len(set(column_names)):
346+
temporary_names = [f"col_{i}" for i in range(len(column_names))]
347+
first_table = first_table.rename_columns(temporary_names)
348+
return_table = pyarrow.concat_tables(
349+
chain(
350+
[first_table], (t.rename_columns(temporary_names) for t in result_data)
351+
),
352+
promote_options="permissive",
353+
)
354+
return return_table.rename_columns(column_names)
355+
return pyarrow.concat_tables(
356+
chain([first_table], result_data), promote_options="permissive"
357+
)
340358
except (
341359
pyarrow.ArrowInvalid,
342360
pyarrow.ArrowTypeError,

opteryx/operators/exit_node.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def __init__(self, properties: QueryProperties, **parameters):
4040
final_names = []
4141
for column in self.columns:
4242
final_columns.append(column.schema_column.identity)
43-
final_names.append(column.current_name)
43+
final_names.append(column.alias)
4444

4545
if len(final_columns) != len(set(final_columns)): # pragma: no cover
4646
from collections import Counter
@@ -57,7 +57,7 @@ def __init__(self, properties: QueryProperties, **parameters):
5757
# if column.schema_column.origin:
5858
# final_names.append(f"{column.schema_column.origin[0]}.{column.current_name}")
5959
# else:
60-
final_names.append(column.qualified_name)
60+
final_names.append(column.alias)
6161

6262
self.final_columns = final_columns
6363
self.final_names = final_names

opteryx/planner/binder/binder_visitor.py

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -380,14 +380,7 @@ def visit_exit(self, node: Node, context: BindingContext) -> Tuple[Node, Binding
380380
# clear the derived schema
381381
context.schemas.pop("$derived", None)
382382

383-
seen = set()
384-
needs_qualifier = len(context.schemas) > 1 or any(
385-
column.name in seen or seen.add(column.name) is not None # type: ignore
386-
for schema in context.schemas.values()
387-
for column in schema.columns
388-
)
389-
390-
def name_column(qualifier, column):
383+
def name_column(column):
391384
for projection_column in node.columns:
392385
if (
393386
projection_column.schema_column
@@ -396,20 +389,11 @@ def name_column(qualifier, column):
396389
if projection_column.alias:
397390
return projection_column.alias
398391

399-
if len(context.relations) > 1 or needs_qualifier:
400-
if isinstance(projection_column, LogicalColumn):
401-
if qualifier:
402-
projection_column.source = qualifier
403-
return projection_column.qualified_name
404-
return f"{qualifier}.{column.name}"
405-
406392
if projection_column.query_column:
407393
return str(projection_column.query_column)
408394
if projection_column.current_name:
409395
return projection_column.current_name
410396

411-
if needs_qualifier:
412-
return f"{qualifier}.{column.name}"
413397
return column.name
414398

415399
def keep_column(column, identities):
@@ -441,15 +425,15 @@ def keep_column(column, identities):
441425
identities.append(column.identity)
442426

443427
columns = []
444-
for qualifier, schema in context.schemas.items():
428+
for _, schema in context.schemas.items():
445429
for column in schema.columns:
446430
if keep_column(column, identities):
447-
column_name = name_column(qualifier=qualifier, column=column)
431+
column_name = name_column(column=column)
448432
column_reference = LogicalColumn(
449433
node_type=NodeType.IDENTIFIER,
450434
source_column=column_name,
451435
source=None,
452-
alias=None,
436+
alias=column_name,
453437
schema_column=column,
454438
)
455439
columns.append(column_reference)

opteryx/planner/logical_planner/logical_planner_builders.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,12 +271,21 @@ def ceiling(value, alias: Optional[List[str]] = None, key=None):
271271

272272

273273
def compound_identifier(branch, alias: Optional[List[str]] = None, key=None):
274-
return LogicalColumn(
274+
column = LogicalColumn(
275275
node_type=NodeType.IDENTIFIER, # column type
276276
alias=alias, # type: ignore
277277
source_column=branch[-1]["value"], # the source column
278278
source=".".join(p["value"] for p in branch[:-1]), # the source relation
279279
)
280+
alias_name = alias[0] if isinstance(alias, list) and alias else alias
281+
if alias_name:
282+
column.query_column = alias_name
283+
else:
284+
qualifier = column.source
285+
column.query_column = (
286+
f"{qualifier}.{column.source_column}" if qualifier else column.source_column
287+
)
288+
return column
280289

281290

282291
def expression_with_alias(branch, alias: Optional[List[str]] = None, key=None):
@@ -424,11 +433,14 @@ def identifier(branch, alias: Optional[List[str]] = None, key=None):
424433
"""idenitifier doesn't have a qualifier (recorded in source)"""
425434
if "Identifier" in branch:
426435
return build(branch["Identifier"], alias=alias)
427-
return LogicalColumn(
436+
column = LogicalColumn(
428437
node_type=NodeType.IDENTIFIER, # column type
429438
alias=alias, # type: ignore
430439
source_column=branch["value"], # the source column
431440
)
441+
alias_name = alias[0] if isinstance(alias, list) and alias else alias
442+
column.query_column = alias_name or column.source_column
443+
return column
432444

433445

434446
def in_list(branch, alias: Optional[List[str]] = None, key=None):

0 commit comments

Comments
 (0)