Skip to content

Commit 2f06485

Browse files
authored
SNOW-1800374: Adding support for options and partition_by to DataFrameWriter (#2841)
<!--- Please answer these questions before creating your pull request. Thanks! --->

1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR.

   <!--- In this section, please add a Snowflake Jira issue number. Note that if a corresponding GitHub issue exists, you should still include the Snowflake Jira issue number. For example, for GitHub issue #1400, you should add "SNOW-1335071" here. --->

   Fixes SNOW-1800374

2. Fill out the following pre-review checklist:

   - [x] I am adding a new automated test(s) to verify correctness of my new code
   - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
   - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes.
   - [x] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development)

3. Please describe how your code solves the related issue.

   Please write a short description of how your code change solves the related issue.
1 parent 01b8f33 commit 2f06485

File tree

3 files changed

+566
-107
lines changed

3 files changed

+566
-107
lines changed

src/snowflake/snowpark/_internal/proto/ast.proto

Lines changed: 36 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ message PythonTimeZone {
8989
int64 offset_seconds = 2;
9090
}
9191

92-
// sp-type.ir:70
92+
// sp-type.ir:71
9393
message SpCallable {
9494
int64 id = 1;
9595
string name = 2;
@@ -136,71 +136,71 @@ message SpDataType {
136136
}
137137
}
138138

139-
// sp-type.ir:27
139+
// sp-type.ir:28
140140
message SpArrayType {
141141
bool structured = 1;
142142
SpDataType ty = 2;
143143
}
144144

145-
// sp-type.ir:31
145+
// sp-type.ir:32
146146
message SpColumnIdentifier {
147147
string name = 1;
148148
}
149149

150-
// sp-type.ir:33
150+
// sp-type.ir:34
151151
message SpDecimalType {
152152
int64 precision = 1;
153153
int64 scale = 2;
154154
}
155155

156-
// sp-type.ir:40
156+
// sp-type.ir:41
157157
message SpMapType {
158158
SpDataType key_ty = 1;
159159
bool structured = 2;
160160
SpDataType value_ty = 3;
161161
}
162162

163-
// sp-type.ir:43
163+
// sp-type.ir:44
164164
message SpStringType {
165165
google.protobuf.Int64Value length = 1;
166166
}
167167

168-
// sp-type.ir:44
168+
// sp-type.ir:45
169169
message SpStructField {
170170
SpColumnIdentifier column_identifier = 1;
171171
SpDataType data_type = 2;
172172
bool nullable = 3;
173173
}
174174

175-
// sp-type.ir:45
175+
// sp-type.ir:46
176176
message SpStructType {
177177
repeated SpStructField fields = 1;
178178
bool structured = 2;
179179
}
180180

181-
// sp-type.ir:47
181+
// sp-type.ir:48
182182
message SpTimestampType {
183183
SpTimestampTimeZone time_zone = 1;
184184
}
185185

186-
// sp-type.ir:49
186+
// sp-type.ir:50
187187
message SpVectorType {
188188
int64 dimension = 1;
189189
SpDataType ty = 2;
190190
}
191191

192-
// sp-type.ir:51
192+
// sp-type.ir:52
193193
message SpPandasSeriesType {
194194
SpDataType el_ty = 1;
195195
}
196196

197-
// sp-type.ir:52
197+
// sp-type.ir:53
198198
message SpPandasDataFrameType {
199199
repeated string col_names = 1;
200200
repeated SpDataType col_types = 2;
201201
}
202202

203-
// sp-type.ir:59
203+
// sp-type.ir:60
204204
message SpDataframeData {
205205
oneof sealed_value {
206206
SpDataframeData_List sp_dataframe_data__list = 1;
@@ -209,35 +209,35 @@ message SpDataframeData {
209209
}
210210
}
211211

212-
// sp-type.ir:60
212+
// sp-type.ir:61
213213
message SpDataframeData_List {
214214
repeated Expr vs = 1;
215215
}
216216

217-
// sp-type.ir:61
217+
// sp-type.ir:62
218218
message SpDataframeData_Tuple {
219219
repeated Expr vs = 1;
220220
}
221221

222-
// sp-type.ir:62
222+
// sp-type.ir:63
223223
message SpDataframeData_Pandas {
224224
StagedPandasDataframe v = 1;
225225
}
226226

227-
// sp-type.ir:65
227+
// sp-type.ir:66
228228
message SpDataframeSchema {
229229
oneof sealed_value {
230230
SpDataframeSchema_List sp_dataframe_schema__list = 1;
231231
SpDataframeSchema_Struct sp_dataframe_schema__struct = 2;
232232
}
233233
}
234234

235-
// sp-type.ir:66
235+
// sp-type.ir:67
236236
message SpDataframeSchema_List {
237237
repeated string vs = 1;
238238
}
239239

240-
// sp-type.ir:67
240+
// sp-type.ir:68
241241
message SpDataframeSchema_Struct {
242242
SpStructType v = 1;
243243
}
@@ -292,20 +292,20 @@ message SpNullOrder {
292292
}
293293
}
294294

295-
// sp-type.ir:82
295+
// sp-type.ir:83
296296
message SpPivotValue {
297297
oneof sealed_value {
298298
SpPivotValue_Dataframe sp_pivot_value__dataframe = 1;
299299
SpPivotValue_Expr sp_pivot_value__expr = 2;
300300
}
301301
}
302302

303-
// sp-type.ir:83
303+
// sp-type.ir:84
304304
message SpPivotValue_Expr {
305305
Expr v = 1;
306306
}
307307

308-
// sp-type.ir:84
308+
// sp-type.ir:85
309309
message SpPivotValue_Dataframe {
310310
SpDataframeRef v = 1;
311311
}
@@ -363,7 +363,7 @@ message SrcPosition {
363363
int64 start_line = 5;
364364
}
365365

366-
// sp-type.ir:55
366+
// sp-type.ir:56
367367
message StagedPandasDataframe {
368368
SpNameRef temp_table = 1;
369369
}
@@ -1585,7 +1585,7 @@ message SpDataframeApply {
15851585
SrcPosition src = 3;
15861586
}
15871587

1588-
// sp-df-io.ir:185
1588+
// sp-df-io.ir:187
15891589
message SpDataframeCacheResult {
15901590
SpDataframeExpr df = 1;
15911591
SrcPosition src = 2;
@@ -1610,7 +1610,7 @@ message SpDataframeCollect {
16101610
repeated Tuple_String_String statement_params = 7;
16111611
}
16121612

1613-
// sp-df-io.ir:167
1613+
// sp-df-io.ir:169
16141614
message SpDataframeCopyIntoTable {
16151615
repeated Tuple_String_Expr copy_options = 1;
16161616
SpDataframeExpr df = 2;
@@ -1634,7 +1634,7 @@ message SpDataframeCount {
16341634
repeated Tuple_String_String statement_params = 4;
16351635
}
16361636

1637-
// sp-df-io.ir:151
1637+
// sp-df-io.ir:153
16381638
message SpDataframeCreateOrReplaceDynamicTable {
16391639
List_Expr clustering_keys = 1;
16401640
google.protobuf.StringValue comment = 2;
@@ -1652,7 +1652,7 @@ message SpDataframeCreateOrReplaceDynamicTable {
16521652
string warehouse = 14;
16531653
}
16541654

1655-
// sp-df-io.ir:143
1655+
// sp-df-io.ir:145
16561656
message SpDataframeCreateOrReplaceView {
16571657
google.protobuf.StringValue comment = 1;
16581658
SpDataframeExpr df = 2;
@@ -2248,8 +2248,10 @@ message SpDataframeWithColumns {
22482248
// sp-df-io.ir:84
22492249
message SpDataframeWrite {
22502250
SpDataframeExpr df = 1;
2251-
SpSaveMode save_mode = 2;
2252-
SrcPosition src = 3;
2251+
repeated Tuple_String_Expr options = 2;
2252+
Expr partition_by = 3;
2253+
SpSaveMode save_mode = 4;
2254+
SrcPosition src = 5;
22532255
}
22542256

22552257
message SpDataframeWriter {
@@ -2609,7 +2611,7 @@ message SpWindowSpecRowsBetween {
26092611
message SpWindowType {
26102612
}
26112613

2612-
// sp-df-io.ir:133
2614+
// sp-df-io.ir:135
26132615
message SpWriteCopyIntoLocation {
26142616
bool block = 1;
26152617
repeated Tuple_String_Expr copy_options = 2;
@@ -2624,7 +2626,7 @@ message SpWriteCopyIntoLocation {
26242626
repeated Tuple_String_String statement_params = 11;
26252627
}
26262628

2627-
// sp-df-io.ir:100
2629+
// sp-df-io.ir:102
26282630
message SpWriteCsv {
26292631
bool block = 1;
26302632
repeated Tuple_String_Expr copy_options = 2;
@@ -2646,7 +2648,7 @@ message SpWriteFile {
26462648
}
26472649
}
26482650

2649-
// sp-df-io.ir:104
2651+
// sp-df-io.ir:106
26502652
message SpWriteJson {
26512653
bool block = 1;
26522654
repeated Tuple_String_Expr copy_options = 2;
@@ -2676,7 +2678,7 @@ message SpWritePandas {
26762678
string table_type = 13;
26772679
}
26782680

2679-
// sp-df-io.ir:108
2681+
// sp-df-io.ir:110
26802682
message SpWriteParquet {
26812683
bool block = 1;
26822684
repeated Tuple_String_Expr copy_options = 2;
@@ -2689,7 +2691,7 @@ message SpWriteParquet {
26892691
repeated Tuple_String_String statement_params = 9;
26902692
}
26912693

2692-
// sp-df-io.ir:112
2694+
// sp-df-io.ir:114
26932695
message SpWriteTable {
26942696
bool block = 1;
26952697
google.protobuf.BoolValue change_tracking = 2;

src/snowflake/snowpark/dataframe_writer.py

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
)
1717
from snowflake.snowpark._internal.ast.utils import (
1818
build_expr_from_snowpark_column_or_col_name,
19+
build_expr_from_snowpark_column_or_sql_str,
20+
build_expr_from_snowpark_column_or_python_val,
1921
debug_check_missing_ast,
2022
fill_sp_save_mode,
2123
fill_sp_write_file,
@@ -130,19 +132,37 @@ def mode(self, save_mode: str, _emit_ast: bool = True) -> "DataFrameWriter":
130132

131133
return self
132134

133-
def partition_by(self, expr: ColumnOrSqlExpr) -> "DataFrameWriter":
135+
def partition_by(
136+
self, expr: ColumnOrSqlExpr, _emit_ast: bool = True
137+
) -> "DataFrameWriter":
134138
"""Specifies an expression used to partition the unloaded table rows into separate files. It can be a
135139
:class:`Column`, a column name, or a SQL expression.
136140
"""
137141
self._partition_by = expr
142+
143+
# Update AST if it exists.
144+
if _emit_ast:
145+
if self._ast_stmt is not None:
146+
build_expr_from_snowpark_column_or_sql_str(
147+
self._ast_stmt.expr.sp_dataframe_write.partition_by, expr
148+
)
149+
138150
return self
139151

140-
def option(self, key: str, value: Any) -> "DataFrameWriter":
152+
def option(self, key: str, value: Any, _emit_ast: bool = True) -> "DataFrameWriter":
141153
"""Depending on the ``file_format_type`` specified, you can include more format specific options.
142154
Use the options documented in the `Format Type Options <https://docs.snowflake.com/en/sql-reference/sql/copy-into-location.html#format-type-options-formattypeoptions>`__.
143155
"""
144156
aliased_key = get_aliased_option_name(key, WRITER_OPTIONS_ALIAS_MAP)
145157
self._cur_options[aliased_key] = value
158+
159+
# Update AST if it exists.
160+
if _emit_ast:
161+
if self._ast_stmt is not None:
162+
t = self._ast_stmt.expr.sp_dataframe_write.options.add()
163+
t._1 = aliased_key
164+
build_expr_from_snowpark_column_or_python_val(t._2, value)
165+
146166
return self
147167

148168
def options(self, configs: Optional[Dict] = None, **kwargs) -> "DataFrameWriter":

0 commit comments

Comments
 (0)