
Commit e926919

polars open: exposing the ability to configure hive settings. (nushell#15255)
# Description

Exposes parameters for working with [hive](https://docs.pola.rs/user-guide/io/hive/#scanning-hive-partitioned-data) partitioning.

# User-Facing Changes

- Added flags `--hive-enabled`, `--hive-start-idx`, `--hive-schema`, and `--hive-try-parse-dates` to `polars open`.
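A minimal usage sketch of the new flags (not part of this commit); the dataset path, glob pattern, and the `year`/`month` partition columns are illustrative assumptions:

```nu
# Hypothetical layout: sales/year=2024/month=01/part-0.parquet
# --hive-enabled turns the key=value directory names into dataframe columns;
# --hive-schema pins their dtypes as a record of column name -> dtype string.
polars open sales/**/*.parquet --hive-enabled --hive-schema {year: i64, month: i64}
| polars collect
```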
1 parent 8d5d01b commit e926919

File tree

1 file changed (+52, -4 lines):

  • crates/nu_plugin_polars/src/dataframe/command/core/open.rs

crates/nu_plugin_polars/src/dataframe/command/core/open.rs

Lines changed: 52 additions & 4 deletions
@@ -7,7 +7,7 @@ use crate::{
 use log::debug;
 use nu_utils::perf;
 
-use nu_plugin::PluginCommand;
+use nu_plugin::{EvaluatedCall, PluginCommand};
 use nu_protocol::{
     shell_error::io::IoError, Category, Example, LabeledError, PipelineData, ShellError, Signature,
     Span, Spanned, SyntaxShape, Type, Value,
@@ -90,6 +90,28 @@ impl PluginCommand for OpenDataFrame {
                 r#"Polars Schema in format [{name: str}]. CSV, JSON, and JSONL files"#,
                 Some('s')
             )
+            .switch(
+                "hive-enabled",
+                "Enable hive support. Parquet and Arrow files",
+                None,
+            )
+            .named(
+                "hive-start-idx",
+                SyntaxShape::Number,
+                "Start index of hive partitioning. Parquet and Arrow files",
+                None,
+            )
+            .named(
+                "hive-schema",
+                SyntaxShape::Record(vec![]),
+                r#"Hive schema in format [{name: str}]. Parquet and Arrow files"#,
+                None,
+            )
+            .switch(
+                "hive-try-parse-dates",
+                "Try to parse dates in hive partitioning. Parquet and Arrow files",
+                None,
+            )
             .switch("truncate-ragged-lines", "Truncate lines that are longer than the schema. CSV file", None)
             .input_output_type(Type::Any, Type::Custom("dataframe".into()))
             .category(Category::Custom("dataframe".into()))
@@ -141,13 +163,19 @@ fn command(
         });
     }
 
+    let hive_options = build_hive_options(call)?;
+
     match type_option {
         Some((ext, blamed)) => match PolarsFileType::from(ext.as_str()) {
             PolarsFileType::Csv | PolarsFileType::Tsv => {
                 from_csv(plugin, engine, call, resource, is_eager)
             }
-            PolarsFileType::Parquet => from_parquet(plugin, engine, call, resource, is_eager),
-            PolarsFileType::Arrow => from_arrow(plugin, engine, call, resource, is_eager),
+            PolarsFileType::Parquet => {
+                from_parquet(plugin, engine, call, resource, is_eager, hive_options)
+            }
+            PolarsFileType::Arrow => {
+                from_arrow(plugin, engine, call, resource, is_eager, hive_options)
+            }
             PolarsFileType::Json => from_json(plugin, engine, call, resource, is_eager),
             PolarsFileType::NdJson => from_ndjson(plugin, engine, call, resource, is_eager),
             PolarsFileType::Avro => from_avro(plugin, engine, call, resource, is_eager),
@@ -180,12 +208,14 @@ fn from_parquet(
     call: &nu_plugin::EvaluatedCall,
     resource: Resource,
     is_eager: bool,
+    hive_options: HiveOptions,
 ) -> Result<Value, ShellError> {
     let file_path = resource.path;
     let file_span = resource.span;
     if !is_eager {
         let args = ScanArgsParquet {
             cloud_options: resource.cloud_options,
+            hive_options,
             ..Default::default()
         };
         let df: NuLazyFrame = LazyFrame::scan_parquet(file_path, args)
@@ -279,6 +309,7 @@ fn from_arrow(
     call: &nu_plugin::EvaluatedCall,
     resource: Resource,
     is_eager: bool,
+    hive_options: HiveOptions,
 ) -> Result<Value, ShellError> {
     let file_path = resource.path;
     let file_span = resource.span;
@@ -290,7 +321,7 @@ fn from_arrow(
         row_index: None,
         cloud_options: resource.cloud_options,
         include_file_paths: None,
-        hive_options: HiveOptions::default(),
+        hive_options,
     };
 
     let df: NuLazyFrame = LazyFrame::scan_ipc(file_path, args)
@@ -595,3 +626,20 @@ fn cloud_not_supported(file_type: PolarsFileType, span: Span) -> ShellError {
         inner: vec![],
     }
 }
+
+fn build_hive_options(call: &EvaluatedCall) -> Result<HiveOptions, ShellError> {
+    let enabled: Option<bool> = call.get_flag("hive-enabled")?;
+    let hive_start_idx: Option<usize> = call.get_flag("hive-start-idx")?;
+    let schema: Option<NuSchema> = call
+        .get_flag::<Value>("hive-schema")?
+        .map(|schema| NuSchema::try_from(&schema))
+        .transpose()?;
+    let try_parse_dates: bool = call.has_flag("hive-try-parse-dates")?;
+
+    Ok(HiveOptions {
+        enabled,
+        hive_start_idx: hive_start_idx.unwrap_or(0),
+        schema: schema.map(|s| s.into()),
+        try_parse_dates,
+    })
+}

0 commit comments
