
Commit 0529b66 (1 parent: 27c4d54)

WIP: Add support for remote Parquet writer with openDAL

2 files changed, 8 insertions(+), 2 deletions(-)

native/core/src/execution/operators/parquet_writer.rs

Lines changed: 5 additions & 0 deletions
@@ -553,6 +553,7 @@ mod tests {
     use std::sync::Arc;
 
     /// Helper function to create a test RecordBatch with 1000 rows of (int, string) data
+    /// Example batch_id 1 -> 0..1000, 2 -> 1001..2000
     fn create_test_record_batch(batch_id: i32) -> Result<RecordBatch> {
         assert!(batch_id > 0, "batch_id must be greater than 0");
         let num_rows = batch_id * 1000;
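The helper's body is not part of this hunk. As a rough sketch of what a batch-building helper along these lines might look like (the column names, value ranges, and error type below are assumptions, not taken from the commit):

use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;

// Hypothetical sketch of an (int, string) test-batch builder; not the commit's code.
fn create_test_record_batch_sketch(batch_id: i32) -> Result<RecordBatch, ArrowError> {
    assert!(batch_id > 0, "batch_id must be greater than 0");
    let num_rows = (batch_id * 1000) as usize;

    // Assumed column names "id" and "name".
    let ids = Int32Array::from_iter_values(0..num_rows as i32);
    let names = StringArray::from_iter_values((0..num_rows).map(|i| format!("row-{i}")));

    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]));
    RecordBatch::try_new(
        schema,
        vec![Arc::new(ids) as ArrayRef, Arc::new(names) as ArrayRef],
    )
}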
@@ -577,6 +578,7 @@ mod tests {
 
     #[tokio::test]
     #[cfg(feature = "hdfs-opendal")]
+    #[ignore = "This test requires a running HDFS cluster"]
     async fn test_write_to_hdfs_sync() -> Result<()> {
         use opendal::services::Hdfs;
         use opendal::Operator;
@@ -627,6 +629,7 @@ mod tests {
 
     #[tokio::test]
     #[cfg(feature = "hdfs-opendal")]
+    #[ignore = "This test requires a running HDFS cluster"]
     async fn test_write_to_hdfs_streaming() -> Result<()> {
         use opendal::services::Hdfs;
         use opendal::Operator;
@@ -707,6 +710,7 @@ mod tests {
 
     #[tokio::test]
     #[cfg(feature = "hdfs-opendal")]
+    #[ignore = "This test requires a running HDFS cluster"]
     async fn test_parquet_writer_streaming() -> Result<()> {
         // Configure output path
         let output_path = "/user/test_parquet_writer_streaming/data.parquet";
@@ -755,6 +759,7 @@ mod tests {
 
     #[tokio::test]
     #[cfg(feature = "hdfs-opendal")]
+    #[ignore = "This test requires a running HDFS cluster"]
     async fn test_parquet_writer_exec_with_memory_input() -> Result<()> {
         use datafusion::datasource::memory::MemorySourceConfig;
         use datafusion::datasource::source::DataSourceExec;
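All four HDFS-backed tests in this file are now both feature-gated and marked #[ignore], so they still compile under the hdfs-opendal feature but only run on request (for example, cargo test --features hdfs-opendal -- --ignored). A minimal sketch of the shape such a test takes follows; the name node address, target path, and error handling are assumptions, and the exact openDAL builder API varies slightly between versions:

#[tokio::test]
#[cfg(feature = "hdfs-opendal")]
#[ignore = "This test requires a running HDFS cluster"]
async fn write_parquet_via_opendal_sketch() -> Result<(), Box<dyn std::error::Error>> {
    use std::sync::Arc;

    use arrow::array::{ArrayRef, Int32Array};
    use arrow::record_batch::RecordBatch;
    use opendal::services::Hdfs;
    use opendal::Operator;
    use parquet::arrow::ArrowWriter;

    // Assumed cluster location and root; adjust for the local environment.
    let builder = Hdfs::default()
        .name_node("hdfs://localhost:9000")
        .root("/user/comet-tests");
    let op = Operator::new(builder)?.finish();

    // Encode one small batch to Parquet in memory, then hand the bytes to openDAL.
    let ids: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_from_iter(vec![("id", ids)])?;
    let mut buffer = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?;

    op.write("data.parquet", buffer).await?;
    Ok(())
}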

native/core/src/parquet/parquet_support.rs

Lines changed: 3 additions & 2 deletions
@@ -496,20 +496,21 @@ mod tests {
     use object_store::path::Path;
     use std::collections::HashMap;
     use std::sync::Arc;
-    use url::Url;
 
     /// Parses the url, registers the object store, and returns a tuple of the object store url and object store path
+    #[cfg(all(not(feature = "hdfs"), not(feature = "hdfs-opendal")))]
     pub(crate) fn prepare_object_store(
         runtime_env: Arc<RuntimeEnv>,
         url: String,
     ) -> Result<(ObjectStoreUrl, Path), ExecutionError> {
         prepare_object_store_with_configs(runtime_env, url, &HashMap::new())
     }
 
-    #[cfg(not(feature = "hdfs"))]
+    #[cfg(all(not(feature = "hdfs"), not(feature = "hdfs-opendal")))]
     #[test]
     fn test_prepare_object_store() {
         use crate::execution::operators::ExecutionError;
+        use url::Url;
 
         let local_file_system_url = "file:///comet/spark-warehouse/part-00000.snappy.parquet";
         let hdfs_url = "hdfs://localhost:8020/comet/spark-warehouse/part-00000.snappy.parquet";
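The second file widens the cfg gate so that prepare_object_store and its test are compiled only when neither HDFS backend is enabled, and moves use url::Url; from the module level into the test body, so enabling either hdfs feature no longer leaves an unused import behind. A minimal illustration of that scoping pattern (the test name and assertion here are made up for the example, not from the commit):

// Compile this test only when no HDFS backend feature is active, and keep the
// import it needs inside the gated item so other feature combinations do not
// produce `unused import` warnings.
#[cfg(all(not(feature = "hdfs"), not(feature = "hdfs-opendal")))]
#[test]
fn gated_url_parsing_sketch() {
    use url::Url; // only compiled when the surrounding cfg applies

    let url = Url::parse("file:///comet/spark-warehouse/part-00000.snappy.parquet").unwrap();
    assert_eq!(url.scheme(), "file");
}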
