|
4 | 4 |
|
5 | 5 | #include <arrow/api.h> |
6 | 6 | #include <arrow/io/api.h> |
| 7 | +#include <arrow/status.h> |
7 | 8 | #include <parquet/arrow/reader.h> |
8 | 9 | #include <parquet/arrow/writer.h> |
9 | 10 | #include <parquet/exception.h> |
|
29 | 30 | error "Missing the <filesystem> header." |
30 | 31 | #endif |
31 | 32 |
|
32 | | -std::shared_ptr<arrow::Table> get_arrow_table(const std::string& file_path) { |
33 | | - |
| 33 | +arrow::Result<std::shared_ptr<arrow::Table>> get_arrow_table(const std::string& file_path) { |
34 | 34 | auto file_extension = fs::path(file_path).extension().u8string(); |
35 | 35 |
|
36 | 36 | if (file_extension == ".parquet") { |
37 | 37 | arrow::MemoryPool* pool = arrow::default_memory_pool(); |
38 | 38 |
|
39 | | - std::shared_ptr<arrow::io::RandomAccessFile> input; |
| 39 | + ARROW_ASSIGN_OR_RAISE(auto input, arrow::io::ReadableFile::Open(file_path)); |
40 | 40 |
|
41 | | - //auto status = this->open(input, file_path); |
42 | | - input = arrow::io::ReadableFile::Open(file_path).ValueOrDie(); |
43 | | - |
44 | 41 | std::unique_ptr<parquet::arrow::FileReader> arrow_reader; |
| 42 | + ARROW_ASSIGN_OR_RAISE(arrow_reader, parquet::arrow::OpenFile(input, pool)); |
45 | 43 |
|
46 | | - auto status = parquet::arrow::OpenFile(input, pool, &arrow_reader); |
47 | | - |
48 | | - if (!status.ok()) { |
49 | | - // Handle read error |
50 | | - auto err = status.ToString(); |
51 | | - throw std::runtime_error("Error reading Arrow file: " + err); |
52 | | - } |
53 | | - |
54 | | - // Read entire file as a single Arrow table |
55 | 44 | std::shared_ptr<arrow::Table> table; |
56 | | - |
57 | | - status = arrow_reader->ReadTable(&table); |
58 | | - |
59 | | - if (!status.ok()) { |
60 | | - // Handle read error |
61 | | - auto err = status.ToString(); |
62 | | - throw std::runtime_error("Error reading Arrow file: " + err); |
63 | | - } |
| 45 | + ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table)); |
64 | 46 |
|
65 | 47 | return table; |
66 | 48 |
|
67 | 49 | } else if (file_extension == ".arrow") { |
68 | | - |
69 | | - // Create a memory-mapped file for reading. |
70 | | - |
71 | | - std::shared_ptr<arrow::io::ReadableFile> input; |
72 | | - |
73 | | - input = arrow::io::ReadableFile::Open(file_path).ValueOrDie(); |
| 50 | + ARROW_ASSIGN_OR_RAISE(auto input, arrow::io::ReadableFile::Open(file_path)); |
74 | 51 |
|
75 | | - // Create an IPC reader. |
76 | | - auto result = arrow::ipc::RecordBatchFileReader::Open(input.get()); |
77 | | - |
78 | | - if (!result.ok()) { |
79 | | - std::cerr << "Error opening IPC file: " << result.status().ToString() << std::endl; |
80 | | - } |
| 52 | + std::shared_ptr<arrow::ipc::RecordBatchFileReader> reader; |
| 53 | + ARROW_ASSIGN_OR_RAISE(reader, arrow::ipc::RecordBatchFileReader::Open(input.get())); |
81 | 54 |
|
82 | 55 | std::vector<std::shared_ptr<arrow::RecordBatch>> batches; |
83 | | - |
84 | | - auto reader = result.ValueOrDie(); |
85 | | - |
86 | | - for(int i = 0; i < reader->num_record_batches(); ++i) { |
87 | | - auto batch = reader->ReadRecordBatch(i).ValueOrDie(); |
88 | | - |
| 56 | + for (int i = 0; i < reader->num_record_batches(); ++i) { |
| 57 | + ARROW_ASSIGN_OR_RAISE(auto batch, reader->ReadRecordBatch(i)); |
89 | 58 | batches.push_back(batch); |
90 | 59 | } |
91 | 60 |
|
92 | | - auto table = arrow::Table::FromRecordBatches(batches).ValueOrDie(); |
93 | | - |
| 61 | + ARROW_ASSIGN_OR_RAISE(auto table, arrow::Table::FromRecordBatches(batches)); |
94 | 62 | return table; |
95 | | - |
| 63 | + |
96 | 64 | } else { |
97 | | - throw std::invalid_argument("Error: file must either be an Arrow or Parquet file."); |
| 65 | + return arrow::Status::Invalid("Error: file must either be an Arrow or Parquet file."); |
98 | 66 | } |
99 | | - |
100 | 67 | } |
101 | 68 |
|
102 | 69 | std::shared_ptr<arrow::Table> create_features_table(const std::vector<std::string> &header, |
@@ -253,7 +220,11 @@ void test_arrow() { |
253 | 220 | FAIL() << "Error closing Arrow file: " << msg2.value() << std::endl; |
254 | 221 | } |
255 | 222 |
|
256 | | - auto results_table = get_arrow_table(outputPath); |
| 223 | + auto results_table_result = get_arrow_table(outputPath); |
| 224 | + if (!results_table_result.ok()) { |
| 225 | + FAIL() << "Error reading Arrow file: " << results_table_result.status().ToString() << std::endl; |
| 226 | + } |
| 227 | + auto results_table = results_table_result.ValueOrDie(); |
257 | 228 |
|
258 | 229 | auto& row_data = std::get<1>(features); |
259 | 230 | std::vector<std::string> string_columns; |
@@ -319,7 +290,11 @@ void test_parquet() { |
319 | 290 | FAIL() << "Error closing Arrow file: " << msg2.value() << std::endl; |
320 | 291 | } |
321 | 292 |
|
322 | | - auto results_table = get_arrow_table(outputPath); |
| 293 | + auto results_table_result = get_arrow_table(outputPath); |
| 294 | + if (!results_table_result.ok()) { |
| 295 | + FAIL() << "Error reading Parquet file: " << results_table_result.status().ToString() << std::endl; |
| 296 | + } |
| 297 | + auto results_table = results_table_result.ValueOrDie(); |
323 | 298 |
|
324 | 299 | auto& row_data = std::get<1>(features); |
325 | 300 | std::vector<std::string> string_columns; |
|
0 commit comments