Skip to content

Commit 216e1de

Browse files
committed
chore: debug
1 parent c8e0fd3 commit 216e1de

File tree

4 files changed

+11
-6
lines changed

4 files changed

+11
-6
lines changed

.github/workflows/l-libcommon.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,3 +38,4 @@ jobs:
3838
with:
3939
working-directory: libs/libviewer
4040
poetry-args: "--with dev"
41+
secrets: inherit

libs/libcommon/src/libcommon/parquet_utils.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@
1515
from datasets.table import cast_table_to_schema
1616
from datasets.utils.py_utils import size_str
1717
from fsspec.implementations.http import HTTPFile, HTTPFileSystem
18-
from huggingface_hub import HfFileSystem
1918
from pyarrow.lib import ArrowInvalid
2019

2120
from libcommon.constants import CONFIG_PARQUET_METADATA_KIND
@@ -405,7 +404,7 @@ def __init__(
405404
dataset: str,
406405
config: str,
407406
split: str,
408-
httpfs: HfFileSystem,
407+
httpfs: HTTPFileSystem,
409408
parquet_metadata_directory: StrPath,
410409
max_arrow_data_in_memory: int,
411410
max_scan_size: int,
@@ -467,7 +466,7 @@ def _init_dataset_info(self, parquet_metadata_directory: StrPath) -> None:
467466

468467
def _init_parquet_index(
469468
self,
470-
httpfs: HfFileSystem,
469+
httpfs: HTTPFileSystem,
471470
parquet_metadata_directory: StrPath,
472471
max_arrow_data_in_memory: int,
473472
) -> None:

libs/libviewer/src/dataset.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ pub enum DatasetError {
3030
#[error("Arrow error: {0}")]
3131
Arrow(#[from] arrow::error::ArrowError),
3232

33-
#[error("Parquet error: {0}")]
33+
#[error("{0}")]
3434
Parquet(#[from] ::parquet::errors::ParquetError),
3535

3636
#[error("Object store error: {0}")]

libs/libviewer/src/parquet.rs

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@ pub async fn read_metadata(
7777
path: impl Into<Path>,
7878
) -> Result<Arc<ParquetMetaData>> {
7979
let path = path.into();
80-
println!("Reading parquet metadata for {:?} on store {:?}", path, store);
8180

8281
let mut object_reader = ParquetObjectReader::new(store, path.clone());
8382
let metadata_reader = ParquetMetaDataReader::new()
@@ -94,7 +93,13 @@ pub async fn read_metadata(
9493

9594
let metadata = metadata_reader
9695
.load_via_suffix_and_finish(&mut object_reader)
97-
.await?;
96+
.await
97+
.map_err(|e| {
98+
ParquetError::General(format!(
99+
"Failed to read metadata from path '{}' in store: {}: {}",
100+
path, store, e
101+
))
102+
})?;
98103

99104
Ok(Arc::new(metadata))
100105
}

0 commit comments

Comments
 (0)