Skip to content

Commit 28e3ede

Browse files
delete invalid parquet and arrow files (#835)
include file name and stream name to the error log Fixes: #834
1 parent 47eb08d commit 28e3ede

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

server/src/storage/staging.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ impl StorageDir {
154154
pub fn arrow_files_grouped_exclude_time(
155155
&self,
156156
exclude: NaiveDateTime,
157+
stream: &str,
157158
) -> HashMap<PathBuf, Vec<PathBuf>> {
158159
let mut grouped_arrow_file: HashMap<PathBuf, Vec<PathBuf>> = HashMap::new();
159160
let mut arrow_files = self.arrow_files();
@@ -170,8 +171,9 @@ impl StorageDir {
170171
for arrow_file_path in arrow_files {
171172
if arrow_file_path.metadata().unwrap().len() == 0 {
172173
log::error!(
173-
"Invalid arrow file detected, removing it: {:?}",
174-
arrow_file_path
174+
"Invalid arrow file {:?} detected for stream {}, removing it",
175+
&arrow_file_path,
176+
stream
175177
);
176178
fs::remove_file(&arrow_file_path).unwrap();
177179
} else {
@@ -225,7 +227,7 @@ pub fn convert_disk_files_to_parquet(
225227
let mut schemas = Vec::new();
226228

227229
let time = chrono::Utc::now().naive_utc();
228-
let staging_files = dir.arrow_files_grouped_exclude_time(time);
230+
let staging_files = dir.arrow_files_grouped_exclude_time(time, stream);
229231
if staging_files.is_empty() {
230232
metrics::STAGING_FILES.with_label_values(&[stream]).set(0);
231233
metrics::STORAGE_SIZE
@@ -279,7 +281,11 @@ pub fn convert_disk_files_to_parquet(
279281

280282
writer.close()?;
281283
if parquet_file.metadata().unwrap().len() == 0 {
282-
log::error!("Invalid parquet file detected, removing it");
284+
log::error!(
285+
"Invalid parquet file {:?} detected for stream {}, removing it",
286+
&parquet_path,
287+
stream
288+
);
283289
fs::remove_file(parquet_path).unwrap();
284290
} else {
285291
for file in files {

0 commit comments

Comments
 (0)