Skip to content

Commit f49d225

Browse files
RiskRunner0 authored and meta-codesync[bot] committed
Adding buck_targets_with_distance.jsonl.zst to list, checking zstd magic number
Summary: Add a check for buck_targets_with_distance.jsonl.zst. Also add a zstd magic-number check for the case where the file extension is missing or incorrect. https://fb.workplace.com/groups/td.users/permalink/3848149105481080/

When I ran the command from the example:
```
$ arc citrace --verbose --job-id 2981382953332327521 --job-id 3625397700046428817 fbcode//ai_codesign/component_warehouse/tools/automation/test/cogwheel:cogwheel_e2e_geordi-test
```
the downloaded artifacts did not have a .zst extension, but WERE actually zstd:
```
$ file /tmp/citrace/2981382953332327521/buck_targets_with_distance
/tmp/citrace/2981382953332327521/buck_targets_with_distance: Zstandard compressed data (v0.8+), Dictionary ID: None
```
Differential Revision: D92464258
fbshipit-source-id: bbc38407e6b5c712020121cd44dbe3cd91a0e8bc
1 parent 741b20e commit f49d225

File tree

3 files changed

+25
-2
lines changed

3 files changed

+25
-2
lines changed

td_util/src/file_io.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use std::path::Path;
1919
use anyhow::Context;
2020

2121
use crate::json::BUFFER_SIZE;
22+
use crate::zstd::has_zstd_magic;
2223
use crate::zstd::is_zstd;
2324

2425
pub fn file_writer(file_path: &Path) -> anyhow::Result<Box<dyn Write>> {
@@ -41,7 +42,7 @@ pub fn file_reader(file_path: &Path) -> anyhow::Result<Box<dyn BufRead + Send>>
4142
let file = File::open(file_path)
4243
.with_context(|| format!("Unable to open file `{}` for reading", file_path.display()))?;
4344

44-
if is_zstd(file_path) {
45+
if is_zstd(file_path) || has_zstd_magic(file_path) {
4546
let decoder = zstd::Decoder::new(file)?;
4647
Ok(Box::new(BufReader::with_capacity(BUFFER_SIZE, decoder)))
4748
} else {

td_util/src/json.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use rayon::prelude::*;
2525
use serde::Deserialize;
2626
use serde::Serialize;
2727

28+
use crate::zstd::has_zstd_magic;
2829
use crate::zstd::is_zstd;
2930

3031
/// Buffer size for reading files (10MB)
@@ -38,7 +39,7 @@ fn parse_line<T: for<'a> Deserialize<'a>>(x: Result<String, io::Error>) -> anyho
3839

3940
fn open_file(filename: &Path) -> anyhow::Result<Box<dyn Read + Send>> {
4041
let file = File::open(filename)?;
41-
if is_zstd(filename) {
42+
if is_zstd(filename) || has_zstd_magic(filename) {
4243
Ok(Box::new(zstd::Decoder::new(file)?))
4344
} else {
4445
Ok(Box::new(file))

td_util/src/zstd.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,33 @@
1010

1111
//! Utilities for working with zstd compressed files.
1212
13+
use std::fs::File;
14+
use std::io::Read;
1315
use std::path::Path;
1416

17+
/// Magic number that opens every standard zstd frame: the 4-byte value
/// `0xFD2FB528` stored in little-endian byte order, per RFC 8878 Section 3.1.1.
///
/// <https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1>
const ZSTD_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
20+
1521
/// Check if a path has the zstd file extension (`.zst`).
///
/// Only the path itself is inspected; the file is never opened. The comparison
/// is exact, so a differently-cased extension (e.g. `.ZST`) or a path with no
/// extension yields `false`.
pub fn is_zstd(filename: &Path) -> bool {
    filename.extension().is_some_and(|ext| ext == "zst")
}
28+
29+
/// Check if a file contains zstd-compressed data by reading its magic bytes.
30+
/// Returns true if the file starts with the zstd magic bytes.
31+
pub fn has_zstd_magic(file_path: &Path) -> bool {
32+
let Ok(mut file) = File::open(file_path) else {
33+
return false;
34+
};
35+
36+
let mut magic = [0u8; 4];
37+
if file.read_exact(&mut magic).is_ok() {
38+
magic == ZSTD_MAGIC
39+
} else {
40+
false
41+
}
42+
}

0 commit comments

Comments
 (0)