Skip to content

Commit 2d1a8cf

Browse files
committed
split out a lib file
1 parent 0ed1d50 commit 2d1a8cf

File tree

2 files changed

+332
-322
lines changed

2 files changed

+332
-322
lines changed

ziplinter/src/lib.rs

Lines changed: 331 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,331 @@
1+
use std::{fs::File, rc::Rc, sync::Mutex};
2+
3+
use rc_zip::{
4+
chrono::{DateTime, Utc},
5+
encoding::Encoding,
6+
fsm::{AexData, ParsedRanges},
7+
parse::{EndOfCentralDirectory, Entry, ExtraAexField, Method, MethodSpecific, Mode, Version},
8+
};
9+
use rc_zip_sync::{ArchiveHandle, EntryHandle, HasCursor, ReadZip};
10+
use serde::ser::SerializeStruct;
11+
12+
/// Serializable copy of a single central directory file header.
///
/// Values are filled in by `CentralDirectoryFileHeader::from_rc_zip` from the
/// `rc_zip` central directory record and the `Entry` that belongs to it.
/// Sizes and the header offset are kept as the 32-bit values of the record.
#[derive(serde::Serialize)]
13+
pub struct CentralDirectoryFileHeader {
14+
/// Version made by.
15+
pub creator_version: Version,
16+
17+
/// Version needed to extract.
18+
pub reader_version: Version,
19+
20+
/// General purpose bit flag.
21+
pub flags: u16,
22+
23+
/// Compression method.
24+
pub method: Method,
25+
26+
/// Last modification datetime (copied from the `Entry`).
27+
pub modified: DateTime<Utc>,
28+
29+
/// CRC-32 hash.
30+
pub crc32: u32,
31+
32+
/// Compressed size.
33+
pub compressed_size: u32,
34+
35+
/// Uncompressed size.
36+
pub uncompressed_size: u32,
37+
38+
/// Disk number start.
39+
pub disk_nbr_start: u16,
40+
41+
/// Internal file attributes.
42+
pub internal_attrs: u16,
43+
44+
/// External file attributes.
45+
pub external_attrs: u32,
46+
47+
/// Relative offset of the local header.
48+
pub header_offset: u32,
49+
50+
/// Name field (copied from the `Entry`).
51+
pub name: String,
52+
53+
/// Extra field, as an owned copy of the raw bytes.
54+
pub extra: Vec<u8>,
55+
56+
/// Comment field (copied from the `Entry`).
57+
pub comment: String,
58+
59+
/// File mode (copied from the `Entry`).
60+
pub mode: Mode,
61+
62+
/// AE-x extra field of the `Entry`, if it has one.
63+
///
64+
/// Left out of the serialized output when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
63+
pub aex: Option<ExtraAexField>,
64+
}
65+
66+
impl CentralDirectoryFileHeader {
67+
fn from_rc_zip(value: &rc_zip::parse::CentralDirectoryFileHeader<'_>, entry: &Entry) -> Self {
68+
CentralDirectoryFileHeader {
69+
creator_version: value.creator_version,
70+
reader_version: value.reader_version,
71+
flags: value.flags,
72+
method: value.method,
73+
modified: entry.modified,
74+
crc32: value.crc32,
75+
compressed_size: value.compressed_size,
76+
uncompressed_size: value.uncompressed_size,
77+
disk_nbr_start: value.disk_nbr_start,
78+
internal_attrs: value.internal_attrs,
79+
external_attrs: value.external_attrs,
80+
header_offset: value.header_offset,
81+
name: entry.name.clone(),
82+
extra: value.extra.to_vec(),
83+
comment: entry.comment.clone(),
84+
mode: entry.mode,
85+
aex: entry.aex,
86+
}
87+
}
88+
}
89+
90+
/// Serializable copy of a single local file header.
///
/// Values are filled in by `LocalFileHeader::from_rc_zip` from the `rc_zip`
/// local header record and the entry derived from that record.
#[derive(serde::Serialize)]
91+
pub struct LocalFileHeader {
92+
/// Version needed to extract.
93+
pub reader_version: Version,
94+
95+
/// General purpose bit flag.
96+
pub flags: u16,
97+
98+
/// Compression method.
99+
pub method: Method,
100+
101+
/// Last modification datetime.
102+
pub modified: DateTime<Utc>,
103+
104+
/// This entry's "created" timestamp, if available.
105+
///
106+
/// See [Self::modified] for caveats.
107+
pub created: Option<DateTime<Utc>>,
108+
109+
/// This entry's "last accessed" timestamp, if available.
110+
///
111+
/// See [Self::modified] for caveats.
112+
pub accessed: Option<DateTime<Utc>>,
113+
114+
/// CRC-32 hash.
115+
pub crc32: u32,
116+
117+
/// Compressed size.
118+
pub compressed_size: u64,
119+
120+
/// Uncompressed size.
121+
pub uncompressed_size: u64,
122+
123+
/// Offset of the local file header in the zip file.
124+
///
125+
/// ```text
126+
/// [optional non-zip data]
127+
/// [local file header 1] <------ header_offset points here
128+
/// [encryption header 1]
129+
/// [file data 1]
130+
/// [data descriptor 1]
131+
/// ...
132+
/// [central directory]
133+
/// [optional zip64 end of central directory info]
134+
/// [end of central directory record]
135+
/// ```
136+
pub header_offset: u64,
137+
138+
/// Unix user ID.
139+
///
140+
/// Only present if a Unix extra field or New Unix extra field was found.
141+
pub uid: Option<u32>,
142+
143+
/// Unix group ID.
144+
///
145+
/// Only present if a Unix extra field or New Unix extra field was found.
146+
pub gid: Option<u32>,
147+
148+
/// File name.
149+
pub name: String,
150+
151+
/// Extra field, as an owned copy of the raw bytes.
152+
pub extra: Vec<u8>,
153+
154+
/// Method-specific fields.
155+
pub method_specific: MethodSpecific,
156+
157+
/// File mode.
158+
pub mode: Mode,
159+
160+
/// AE-x extra field of the entry, if it has one.
161+
///
162+
/// Left out of the serialized output when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
161+
pub aex: Option<ExtraAexField>,
162+
163+
/// AE-x data returned alongside the local header, if any.
164+
///
165+
/// Left out of the serialized output when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
164+
pub aex_data: Option<AexData>,
165+
}
166+
167+
impl LocalFileHeader {
168+
fn from_rc_zip<F: HasCursor>(
169+
entry: EntryHandle<'_, F>,
170+
parsed_ranges: Rc<Mutex<ParsedRanges>>,
171+
) -> Result<Self, Error> {
172+
let (value, aex_data) = entry.local_header(parsed_ranges)?.ok_or(Error {
173+
error: format!("Can't get local file header for \"{}\"", entry.name),
174+
})?;
175+
let entry = value.as_entry()?;
176+
177+
Ok(LocalFileHeader {
178+
reader_version: value.reader_version,
179+
flags: value.flags,
180+
method: value.method,
181+
modified: entry.modified,
182+
created: entry.created,
183+
accessed: entry.accessed,
184+
crc32: value.crc32,
185+
compressed_size: entry.compressed_size,
186+
uncompressed_size: entry.uncompressed_size,
187+
gid: entry.gid,
188+
uid: entry.uid,
189+
header_offset: entry.header_offset,
190+
name: entry.name,
191+
extra: value.extra.to_vec(),
192+
method_specific: value.method_specific,
193+
mode: entry.mode,
194+
aex: entry.aex,
195+
aex_data: aex_data.to_owned(),
196+
})
197+
}
198+
}
199+
200+
/// Metadata for one file in the archive: its central directory header paired
/// with the outcome of reading the matching local file header.
201+
struct FileMetadata {
202+
// Header taken from the central directory.
central: CentralDirectoryFileHeader,
203+
// Local file header, or the error that prevented reading it.
local: Result<LocalFileHeader, Error>,
204+
}
205+
206+
impl serde::Serialize for FileMetadata {
    /// Serializes as a two-field struct with the keys `central` and `local`.
    ///
    /// Written by hand so that `local` holds either the header or the error
    /// value directly instead of a wrapped `Result`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let mut state = serializer.serialize_struct("FileMetadata", 2)?;
        state.serialize_field("central", &self.central)?;

        // Both outcomes are written under the same key.
        match self.local.as_ref() {
            Ok(header) => state.serialize_field("local", header)?,
            Err(failure) => state.serialize_field("local", failure)?,
        }

        state.end()
    }
}
221+
222+
/// Serializable summary of a whole archive, built from an `ArchiveHandle`.
///
/// `eocd` and `comment` borrow from the handle for `'a`; everything else is
/// owned.
#[derive(serde::Serialize)]
223+
struct ZipMetadata<'a> {
224+
// End of central directory data of the archive.
eocd: &'a EndOfCentralDirectory<'static>,
225+
// Encoding reported by the archive handle.
encoding: Encoding,
226+
// Size reported by the archive handle.
size: u64,
227+
// Archive comment.
comment: &'a String,
228+
// One element per (entry, central directory header) pair.
contents: Vec<FileMetadata>,
229+
// Copy of the handle's parsed ranges, taken after the local headers were read.
parsed_ranges: ParsedRanges,
230+
}
231+
232+
impl<'a, F> From<&'a mut ArchiveHandle<'a, F>> for ZipMetadata<'a>
233+
where
234+
F: HasCursor,
235+
{
236+
fn from(archive: &'a mut ArchiveHandle<'a, F>) -> Self {
237+
let contents = archive
238+
.entries()
239+
.zip(archive.directory_headers.iter())
240+
.map(|(entry, directory_header)| FileMetadata {
241+
central: CentralDirectoryFileHeader::from_rc_zip(directory_header, entry.entry),
242+
local: LocalFileHeader::from_rc_zip(entry, archive.parsed_ranges.clone()),
243+
})
244+
.collect();
245+
246+
ZipMetadata {
247+
eocd: &archive.eocd,
248+
encoding: archive.encoding,
249+
size: archive.size,
250+
comment: &archive.comment,
251+
contents,
252+
parsed_ranges: archive.parsed_ranges.try_lock().unwrap().clone(),
253+
}
254+
}
255+
}
256+
257+
/// Serializable error value: a struct with a single `error` message field.
///
/// NOTE: this type must not implement `Debug`. The file has a blanket
/// `impl<T: Debug> From<T> for Error`, which would then overlap with the
/// standard library's reflexive `impl<T> From<T> for T`.
#[derive(serde::Serialize)]
258+
struct Error {
259+
// Human-readable description of what went wrong.
error: String,
260+
}
261+
262+
impl<T: std::fmt::Debug> From<T> for Error {
263+
fn from(error: T) -> Self {
264+
Error {
265+
error: format!("{:?}", error),
266+
}
267+
}
268+
}
269+
270+
pub fn parse_bytes(bytes: &[u8]) -> serde_json::Value {
271+
match bytes.read_zip() {
272+
Ok(mut archive) => serde_json::to_value(ZipMetadata::from(&mut archive)).unwrap(),
273+
Err(error) => serde_json::to_value(Error::from(error)).unwrap(),
274+
}
275+
}
276+
277+
pub fn parse_file(file: &File) -> serde_json::Value {
278+
match file.read_zip() {
279+
Ok(mut archive) => serde_json::to_value(ZipMetadata::from(&mut archive)).unwrap(),
280+
Err(error) => serde_json::to_value(Error::from(error)).unwrap(),
281+
}
282+
}
283+
284+
#[cfg(test)]
mod test {
    use super::*;

    use insta::assert_json_snapshot;
    use std::{error::Error, path::Path};

    /// Opens the archive at `zip_path` and returns its metadata as JSON.
    ///
    /// Panics if the file cannot be opened; failures while reading the archive
    /// or serializing the metadata are returned to the caller.
    fn process_zip_file(zip_path: &Path) -> Result<serde_json::Value, Box<dyn Error>> {
        let file = std::fs::File::open(zip_path).unwrap();
        let mut archive = file.read_zip()?;

        let json = serde_json::to_value(ZipMetadata::from(&mut archive))?;
        Ok(json)
    }

    /// Snapshots the metadata of every `.zip` file found in `../testdata`.
    /// Archives that cannot be processed are skipped.
    #[test]
    fn snapshot_zip_files() {
        let cwd = std::env::current_dir().unwrap();
        let fixtures_dir = cwd.join("../testdata").canonicalize().unwrap();

        println!("fixtures_dir: {}", fixtures_dir.display());

        let dir_entries =
            std::fs::read_dir(fixtures_dir).expect("Failed to read fixtures directory");
        for dir_entry in dir_entries {
            let path = dir_entry.expect("Failed to read entry").path();

            // Only files with a (case-insensitive) `zip` extension are snapshotted.
            let is_zip = path
                .extension()
                .is_some_and(|ext| ext.eq_ignore_ascii_case("zip"));
            if !is_zip {
                continue;
            }

            let result = match process_zip_file(&path) {
                Ok(value) => value,
                Err(_) => continue,
            };

            // The bare file name doubles as the snapshot name.
            let file_name = Path::new(path.file_name().unwrap());
            println!("current file: {}", file_name.display());
            assert_json_snapshot!(format!("{}", file_name.display()), result);
        }
    }
}

0 commit comments

Comments
 (0)