Skip to content

Commit a300d2f

Browse files
committed
feat(mft): add MftRecordIter for zero-copy iteration over MFT records
refactor(mft_file): improve entry size calculations and update size handling
refactor(mft_record): enhance deref implementation and add from_bytes_unchecked method
1 parent 5ce9ecc commit a300d2f

File tree

5 files changed

+137
-29
lines changed

5 files changed

+137
-29
lines changed

src/mft/mft_file.rs

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::mft::fast_fixup::FixupStats;
22
use crate::mft::fast_fixup::apply_fixups_parallel;
3+
use crate::mft::mft_record_iter::MftRecordIter;
34
use bytes::Bytes;
45
use eyre::Context;
56
use std::fmt::Debug;
@@ -10,8 +11,8 @@ use std::time::Instant;
1011
use thousands::Separable;
1112
use tracing::debug;
1213
use tracing::instrument;
13-
use uom::si::f64::Information;
1414
use uom::si::information::byte;
15+
use uom::si::usize::Information;
1516

1617
pub struct MftFile {
1718
bytes: Bytes,
@@ -33,17 +34,18 @@ impl Deref for MftFile {
3334
}
3435
impl MftFile {
3536
pub fn size(&self) -> Information {
36-
Information::new::<byte>(self.bytes.len() as f64)
37+
Information::new::<byte>(self.bytes.len())
3738
}
3839
pub fn entry_size(&self) -> Information {
3940
if self.len() < 0x20 {
40-
return Information::new::<byte>(1024.0);
41+
return Information::new::<byte>(1024);
4142
}
42-
let size = u32::from_le_bytes([self[0x1C], self[0x1D], self[0x1E], self[0x1F]]);
43+
let size = u32::from_le_bytes([self[0x1C], self[0x1D], self[0x1E], self[0x1F]]) as usize;
4344
if size == 0 {
44-
Information::new::<byte>(1024.0)
45+
// Information::new::<byte>(1024)
46+
panic!("MFT entry size field is zero (invalid/unknown)");
4547
} else {
46-
Information::new::<byte>(size as f64)
48+
Information::new::<byte>(size)
4749
}
4850
}
4951
pub fn entry_count(&self) -> usize {
@@ -64,20 +66,22 @@ impl MftFile {
6466
debug!("Opened MFT file: {}", mft_file_path.display());
6567

6668
// Determine file size
67-
let file_size_bytes = file
68-
.metadata()
69-
.wrap_err_with(|| format!("Failed to get metadata for {}", mft_file_path.display()))?
70-
.len() as usize;
71-
let mft_file_size = Information::new::<byte>(file_size_bytes as f64);
72-
if file_size_bytes < 1024 {
69+
let mft_file_size = Information::new::<byte>(
70+
file.metadata()
71+
.wrap_err_with(|| {
72+
format!("Failed to get metadata for {}", mft_file_path.display())
73+
})?
74+
.len() as usize,
75+
);
76+
if mft_file_size < Information::new::<byte>(1024) {
7377
eyre::bail!("MFT file too small: {}", mft_file_path.display());
7478
}
7579

7680
// Read all bytes
7781
debug!("Reading cached bytes: {}", mft_file_size.get_human());
7882
let read_start = Instant::now();
7983
let bytes = {
80-
let mut buf = Vec::with_capacity(file_size_bytes);
84+
let mut buf = Vec::with_capacity(mft_file_size.get::<byte>());
8185
let mut reader = std::io::BufReader::new(&file);
8286
reader
8387
.read_to_end(&mut buf)
@@ -133,4 +137,15 @@ impl MftFile {
133137
bytes: Bytes::from(raw),
134138
})
135139
}
140+
141+
/// Iterate over fixed-size records contained in this MFT file.
142+
///
143+
/// This creates zero-copy `MftRecord` instances by slicing the shared
144+
/// `Bytes` buffer. No signature validation is performed.
145+
/// The caller is responsible for ensuring fixups were already applied
146+
/// (handled by `MftFile::from_bytes`/`from_path`).
147+
#[inline]
148+
pub fn iter_records(&self) -> MftRecordIter {
149+
MftRecordIter::new(self.bytes.clone(), self.entry_size())
150+
}
136151
}

src/mft/mft_record.rs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use crate::mft::mft_record_attribute_iter::MftRecordAttributeIter;
22
use crate::mft::mft_record_location::MftRecordLocationOnDisk;
33
use crate::mft::mft_record_number::MftRecordNumber;
4+
use bytes::Bytes;
45
use eyre::bail;
6+
use std::ops::Deref;
57
use teamy_windows::file::HandleReadExt;
68
use uom::si::information::byte;
7-
use std::ops::Deref;
8-
use bytes::Bytes;
99

1010
/// https://digitalinvestigator.blogspot.com/2022/03/the-ntfs-master-file-table-mft.html?m=1
1111
/// "On a standard hard drive with 512-byte sectors, the MFT is structured as a series of 1,024-byte records,
@@ -22,8 +22,10 @@ pub struct MftRecord {
2222
}
2323

2424
impl Deref for MftRecord {
25-
type Target = [u8];
26-
fn deref(&self) -> &Self::Target { self.data.as_ref() }
25+
type Target = Bytes;
26+
fn deref(&self) -> &Self::Target {
27+
&self.data
28+
}
2729
}
2830

2931
impl std::fmt::Debug for MftRecord {
@@ -38,6 +40,15 @@ impl std::fmt::Debug for MftRecord {
3840
}
3941

4042
impl MftRecord {
43+
/// Construct a record without validating the signature.
44+
///
45+
/// Use this when the caller already ensured the slice is a single MFT record
46+
/// with fixups applied. This avoids redundant checks and copying.
47+
#[inline]
48+
pub fn from_bytes_unchecked(bytes: Bytes) -> Self {
49+
Self { data: bytes }
50+
}
51+
4152
// ---- Raw field offset constants (for clarity & reuse) ----
4253
const OFFSET_FOR_SIGNATURE: usize = 0x00;
4354
const OFFSET_FOR_UPDATE_SEQUENCE_ARRAY_OFFSET: usize = 0x04; // u16
@@ -54,24 +65,28 @@ impl MftRecord {
5465
// 0x2A padding
5566
const OFFSET_FOR_RECORD_NUMBER: usize = 0x2C; // u32 on-disk
5667

57-
5868
/// Read a single MFT record from the given drive handle at the specified location.
5969
/// Validates the "FILE" signature.
60-
///
70+
///
6171
/// Useful for reading the $MFT record itself (record 0) or other known record numbers.
6272
pub fn try_from_handle(
6373
drive_handle: impl HandleReadExt,
6474
mft_record_location: MftRecordLocationOnDisk,
6575
) -> eyre::Result<Self> {
6676
let mut data = [0u8; MFT_RECORD_SIZE as usize];
67-
drive_handle.try_read_exact(mft_record_location.get::<byte>() as i64, data.as_mut_slice())?;
77+
drive_handle.try_read_exact(
78+
mft_record_location.get::<byte>() as i64,
79+
data.as_mut_slice(),
80+
)?;
6881
if &data[0..4] != b"FILE" {
6982
bail!(
7083
"Invalid MFT record signature: expected 'FILE', got {:?}",
7184
String::from_utf8_lossy(&data[0..4])
7285
);
7386
}
74-
Ok(Self { data: Bytes::from(data.to_vec()) })
87+
Ok(Self {
88+
data: Bytes::from(data.to_vec()),
89+
})
7590
}
7691

7792
/// Zero-copy access to the 4-byte signature.
@@ -175,5 +190,7 @@ impl MftRecord {
175190
}
176191

177192
/// Iterate raw attribute slices (header + body) in this record.
178-
pub fn iter_attributes(&self) -> MftRecordAttributeIter<'_> { MftRecordAttributeIter::new(self) }
193+
pub fn iter_attributes(&self) -> MftRecordAttributeIter<'_> {
194+
MftRecordAttributeIter::new(self)
195+
}
179196
}

src/mft/mft_record_iter.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
use crate::mft::mft_record::MftRecord;
2+
use bytes::Bytes;
3+
use uom::ConstZero;
4+
use uom::si::information::byte;
5+
use uom::si::usize::Information;
6+
7+
/// Zero-copy iterator over MFT records stored contiguously in a `Bytes` buffer.
8+
pub struct MftRecordIter {
9+
bytes: Bytes,
10+
entry_size: Information,
11+
index: usize,
12+
total: usize,
13+
}
14+
15+
impl MftRecordIter {
16+
pub fn new(bytes: Bytes, entry_size: Information) -> Self {
17+
let total = if entry_size == Information::ZERO {
18+
panic!("MFT entry size cannot be zero");
19+
} else {
20+
bytes.len() / entry_size.get::<byte>()
21+
};
22+
Self {
23+
bytes,
24+
entry_size,
25+
index: 0,
26+
total,
27+
}
28+
}
29+
}
30+
31+
impl Iterator for MftRecordIter {
32+
type Item = MftRecord;
33+
fn next(&mut self) -> Option<Self::Item> {
34+
if self.index >= self.total {
35+
return None;
36+
}
37+
let start = self.index * self.entry_size;
38+
let end = start + self.entry_size;
39+
self.index += 1;
40+
Some(MftRecord::from_bytes_unchecked(
41+
self.bytes.slice(start.get::<byte>()..end.get::<byte>()),
42+
))
43+
}
44+
45+
fn size_hint(&self) -> (usize, Option<usize>) {
46+
let remaining = self.total.saturating_sub(self.index);
47+
(remaining, Some(remaining))
48+
}
49+
}
50+
51+
// `size_hint` returns the exact remaining record count as both its lower and
// upper bound, which is what `ExactSizeIterator` requires.
impl ExactSizeIterator for MftRecordIter {}
52+
// Once `index` reaches `total`, `next` returns `None` without modifying any
// state, so every later call returns `None` as well.
impl core::iter::FusedIterator for MftRecordIter {}
53+
54+
#[cfg(test)]
mod tests {
    use super::*;

    /// Two back-to-back records must be yielded in order, each exactly one
    /// entry long and each pointing into the original buffer (no copy).
    #[test]
    fn iter_records_yields_zero_copy_records() {
        const ENTRY_SIZE: usize = 1024;
        let mut buf = vec![0u8; ENTRY_SIZE * 2];
        // Write 'FILE' signature for both records
        buf[0..4].copy_from_slice(b"FILE");
        buf[ENTRY_SIZE..ENTRY_SIZE + 4].copy_from_slice(b"FILE");
        let bytes = Bytes::from(buf);

        let mut it = MftRecordIter::new(bytes.clone(), Information::new::<byte>(ENTRY_SIZE));
        assert_eq!(it.len(), 2);
        let r1 = it.next().expect("first record");
        assert_eq!(it.len(), 1);
        let r2 = it.next().expect("second record");
        assert_eq!(it.len(), 0);
        assert!(it.next().is_none());
        // Exhausted iterators must keep returning `None`.
        assert!(it.next().is_none());

        assert_eq!(r1.get_signature(), b"FILE");
        assert_eq!(r2.get_signature(), b"FILE");

        // Each record spans exactly one entry.
        let r1_bytes: &[u8] = &r1;
        let r2_bytes: &[u8] = &r2;
        assert_eq!(r1_bytes.len(), ENTRY_SIZE);
        assert_eq!(r2_bytes.len(), ENTRY_SIZE);

        // Zero-copy: the records alias the source buffer instead of owning
        // a fresh allocation.
        assert_eq!(r1_bytes.as_ptr(), bytes.as_ptr());
        assert_eq!(r2_bytes.as_ptr(), bytes[ENTRY_SIZE..].as_ptr());
        assert_eq!(bytes.len(), ENTRY_SIZE * 2);
    }
}

src/mft/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub mod mft_record_reference;
1111
pub mod mft_sequence_number;
1212
pub mod mft_physical_read;
1313
pub mod mft_file;
14+
pub mod mft_record_iter;
1415
pub mod fast_fixup;
1516
pub mod fast_entry;
1617
pub mod path_resolve;

src/mft_process.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ use std::time::Instant;
88
use thousands::Separable;
99
use tracing::debug;
1010
use tracing::info;
11-
use uom::si::f64::Information;
1211
use uom::si::f64::InformationRate;
1312
use uom::si::f64::Ratio;
1413
use uom::si::f64::Time;
@@ -37,7 +36,9 @@ pub fn process_mft_file(
3736
let file_names =
3837
fast_entry::par_collect_filenames(&mft_file, mft_file.entry_size().get::<byte>() as usize);
3938
let scan_elapsed = Time::new::<second>(scan_start.elapsed().as_secs_f64());
40-
let scan_rate = InformationRate::from(mft_file.size() / scan_elapsed);
39+
let scan_rate = InformationRate::from(
40+
uom::si::f64::Information::new::<byte>(mft_file.size().get::<byte>() as f64) / scan_elapsed,
41+
);
4142
debug!(
4243
drive_letter = &drive_letter,
4344
"Took {} ({}) entries_with_names={}",
@@ -53,13 +54,12 @@ pub fn process_mft_file(
5354
file_names.x30_count().separate_with_commas()
5455
);
5556
let path_resolve_start = Instant::now();
56-
let multi = path_resolve::resolve_paths_all_parallel(&file_names)?;
57+
let multi = path_resolve::resolve_paths_all_parallel(&file_names)?;
5758
let path_resolve_elapsed = Time::new::<second>(path_resolve_start.elapsed().as_secs_f64());
5859
let total_paths = multi.total_paths();
5960
let resolved_entries = multi.0.iter().filter(|v| !v.is_empty()).count();
60-
let resolve_rate = InformationRate::from(
61-
Information::new::<byte>(resolved_entries as f64 * 256.0) / path_resolve_elapsed,
62-
);
61+
let resolved_size = uom::si::f64::Information::new::<byte>(resolved_entries as f64 * 256.0);
62+
let resolve_rate = InformationRate::from(resolved_size / path_resolve_elapsed);
6363
debug!(
6464
drive_letter = &drive_letter,
6565
"Took {} ({}) entries_resolved={} total_paths={}",
@@ -84,7 +84,8 @@ pub fn process_mft_file(
8484

8585
let elapsed = Time::new::<second>(start.elapsed().as_secs_f64());
8686
// aggregate performance statistics
87-
let total_data_rate = InformationRate::from(mft_file.size() / elapsed); // overall throughput
87+
let total_size = uom::si::f64::Information::new::<byte>(mft_file.size().get::<byte>() as f64);
88+
let total_data_rate = InformationRate::from(total_size / elapsed); // overall throughput
8889
let entries_rate = Ratio::new::<ratio>(mft_file.entry_count() as f64) / elapsed;
8990
debug!(
9091
drive_letter = &drive_letter,

0 commit comments

Comments
 (0)