4
4
//! to extract FILE_NAME (0x30) attributes with minimal overhead.
5
5
6
6
use crate :: mft:: fast_fixup:: detect_entry_size;
7
+ use crate :: mft:: mft_file:: MftFile ;
8
+ use rayon:: prelude:: * ;
7
9
8
10
/// Attribute type code of the FILE_NAME (0x30) attribute that this module extracts.
pub const ATTR_TYPE_FILE_NAME: u32 = 0x30;

/// Attribute type value that terminates an entry's attribute list; the
/// attribute walk stops as soon as it reads this marker.
const ATTRIBUTE_TYPE_END: u32 = 0xFFFF_FFFF;
10
12
11
13
/// Borrowed view of a single FILE_NAME (0x30) attribute found in an MFT entry.
///
/// The name is not copied: `name_utf16` borrows from the entry bytes that were
/// scanned, so a `FileNameRef` cannot outlive that buffer.
#[derive(Clone, Copy, Debug)]
pub struct FileNameRef<'a> {
    /// Identifier of the MFT entry the attribute was found in (supplied by the caller of the scan).
    pub entry_id: u32,
    /// Raw 64-bit parent directory reference (contains the sequence number).
    pub parent_ref: u64,
    /// Namespace byte stored in the FILE_NAME structure.
    pub namespace: u8,
    /// File name as UTF-16 code units, borrowed from the entry bytes.
    pub name_utf16: &'a [u16],
}
18
20
19
21
/// Collection of FILE_NAME attributes extracted from MFT data.
20
- ///
22
+ ///
21
23
/// This structure provides organized access to all filename references found
22
24
/// in an MFT, with efficient lookup by entry ID.
23
25
#[ derive( Clone , Debug ) ]
24
26
pub struct FileNameCollection < ' a > {
25
27
/// All FILE_NAME references found across all entries
26
28
pub all_filenames : Vec < FileNameRef < ' a > > ,
27
- /// Index mapping where `per_entry[entry_id]` contains indices
29
+ /// Index mapping where `per_entry[entry_id]` contains indices
28
30
/// into `all_filenames` for all filenames belonging to that entry
29
31
pub per_entry_indices : Vec < Vec < usize > > ,
30
32
}
31
33
32
34
impl < ' a > FileNameCollection < ' a > {
33
35
/// Get all filename references for a specific entry ID.
34
- ///
36
+ ///
35
37
/// # Arguments
36
- ///
38
+ ///
37
39
/// * `entry_id` - The MFT entry ID to look up
38
- ///
40
+ ///
39
41
/// # Returns
40
- ///
42
+ ///
41
43
/// An iterator over all `FileNameRef` instances for the given entry,
42
44
/// or an empty iterator if the entry ID is not found.
43
- ///
45
+ ///
44
46
/// # Example
45
- ///
46
- /// ```rust
47
- /// # use mft::fast_entry::{par_collect_filenames, FileNameCollection};
48
- /// # let data = &[0u8; 2048]; // Mock MFT data
49
- /// let collection = par_collect_filenames(data, 1024);
50
- ///
47
+ ///
48
+ /// ```rust,no_run
49
+ /// use teamy_mft::mft::{fast_entry, mft_file::MftFile};
50
+ /// # fn demo() -> eyre::Result<()> {
51
+ /// // Load an MFT file and collect all filename (x30) attributes
52
+ /// let mft = MftFile::from_path(std::path::Path::new("C:\\path\\to\\cached.mft"))?;
53
+ /// let collection = fast_entry::par_collect_filenames_typed(&mft);
51
54
/// for filename in collection.filenames_for_entry(5) {
52
55
/// println!("Entry 5 filename: {:?}", filename);
53
56
/// }
57
+ /// # Ok(()) }
58
+ /// # let _ = demo();
54
59
/// ```
55
60
pub fn filenames_for_entry ( & self , entry_id : u32 ) -> impl Iterator < Item = & FileNameRef < ' a > > {
56
61
self . per_entry_indices
57
62
. get ( entry_id as usize )
58
- . map ( |indices| indices. iter ( ) . filter_map ( |& idx| self . all_filenames . get ( idx) ) )
63
+ . map ( |indices| {
64
+ indices
65
+ . iter ( )
66
+ . filter_map ( |& idx| self . all_filenames . get ( idx) )
67
+ } )
59
68
. into_iter ( )
60
69
. flatten ( )
61
70
}
62
-
71
+
63
72
/// Get the total number of filename references collected.
64
73
pub fn x30_count ( & self ) -> usize {
65
74
self . all_filenames . len ( )
66
75
}
67
-
76
+
68
77
/// Get the number of entry slots in the per-entry index (the length of `per_entry_indices`); slots for entries without filename attributes are counted too.
69
78
pub fn entry_count ( & self ) -> usize {
70
79
self . per_entry_indices . len ( )
@@ -73,44 +82,83 @@ impl<'a> FileNameCollection<'a> {
73
82
74
83
/// Reads a little-endian `u16` from `bytes` starting at byte offset `off`.
///
/// Returns `None` when fewer than two bytes are available at `off`,
/// including the case where `off + 2` would overflow `usize`.
#[inline]
fn read_u16(bytes: &[u8], off: usize) -> Option<u16> {
    // checked_add: a pathological offset must yield None, not an overflow panic.
    let end = off.checked_add(2)?;
    bytes
        .get(off..end)
        .map(|b| u16::from_le_bytes([b[0], b[1]]))
}
78
89
/// Reads a little-endian `u32` from `bytes` starting at byte offset `off`.
///
/// Returns `None` when fewer than four bytes are available at `off`,
/// including the case where `off + 4` would overflow `usize`.
#[inline]
fn read_u32(bytes: &[u8], off: usize) -> Option<u32> {
    // checked_add: a pathological offset must yield None, not an overflow panic.
    let end = off.checked_add(4)?;
    bytes
        .get(off..end)
        .map(|b| u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
}
82
95
/// Reads a little-endian `u64` from `bytes` starting at byte offset `off`.
///
/// Returns `None` when fewer than eight bytes are available at `off`,
/// including the case where `off + 8` would overflow `usize`.
#[inline]
fn read_u64(bytes: &[u8], off: usize) -> Option<u64> {
    // checked_add: a pathological offset must yield None, not an overflow panic.
    let end = off.checked_add(8)?;
    bytes
        .get(off..end)
        .map(|b| u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]))
}
86
101
87
102
/// Parse the total entry size from the first entry slice (delegates to fast_fixup helper)
88
103
#[ inline]
89
- pub fn parse_first_entry_size ( first_entry : & [ u8 ] ) -> Option < u32 > { detect_entry_size ( first_entry) }
104
+ pub fn parse_first_entry_size ( first_entry : & [ u8 ] ) -> Option < u32 > {
105
+ detect_entry_size ( first_entry)
106
+ }
90
107
91
108
/// Iterate all FILE_NAME attributes in an entry, invoking callback for each.
92
109
/// Returns number of filename attributes found.
93
- pub fn for_each_filename < ' a , F : FnMut ( FileNameRef < ' a > ) > ( entry_bytes : & ' a [ u8 ] , entry_id : u32 , mut f : F ) -> usize {
110
+ pub fn for_each_filename < ' a , F : FnMut ( FileNameRef < ' a > ) > (
111
+ entry_bytes : & ' a [ u8 ] ,
112
+ entry_id : u32 ,
113
+ mut f : F ,
114
+ ) -> usize {
94
115
// Signature check
95
- if entry_bytes. len ( ) < 0x18 || & entry_bytes[ 0 ..4 ] != b"FILE" { return 0 ; }
96
- let first_attr_off = match read_u16 ( entry_bytes, 0x14 ) { Some ( v) => v as usize , None => return 0 } ;
97
- if first_attr_off == 0 || first_attr_off >= entry_bytes. len ( ) { return 0 ; }
116
+ if entry_bytes. len ( ) < 0x18 || & entry_bytes[ 0 ..4 ] != b"FILE" {
117
+ return 0 ;
118
+ }
119
+ let first_attr_off = match read_u16 ( entry_bytes, 0x14 ) {
120
+ Some ( v) => v as usize ,
121
+ None => return 0 ,
122
+ } ;
123
+ if first_attr_off == 0 || first_attr_off >= entry_bytes. len ( ) {
124
+ return 0 ;
125
+ }
98
126
99
127
let mut offset = first_attr_off;
100
128
let mut count = 0 ;
101
- while offset + 16 <= entry_bytes. len ( ) { // minimal attribute header length guard
102
- let attr_type = match read_u32 ( entry_bytes, offset) { Some ( v) => v, None => break } ;
103
- if attr_type == ATTRIBUTE_TYPE_END { break ; }
104
- let attr_len = match read_u32 ( entry_bytes, offset + 4 ) { Some ( v) => v as usize , None => break } ;
105
- if attr_len == 0 || offset + attr_len > entry_bytes. len ( ) { break ; }
129
+ while offset + 16 <= entry_bytes. len ( ) {
130
+ // minimal attribute header length guard
131
+ let attr_type = match read_u32 ( entry_bytes, offset) {
132
+ Some ( v) => v,
133
+ None => break ,
134
+ } ;
135
+ if attr_type == ATTRIBUTE_TYPE_END {
136
+ break ;
137
+ }
138
+ let attr_len = match read_u32 ( entry_bytes, offset + 4 ) {
139
+ Some ( v) => v as usize ,
140
+ None => break ,
141
+ } ;
142
+ if attr_len == 0 || offset + attr_len > entry_bytes. len ( ) {
143
+ break ;
144
+ }
106
145
let non_res_flag = entry_bytes. get ( offset + 8 ) . copied ( ) . unwrap_or ( 0 ) ;
107
146
if attr_type == ATTR_TYPE_FILE_NAME && non_res_flag == 0 {
108
147
// Resident attribute header layout (offsets relative to attribute start)
109
- if offset + 24 > entry_bytes. len ( ) { break ; }
110
- let value_len = match read_u32 ( entry_bytes, offset + 16 ) { Some ( v) => v as usize , None => break } ;
111
- let value_off = match read_u16 ( entry_bytes, offset + 20 ) { Some ( v) => v as usize , None => break } ;
148
+ if offset + 24 > entry_bytes. len ( ) {
149
+ break ;
150
+ }
151
+ let value_len = match read_u32 ( entry_bytes, offset + 16 ) {
152
+ Some ( v) => v as usize ,
153
+ None => break ,
154
+ } ;
155
+ let value_off = match read_u16 ( entry_bytes, offset + 20 ) {
156
+ Some ( v) => v as usize ,
157
+ None => break ,
158
+ } ;
112
159
let value_abs = offset + value_off;
113
- if value_abs + value_len > entry_bytes. len ( ) || value_len < 0x42 { /* need base struct */ } else {
160
+ if value_abs + value_len > entry_bytes. len ( ) || value_len < 0x42 { /* need base struct */
161
+ } else {
114
162
// FILE_NAME structure
115
163
if let Some ( parent_ref) = read_u64 ( entry_bytes, value_abs) {
116
164
let name_len = entry_bytes. get ( value_abs + 0x40 ) . copied ( ) . unwrap_or ( 0 ) as usize ;
@@ -119,8 +167,18 @@ pub fn for_each_filename<'a, F: FnMut(FileNameRef<'a>)>(entry_bytes: &'a [u8], e
119
167
let name_bytes_end = name_utf16_off + name_len * 2 ;
120
168
if name_bytes_end <= entry_bytes. len ( ) {
121
169
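// SAFETY: NOTE(review) — this is not sound as written. The range was bounds-checked above (`name_bytes_end <= entry_bytes.len()`), but `slice::from_raw_parts::<u16>` also requires the pointer to be 2-byte aligned, and a `&[u8]` sub-slice only guarantees 1-byte alignment. An odd `name_utf16_off` is therefore undefined behaviour regardless of whether the CPU (e.g. x86) tolerates unaligned loads. Prefer decoding byte pairs with `u16::from_le_bytes`, or check alignment (e.g. `align_to::<u16>`) before reinterpreting.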
122
- let raw: & [ u16 ] = unsafe { std:: slice:: from_raw_parts ( entry_bytes[ name_utf16_off..name_bytes_end] . as_ptr ( ) as * const u16 , name_len) } ;
123
- f ( FileNameRef { entry_id, parent_ref, namespace, name_utf16 : raw } ) ;
170
+ let raw: & [ u16 ] = unsafe {
171
+ std:: slice:: from_raw_parts (
172
+ entry_bytes[ name_utf16_off..name_bytes_end] . as_ptr ( ) as * const u16 ,
173
+ name_len,
174
+ )
175
+ } ;
176
+ f ( FileNameRef {
177
+ entry_id,
178
+ parent_ref,
179
+ namespace,
180
+ name_utf16 : raw,
181
+ } ) ;
124
182
count += 1 ;
125
183
}
126
184
}
@@ -132,75 +190,65 @@ pub fn for_each_filename<'a, F: FnMut(FileNameRef<'a>)>(entry_bytes: &'a [u8], e
132
190
}
133
191
134
192
/// Parallel collection of all FILE_NAME attributes from MFT data.
135
- ///
136
- /// This function processes MFT entries in parallel using Rayon to extract all FILE_NAME
137
- /// attributes efficiently. It's particularly useful for large MFT files where sequential
138
- /// processing would be too slow.
139
- ///
140
- /// # Arguments
141
- ///
142
- /// * `mft_file` - Raw MFT data bytes (must be properly fixed up with `fast_fixup` first)
143
- /// * `entry_size` - Size of each MFT entry in bytes (typically 1024 bytes)
144
- ///
145
- /// # Returns
146
- ///
147
- /// A `FileNameCollection` containing all FILE_NAME references with efficient lookup by entry ID.
148
- ///
149
- /// # Requirements
150
- ///
151
- /// * Requires the `parallel` feature to be enabled
152
- /// * Input data must be evenly divisible by `entry_size`
153
- /// * MFT data must have fixups already applied (use `fast_fixup` module first)
154
- ///
193
+ ///
194
+ /// This function processes MFT entries in parallel to extract all FILE_NAME attributes efficiently.
195
+ /// It's particularly useful for large MFT files where sequential processing would be too slow.
196
+ ///
155
197
/// # Example
156
- ///
157
- /// ```rust
158
- /// # use mft::fast_entry::par_collect_filenames;
159
- /// # let data = &[0u8; 2048]; // Mock MFT data
160
- /// let entry_size = 1024;
161
- /// let collection = par_collect_filenames(data, entry_size);
162
- ///
198
+ ///
199
+ /// ```rust,no_run
200
+ /// use teamy_mft::mft::{fast_entry, mft_file::MftFile};
201
+ /// # fn demo() -> eyre::Result<()> {
202
+ /// let mft = MftFile::from_path(std::path::Path::new("C:\\path\\to\\cached.mft"))?;
203
+ /// let collection = fast_entry::par_collect_filenames_typed(&mft);
163
204
/// // Access all filenames for entry ID 5
164
205
/// for filename in collection.filenames_for_entry(5) {
165
206
/// println!("Entry 5 has filename: {:?}", filename);
166
207
/// }
167
- ///
168
- /// println!("Total filenames found: {}", collection.total_count());
208
+ /// println!("Total filenames found: {}", collection.x30_count());
209
+ /// # Ok(()) }
210
+ /// # let _ = demo();
169
211
/// ```
170
- ///
171
- /// # Performance
172
- ///
173
- /// This function uses Rayon's parallel iterator to process entries concurrently,
174
- /// making it significantly faster than sequential processing for large MFT files.
175
- /// The trade-off is higher memory usage and complexity in the returned data structure.
176
- pub fn par_collect_filenames < ' a > ( mft_file : & ' a [ u8 ] , entry_size : usize ) -> FileNameCollection < ' a > {
177
- use rayon:: prelude:: * ;
178
- let entries = mft_file. par_chunks_exact ( entry_size) . enumerate ( ) ;
179
- let per_thread: Vec < ( Vec < FileNameRef < ' a > > , Vec < ( u32 , usize ) > ) > = entries. map ( |( idx, entry) | {
180
- let mut list = Vec :: new ( ) ;
181
- let mut pairs = Vec :: new ( ) ;
182
- for_each_filename ( entry, idx as u32 , |fref| {
183
- let global_index = list. len ( ) ;
184
- list. push ( fref) ;
185
- pairs. push ( ( fref. entry_id , global_index) ) ;
186
- } ) ;
187
- ( list, pairs)
188
- } ) . collect ( ) ;
212
+ pub fn collect_filenames < ' a > ( mft : & ' a MftFile ) -> FileNameCollection < ' a > {
213
+ let full: & ' a [ u8 ] = & * mft; // borrow the entire bytes buffer
214
+ let entry_size = mft. entry_size ( ) . get :: < uom:: si:: information:: byte > ( ) ;
215
+ let entry_count = mft. entry_count ( ) ;
216
+
217
+ let per_thread: Vec < ( Vec < FileNameRef < ' a > > , Vec < ( u32 , usize ) > ) > = ( 0 ..entry_count)
218
+ . into_par_iter ( )
219
+ . map ( |idx| {
220
+ let mut list = Vec :: new ( ) ;
221
+ let mut pairs = Vec :: new ( ) ;
222
+ let start = idx * entry_size;
223
+ let end = start + entry_size;
224
+ let record_bytes: & ' a [ u8 ] = & full[ start..end] ;
225
+ for_each_filename ( record_bytes, idx as u32 , |fref| {
226
+ let global_index = list. len ( ) ;
227
+ list. push ( fref) ;
228
+ pairs. push ( ( fref. entry_id , global_index) ) ;
229
+ } ) ;
230
+ ( list, pairs)
231
+ } )
232
+ . collect ( ) ;
189
233
190
234
let total = per_thread. iter ( ) . map ( |( v, _) | v. len ( ) ) . sum ( ) ;
191
235
let mut file_names = Vec :: with_capacity ( total) ;
192
- for ( v, _) in & per_thread { file_names. extend_from_slice ( v) ; }
236
+ for ( v, _) in & per_thread {
237
+ file_names. extend_from_slice ( v) ;
238
+ }
193
239
194
- let entry_count = mft_file. len ( ) / entry_size;
195
240
let mut per_entry: Vec < Vec < usize > > = vec ! [ Vec :: new( ) ; entry_count] ;
196
241
let mut base = 0usize ;
197
242
for ( v, pairs) in per_thread {
198
243
for ( entry_id, local_idx) in pairs {
199
244
let global_idx = base + local_idx;
200
- if let Some ( vec) = per_entry. get_mut ( entry_id as usize ) { vec. push ( global_idx) ; }
245
+ if let Some ( vec) = per_entry. get_mut ( entry_id as usize ) {
246
+ vec. push ( global_idx) ;
247
+ }
201
248
}
202
249
base += v. len ( ) ;
203
250
}
251
+
204
252
FileNameCollection {
205
253
all_filenames : file_names,
206
254
per_entry_indices : per_entry,
0 commit comments