Skip to content

Commit 30d1655

Browse files
authored
Merge branch 'main' into dependabot/cargo/cargo-920260e05e
2 parents 58ef878 + a7a584c commit 30d1655

File tree

6 files changed

+248
-27
lines changed

6 files changed

+248
-27
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ rusqlite = { version = "0.31", features = ["bundled"] }
4343

4444
# Memory-mapped I/O (Sprint 6.6)
4545
memmap2 = "0.9"
46-
rkyv = "0.8.14"
46+
47+
# Zero-copy serialization (rkyv migration)
48+
rkyv = { version = "0.8.14", features = ["std", "alloc"] }
4749

4850
# CSV export (promote to workspace)
4951
csv = "1.3"

crates/prtip-core/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,7 @@ pub use resource_monitor::{
136136
pub use retry::{retry_with_backoff, RetryConfig};
137137
pub use service_db::{ServiceMatch, ServiceProbe, ServiceProbeDb};
138138
pub use templates::{ScanTemplate, TemplateManager};
139-
pub use types::{PortRange, PortState, Protocol, ScanResult, ScanResultRkyv, ScanTarget, ScanType, TimingTemplate};
139+
pub use types::{
140+
PortRange, PortState, Protocol, ScanResult, ScanResultRkyv, ScanTarget, ScanType,
141+
TimingTemplate,
142+
};

crates/prtip-core/src/types.rs

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,128 @@ impl fmt::Display for ScanResult {
641641
}
642642
}
643643

644+
/// Flat, rkyv-serializable mirror of `ScanResult`.
645+
///
646+
/// Derives rkyv's `Archive`, `Serialize` and `Deserialize`. The IP address,
647+
/// port state, response time and timestamp are held as plain byte/integer
648+
/// fields; the `From` conversions in this file define the exact encoding.
649+
///
650+
/// # Alignment Requirements
651+
///
652+
/// NOTE(review): an earlier revision of this comment stated that the 512-byte
653+
/// fixed-size entry buffer gives archived values adequate alignment. That
654+
/// depends on the reader/writer layout rather than on this type; confirm there.
655+
#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
656+
#[rkyv(derive(Debug))]
657+
pub struct ScanResultRkyv {
658+
/// Target IP address; IPv4 uses the first 4 bytes (rest zero), IPv6 all 16
659+
pub target_ip_bytes: [u8; 16],
660+
/// `true` when `target_ip_bytes` holds an IPv4 address, `false` for IPv6
661+
pub is_ipv4: bool,
662+
/// Port number
663+
pub port: u16,
664+
/// Port state as u8 (Open=0, Closed=1, Filtered=2, Unknown=3)
665+
pub state: u8,
666+
/// Response time in nanoseconds (u64 to avoid truncation)
667+
pub response_time_nanos: u64,
668+
/// Timestamp in nanoseconds since Unix epoch
669+
pub timestamp_nanos: i64,
670+
/// Optional banner (intended max 128 bytes; not enforced by this type)
671+
pub banner: Option<String>,
672+
/// Optional service name (intended max 32 bytes; not enforced by this type)
673+
pub service: Option<String>,
674+
/// Optional service version (intended max 64 bytes; not enforced by this type)
675+
pub version: Option<String>,
676+
/// Optional raw response (intended max 256 bytes; not enforced by this type)
677+
pub raw_response: Option<Vec<u8>>,
678+
}
679+
680+
impl From<&ScanResult> for ScanResultRkyv {
681+
fn from(result: &ScanResult) -> Self {
682+
// Convert IpAddr to bytes
683+
let (target_ip_bytes, is_ipv4) = match result.target_ip {
684+
IpAddr::V4(ipv4) => {
685+
let mut bytes = [0u8; 16];
686+
bytes[..4].copy_from_slice(&ipv4.octets());
687+
(bytes, true)
688+
}
689+
IpAddr::V6(ipv6) => (ipv6.octets(), false),
690+
};
691+
692+
// Convert PortState to u8
693+
let state = match result.state {
694+
PortState::Open => 0,
695+
PortState::Closed => 1,
696+
PortState::Filtered => 2,
697+
PortState::Unknown => 3,
698+
};
699+
700+
// Convert response time to u64 nanoseconds (avoid truncation issues)
701+
// Note: u64 can represent up to ~584 years, which is more than sufficient
702+
// for network response times. We clamp to u64::MAX to avoid overflow.
703+
let response_time_nanos = result.response_time.as_nanos().min(u64::MAX as u128) as u64;
704+
705+
// Convert timestamp with proper error handling
706+
let timestamp_nanos = result
707+
.timestamp
708+
.timestamp_nanos_opt()
709+
.expect("timestamp out of range for nanosecond representation");
710+
711+
Self {
712+
target_ip_bytes,
713+
is_ipv4,
714+
port: result.port,
715+
state,
716+
response_time_nanos,
717+
timestamp_nanos,
718+
banner: result.banner.clone(),
719+
service: result.service.clone(),
720+
version: result.version.clone(),
721+
raw_response: result.raw_response.clone(),
722+
}
723+
}
724+
}
725+
726+
impl From<ScanResultRkyv> for ScanResult {
727+
fn from(rkyv: ScanResultRkyv) -> Self {
728+
// Convert bytes back to IpAddr
729+
let target_ip = if rkyv.is_ipv4 {
730+
let mut octets = [0u8; 4];
731+
octets.copy_from_slice(&rkyv.target_ip_bytes[..4]);
732+
IpAddr::V4(std::net::Ipv4Addr::from(octets))
733+
} else {
734+
IpAddr::V6(std::net::Ipv6Addr::from(rkyv.target_ip_bytes))
735+
};
736+
737+
// Convert u8 back to PortState
738+
let state = match rkyv.state {
739+
0 => PortState::Open,
740+
1 => PortState::Closed,
741+
2 => PortState::Filtered,
742+
_ => PortState::Unknown,
743+
};
744+
745+
// Convert u64 nanoseconds back to Duration
746+
// Safe: u64::MAX nanoseconds fits within Duration's range
747+
let response_time = Duration::from_nanos(rkyv.response_time_nanos);
748+
749+
// Convert i64 nanoseconds back to DateTime
750+
let timestamp = DateTime::from_timestamp_nanos(rkyv.timestamp_nanos);
751+
752+
Self {
753+
target_ip,
754+
port: rkyv.port,
755+
state,
756+
response_time,
757+
timestamp,
758+
banner: rkyv.banner,
759+
service: rkyv.service,
760+
version: rkyv.version,
761+
raw_response: rkyv.raw_response,
762+
}
763+
}
764+
}
765+
644766
/// Port filtering for exclusion/inclusion lists
645767
///
646768
/// Provides efficient port filtering using hash sets for O(1) lookups.

crates/prtip-scanner/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pcap-file = "2.0"
8787

8888
# Memory-mapped I/O
8989
memmap2 = { workspace = true }
90-
rkyv = { workspace = true, features = ["alloc"] }
90+
rkyv = { workspace = true }
9191

9292
[dev-dependencies]
9393
tokio = { workspace = true }

crates/prtip-scanner/src/output/mmap_reader.rs

Lines changed: 79 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use std::path::Path;
88

99
const HEADER_SIZE: usize = 64;
1010
const ENTRY_SIZE: usize = 512;
11-
const LENGTH_PREFIX_SIZE: usize = 8; // 8 bytes for length to maintain alignment
11+
const LENGTH_PREFIX_SIZE: usize = 8; // u64 length prefix for each entry
1212

1313
/// Memory-mapped result reader
1414
pub struct MmapResultReader {
@@ -34,10 +34,21 @@ impl MmapResultReader {
3434

3535
// Parse header
3636
let version = u64::from_le_bytes(mmap[0..8].try_into().unwrap());
37-
if version != 1 {
37+
if version == 1 {
3838
return Err(io::Error::new(
3939
io::ErrorKind::InvalidData,
40-
format!("Unsupported version: {}", version),
40+
"Incompatible file format: version 1 (bincode) is no longer supported. \
41+
This file was created with an older version of the scanner. \
42+
Please regenerate scan results with the current version (rkyv format, version 2).",
43+
));
44+
}
45+
if version != 2 {
46+
return Err(io::Error::new(
47+
io::ErrorKind::InvalidData,
48+
format!(
49+
"Unsupported version: {}. Expected version 2 (rkyv format).",
50+
version
51+
),
4152
));
4253
}
4354

@@ -85,14 +96,33 @@ impl MmapResultReader {
8596
return None;
8697
}
8798

88-
// Copy data to an aligned buffer (rkyv requires alignment)
89-
let entry_bytes = &self.mmap[offset + LENGTH_PREFIX_SIZE..offset + LENGTH_PREFIX_SIZE + len];
90-
let aligned_data: Vec<u8> = entry_bytes.to_vec();
99+
// Read length prefix (u64 in little-endian)
100+
let len = u64::from_le_bytes(
101+
entry_bytes[..LENGTH_PREFIX_SIZE]
102+
.try_into()
103+
.expect("LENGTH_PREFIX_SIZE is 8 bytes"),
104+
) as usize;
105+
106+
// Validate length
107+
if len == 0 || len + LENGTH_PREFIX_SIZE > self.entry_size {
108+
eprintln!(
109+
"MmapResultReader: invalid entry length {} at index {}",
110+
len, index
111+
);
112+
return None;
113+
}
91114

92-
// Deserialize using rkyv
93-
match rkyv::from_bytes::<ScanResultRkyv, rkyv::rancor::Error>(&aligned_data) {
94-
Ok(rkyv_result) => Some(rkyv_result.into()),
95-
Err(_) => None,
115+
// Deserialize straight from the mapped bytes (no intermediate buffer copy);
// rkyv::from_bytes still produces an owned ScanResultRkyv.
116+
let data_bytes = &entry_bytes[LENGTH_PREFIX_SIZE..LENGTH_PREFIX_SIZE + len];
117+
match rkyv::from_bytes::<ScanResultRkyv, rkyv::rancor::Error>(data_bytes) {
118+
Ok(rkyv_result) => Some(ScanResult::from(rkyv_result)),
119+
Err(e) => {
120+
eprintln!(
121+
"MmapResultReader: failed to deserialize entry at index {} with length {}: {}",
122+
index, len, e
123+
);
124+
None
125+
}
96126
}
97127
}
98128

@@ -230,4 +260,43 @@ mod tests {
230260
assert!(reader.get_entry(1).is_none());
231261
assert!(reader.get_entry(100).is_none());
232262
}
263+
264+
#[test]
fn test_mmap_version_1_rejected() {
    let temp = NamedTempFile::new().unwrap();
    let path = temp.path().to_owned();

    // Assemble a version-1 (old bincode format) header in memory:
    // version = 1, entry_count = 0, entry_size = ENTRY_SIZE, checksum = 0,
    // then zero padding up to HEADER_SIZE.
    let mut header = Vec::with_capacity(HEADER_SIZE);
    for field in [1u64, 0u64, ENTRY_SIZE as u64, 0u64] {
        header.extend_from_slice(&field.to_le_bytes());
    }
    header.resize(HEADER_SIZE, 0u8);
    std::fs::write(&path, &header).unwrap();

    // Opening must be refused, and the error must name the legacy format.
    let err = match MmapResultReader::open(&path) {
        Ok(_) => panic!("assertion failed: result.is_err()"),
        Err(err) => err,
    };
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);

    let err_msg = err.to_string();
    let mentions_legacy_format = err_msg.contains("version 1") && err_msg.contains("bincode");
    assert!(
        mentions_legacy_format,
        "Error message should mention version 1 and bincode format: {}",
        err_msg
    );
}
233302
}

crates/prtip-scanner/src/output/mmap_writer.rs

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33
//! Uses memory-mapped files to reduce RAM usage by 20-50% compared to
44
//! in-memory buffering. Results are written to a binary format with
55
//! fixed-size entries for zero-copy random access.
6+
//!
7+
//! # Alignment Requirements
8+
//!
9+
//! ENTRY_SIZE (512 bytes) is a multiple of 16 and HEADER_SIZE is 64, so every
10+
//! entry starts at a 16-byte-aligned file offset. The serialized payload follows
11+
//! the 8-byte length prefix, so it starts at an 8-byte-aligned offset; archived
12+
//! types needing more than 8-byte alignment would not be aligned in place.
613
714
use memmap2::{MmapMut, MmapOptions};
815
use prtip_core::{ScanResult, ScanResultRkyv};
@@ -12,7 +19,19 @@ use std::path::Path;
1219

1320
const HEADER_SIZE: usize = 64; // Version, entry_count, entry_size, checksum
1421
const ENTRY_SIZE: usize = 512; // Fixed-size entries (padded if needed)
15-
const LENGTH_PREFIX_SIZE: usize = 8; // 8 bytes for length to maintain alignment
22+
const LENGTH_PREFIX_SIZE: usize = 8; // u64 length prefix for each entry
23+
24+
// Compile-time check: the build fails if ENTRY_SIZE stops being a multiple
// of 16, so entry boundaries stay 16-byte aligned relative to the header.
25+
const _: () = assert!(
26+
ENTRY_SIZE % 16 == 0,
27+
"ENTRY_SIZE must be a multiple of 16 bytes for rkyv alignment"
28+
);
29+
30+
// Compile-time check: the length prefix is written as a little-endian u64
// (8 bytes), so LENGTH_PREFIX_SIZE must stay 8 for the copy to fit exactly.
31+
const _: () = assert!(
32+
LENGTH_PREFIX_SIZE == 8,
33+
"LENGTH_PREFIX_SIZE must be 8 bytes for proper alignment"
34+
);
1635

1736
/// Memory-mapped result writer
1837
pub struct MmapResultWriter {
@@ -57,31 +76,36 @@ impl MmapResultWriter {
5776
}
5877

5978
let offset = HEADER_SIZE + (self.entry_count * ENTRY_SIZE);
60-
61-
// Convert to rkyv-compatible type and serialize
62-
let rkyv_result: ScanResultRkyv = result.into();
63-
let entry_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&rkyv_result)
64-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
6579

66-
// Check if data + length prefix fits
80+
// Convert to rkyv-compatible format
81+
let rkyv_result = ScanResultRkyv::from(result);
82+
83+
// Serialize using rkyv with improved error handling
84+
let entry_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&rkyv_result).map_err(|e| {
85+
let msg = format!("rkyv serialization error (rkyv::rancor::Error): {e:?}");
86+
io::Error::new(io::ErrorKind::InvalidData, msg)
87+
})?;
88+
89+
// Check if entry fits (accounting for length prefix)
6790
if entry_bytes.len() + LENGTH_PREFIX_SIZE > ENTRY_SIZE {
6891
return Err(io::Error::new(
6992
io::ErrorKind::InvalidData,
7093
format!(
71-
"Entry size {} (+{} length) exceeds maximum {}",
94+
"Entry size {} (+ {} length prefix) exceeds maximum {}",
7295
entry_bytes.len(),
7396
LENGTH_PREFIX_SIZE,
7497
ENTRY_SIZE
7598
),
7699
));
77100
}
78101

79-
// Write length prefix (8 bytes for alignment)
80-
let len = entry_bytes.len() as u64;
81-
self.mmap[offset..offset + LENGTH_PREFIX_SIZE].copy_from_slice(&len.to_le_bytes());
102+
// Write length prefix (u64 in little-endian)
103+
let len_bytes = (entry_bytes.len() as u64).to_le_bytes();
104+
self.mmap[offset..offset + LENGTH_PREFIX_SIZE].copy_from_slice(&len_bytes);
82105

83106
// Write serialized data after length prefix
84-
self.mmap[offset + LENGTH_PREFIX_SIZE..offset + LENGTH_PREFIX_SIZE + entry_bytes.len()].copy_from_slice(&entry_bytes);
107+
let data_offset = offset + LENGTH_PREFIX_SIZE;
108+
self.mmap[data_offset..data_offset + entry_bytes.len()].copy_from_slice(&entry_bytes);
85109

86110
// Zero-fill remaining space
87111
for i in (LENGTH_PREFIX_SIZE + entry_bytes.len())..ENTRY_SIZE {
@@ -113,8 +137,9 @@ impl MmapResultWriter {
113137
}
114138

115139
fn write_header(&mut self) -> io::Result<()> {
116-
// Version: 1
117-
self.mmap[0..8].copy_from_slice(&1u64.to_le_bytes());
140+
// Version: 2 (rkyv format with length prefix)
141+
// Version 1 was bincode format (deprecated)
142+
self.mmap[0..8].copy_from_slice(&2u64.to_le_bytes());
118143
// Entry count: 0
119144
self.mmap[8..16].copy_from_slice(&0u64.to_le_bytes());
120145
// Entry size: ENTRY_SIZE

0 commit comments

Comments
 (0)