Skip to content

Commit ea61e0b

Browse files
Merge pull request #7 from peterfication/restructure-code
Restructure the code
2 parents 85db269 + c2e09b2 commit ea61e0b

File tree

7 files changed

+159
-77
lines changed

7 files changed

+159
-77
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
## [0.1.1] - 2025-09-12
6+
7+
- Restructure the code
8+
59
## [0.1.0] - 2025-09-12
610

711
Initial Release

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "num-peek"
3-
version = "0.1.0"
3+
version = "0.1.1"
44
authors = ["Peter Morgenstern <mail@petergundel.de>"]
55
edition = "2024"
66
homepage = "https://github.com/peterfication/num-peek"

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ just run-float
5858
- Total Elements
5959
- Size/memory usage
6060
- Mean, Standard Deviation, Median
61-
- Improve code organization
62-
- Publish crate and add installation instructions
61+
- Different output formats: text, json, ...
6362

6463
## License
6564

src/analyze.rs

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
use std::collections::HashSet;
2+
3+
use ordered_float::OrderedFloat;
4+
5+
/// The results of analyzing a single NPY file.
///
/// Equality is derived field by field. Because `stats` may contain `f64`
/// values, two analyses that hold `NaN` never compare equal.
#[derive(Debug, Clone, PartialEq)]
pub struct NpyAnalysis {
    /// Number of axes of the array, i.e. the length of `shape`.
    pub dimensions: usize,
    /// Extent of the array along each axis, as read from the NPY header.
    pub shape: Vec<u64>,
    /// Printable description of the element type.
    pub dtype_string: String,
    /// Value statistics, or `None` when none were computed.
    pub stats: Option<ValueStats>,
}

/// Statistics over the array values, one variant per supported numeric type.
///
/// Each variant carries the distinct values together with the smallest and
/// largest of them.
#[derive(Debug, Clone, PartialEq)]
pub enum ValueStats {
    /// Statistics for 64-bit floating point data.
    F64 {
        /// Distinct values found in the array.
        unique_values: Vec<f64>,
        /// Smallest value.
        min: f64,
        /// Largest value.
        max: f64,
    },
    /// Statistics for 64-bit signed integer data.
    I64 {
        /// Distinct values found in the array.
        unique_values: Vec<i64>,
        /// Smallest value.
        min: i64,
        /// Largest value.
        max: i64,
    },
}
28+
29+
/// Analyzes the NPY file and returns a struct with the results.
30+
pub fn analyze_npy(file_path: &str) -> Result<NpyAnalysis, Box<dyn std::error::Error>> {
31+
let bytes = std::fs::read(file_path)?;
32+
let npy = npyz::NpyFile::new(&bytes[..])?;
33+
34+
let header = npy.header();
35+
let dimensions = header.shape().len();
36+
let shape = header.shape().to_vec();
37+
let dtype = npy.header().dtype();
38+
39+
let (dtype_string, stats) = match dtype {
40+
npyz::DType::Plain(plain) => {
41+
let dtype_str = format!("{:?}{}", plain.type_char(), plain.size_field());
42+
43+
let stats = match (plain.type_char(), plain.size_field()) {
44+
(npyz::TypeChar::Float, 8) => {
45+
let data: Vec<f64> = npy.data::<f64>()?.collect::<Result<_, _>>()?;
46+
if data.is_empty() {
47+
None
48+
} else {
49+
let mut unique_numbers: Vec<_> = HashSet::<OrderedFloat<f64>>::from_iter(
50+
data.into_iter().map(OrderedFloat),
51+
)
52+
.into_iter()
53+
.collect();
54+
unique_numbers.sort_unstable();
55+
56+
match (unique_numbers.first(), unique_numbers.last()) {
57+
(Some(first), Some(last)) => Some(ValueStats::F64 {
58+
min: first.0,
59+
max: last.0,
60+
unique_values: unique_numbers.into_iter().map(|n| n.0).collect(),
61+
}),
62+
_ => unreachable!(
63+
"unique_numbers should not be empty due to is_empty check"
64+
),
65+
}
66+
}
67+
}
68+
(npyz::TypeChar::Int, 8) => {
69+
let data: Vec<i64> = npy.data::<i64>()?.collect::<Result<_, _>>()?;
70+
if data.is_empty() {
71+
None
72+
} else {
73+
let mut unique_numbers: Vec<_> =
74+
HashSet::<i64>::from_iter(data).into_iter().collect();
75+
unique_numbers.sort_unstable();
76+
77+
Some(ValueStats::I64 {
78+
min: *unique_numbers
79+
.first()
80+
.expect("unique_numbers should not be empty due to is_empty check"),
81+
max: *unique_numbers
82+
.last()
83+
.expect("unique_numbers should not be empty due to is_empty check"),
84+
unique_values: unique_numbers,
85+
})
86+
}
87+
}
88+
_ => None, // Unsupported type for detailed stats
89+
};
90+
(dtype_str, stats)
91+
}
92+
_ => (format!("{dtype:?}"), None),
93+
};
94+
95+
Ok(NpyAnalysis {
96+
dimensions,
97+
shape,
98+
dtype_string,
99+
stats,
100+
})
101+
}

src/main.rs

Lines changed: 9 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
use std::collections::HashSet;
2-
31
use clap::Parser;
4-
use ordered_float::OrderedFloat;
2+
3+
use crate::{analyze::analyze_npy, present::present_analysis};
4+
5+
pub mod analyze;
6+
pub mod present;
57

68
#[derive(Parser)]
79
#[command(author, version)]
@@ -24,80 +26,14 @@ fn validate_npy(string: &str) -> Result<String, String> {
2426
fn main() {
2527
let cli = Cli::parse();
2628

27-
// Make sure the file exists
28-
// If not, print an error message and exit
29+
// Make sure the file exists before proceeding.
2930
if !std::path::Path::new(&cli.file_path).exists() {
3031
eprintln!("Error: File '{}' does not exist.", cli.file_path);
3132
std::process::exit(1);
3233
}
3334

34-
println!("Peek into {}", cli.file_path);
35-
println!("----------------------------------------");
36-
analyze_npy(&cli.file_path).expect("Failed to analyze the npy file");
37-
}
38-
39-
fn analyze_npy(file_path: &str) -> Result<(), Box<dyn std::error::Error>> {
40-
let bytes = std::fs::read(file_path)?;
41-
42-
let npy = npyz::NpyFile::new(&bytes[..])?;
43-
44-
println!("Dimensions: {}", npy.header().shape().len());
45-
println!("Shape: {:?}", npy.header().shape());
46-
let dtype = npy.header().dtype();
47-
48-
match dtype {
49-
npyz::DType::Plain(plain) => {
50-
println!("Type: {:?}{}", plain.type_char(), plain.size_field());
51-
println!("----------------------------------------");
52-
53-
match plain.type_char() {
54-
npyz::TypeChar::Float if plain.size_field() == 8 => {
55-
let unique_numbers: HashSet<OrderedFloat<f64>> = npy
56-
.data::<f64>()?
57-
.map(|n| n.map(OrderedFloat))
58-
.collect::<Result<HashSet<_>, _>>()?;
59-
60-
// Sort the unique numbers for consistent output
61-
let unique_numbers: Vec<OrderedFloat<f64>> = {
62-
let mut nums: Vec<OrderedFloat<f64>> = unique_numbers.into_iter().collect();
63-
nums.sort_unstable();
64-
nums
65-
};
66-
67-
let min_value = unique_numbers.iter().min().unwrap();
68-
let max_value = unique_numbers.iter().max().unwrap();
69-
70-
println!("Number of unique values: {}", unique_numbers.len());
71-
println!("Unique values: {unique_numbers:?}");
72-
println!("Min value: {min_value:?}");
73-
println!("Max value: {max_value:?}");
74-
}
75-
npyz::TypeChar::Int if plain.size_field() == 8 => {
76-
let unique_numbers: HashSet<i64> =
77-
npy.data::<i64>()?.collect::<Result<HashSet<_>, _>>()?;
78-
79-
// Sort the unique numbers for consistent output
80-
let unique_numbers: Vec<i64> = {
81-
let mut nums: Vec<i64> = unique_numbers.into_iter().collect();
82-
nums.sort_unstable();
83-
nums
84-
};
85-
86-
let max_value = unique_numbers.iter().max().unwrap();
87-
let min_value = unique_numbers.iter().min().unwrap();
88-
89-
println!("Number of unique values: {}", unique_numbers.len());
90-
println!("Unique values: {unique_numbers:?}");
91-
println!("Min value: {min_value:?}");
92-
println!("Max value: {max_value:?}");
93-
}
94-
_ => {
95-
println!("Unsupported dtype for unique value calculation");
96-
}
97-
}
98-
}
99-
_ => return Err("Unsupported dtype".into()),
35+
match analyze_npy(&cli.file_path) {
36+
Ok(analysis) => present_analysis(&cli.file_path, &analysis),
37+
Err(error) => eprintln!("Error analyzing file: {error}"),
10038
}
101-
102-
Ok(())
10339
}

src/present.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use crate::analyze::{NpyAnalysis, ValueStats};
2+
3+
/// Presents the analysis results to the console.
4+
pub fn present_analysis(file_path: &str, analysis: &NpyAnalysis) {
5+
println!("Peek into {file_path}");
6+
println!("----------------------------------------");
7+
println!("Dimensions: {}", analysis.dimensions);
8+
println!("Shape: {:?}", analysis.shape);
9+
println!("Type: {}", analysis.dtype_string);
10+
println!("----------------------------------------");
11+
12+
match &analysis.stats {
13+
Some(ValueStats::F64 {
14+
unique_values,
15+
min,
16+
max,
17+
}) => {
18+
print_stats(unique_values, min, max);
19+
}
20+
Some(ValueStats::I64 {
21+
unique_values,
22+
min,
23+
max,
24+
}) => {
25+
print_stats(unique_values, min, max);
26+
}
27+
None => {
28+
println!("Unsupported dtype for unique value calculation");
29+
}
30+
}
31+
}
32+
33+
/// Prints the unique-value summary: the count, the values, the minimum and
/// the maximum, one per line.
///
/// `unique_values` is rendered with its `Debug` formatting, `min` and `max`
/// with `Display`. The values are taken as a slice, so a `&Vec<T>`, an array
/// or a sub-slice can all be passed directly.
fn print_stats<T>(unique_values: &[T], min: &T, max: &T)
where
    T: std::fmt::Debug + std::fmt::Display,
{
    println!("Number of unique values: {}", unique_values.len());
    println!("Unique values: {unique_values:?}");
    println!("Min value: {min}");
    println!("Max value: {max}");
}

0 commit comments

Comments
 (0)