Skip to content

Commit bbcaa8c

Browse files
committed
fix coloring issue and simplify some parts of the code
1 parent ea61eac commit bbcaa8c

File tree

2 files changed

+118
-108
lines changed

2 files changed

+118
-108
lines changed

src/utils.rs

Lines changed: 73 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,46 @@
11
use ansi_term::Colour;
2-
use std::io::Write;
2+
use std::{io::Write, ops::Range};
33

44
use self::search::Matches;
55

66
pub mod file;
77
pub mod search;
88

9+
/// A half-open `start..end` index range that can be stored in ordered
/// collections such as `BTreeSet`.
///
/// `std::ops::Range` does not implement `Ord`, so this wrapper supplies a total
/// order over ranges.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CustomRange {
    range: Range<usize>,
}

impl CustomRange {
    /// Wraps `range` without modifying it.
    pub fn new(range: Range<usize>) -> Self {
        Self { range }
    }

    /// Returns a copy of the wrapped range.
    pub fn range(&self) -> Range<usize> {
        self.range.clone()
    }
}

impl PartialOrd for CustomRange {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for CustomRange {
    /// Orders ranges by `start`, breaking ties on `end`.
    ///
    /// The tie-break keeps the ordering consistent with equality (which compares
    /// both bounds): two ranges compare as `Equal` only when they are `==`.
    /// Ordered collections depend on that agreement to decide what a duplicate is.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.range
            .start
            .cmp(&other.range.start)
            .then_with(|| self.range.end.cmp(&other.range.end))
    }
}
43+
944
#[derive(Debug, PartialEq)]
1045
pub enum PatternType {
1146
Str(String),
@@ -40,61 +75,60 @@ fn strip(src: &str, p: char) -> &str {
4075
}
4176

4277
pub fn print_hexdump_output(matches: &Matches, bytes_per_line: usize) {
43-
let mut offset_iter = matches.offset().iter();
4478
let mut ascii_repr = Vec::new();
4579

46-
let mut offset = 0;
47-
48-
for (i, &byte) in matches.data().iter().enumerate() {
49-
if (i as i64 - bytes_per_line as i64).abs() % bytes_per_line as i64 == 0 {
50-
offset = if let Some(&offset) = offset_iter.next() {
51-
offset
80+
for range in matches.context_bytes_indexes() {
81+
let offset = range.range().start;
82+
print!(
83+
"{}: ",
84+
Colour::Green.paint(format!("{:08X}", offset - (offset % bytes_per_line)))
85+
);
86+
for i in range.range() {
87+
let byte = matches.get_data(i % matches.data_len());
88+
89+
if matches.indexes().contains(&i) {
90+
print!("{} ", Colour::Red.bold().paint(format!("{:02X}", byte)));
91+
ascii_repr.push(format!(
92+
"{}",
93+
Colour::Red.bold().paint(to_ascii_repr(byte).to_string())
94+
));
5295
} else {
53-
offset + bytes_per_line
54-
};
55-
56-
print!(
57-
"{}: ",
58-
Colour::Green.paint(format!("{:08X}", offset - (offset % bytes_per_line)))
59-
);
60-
}
61-
62-
if matches.indexes().iter().any(|indexes| indexes.contains(&i)) {
63-
print!("{} ", Colour::Red.bold().paint(format!("{:02X}", byte)));
64-
ascii_repr.push(format!(
65-
"{}",
66-
Colour::Red.bold().paint(to_ascii_repr(byte).to_string())
67-
));
68-
} else {
69-
print!("{:02X} ", byte);
70-
ascii_repr.push(to_ascii_repr(byte).to_string());
71-
}
96+
print!("{:02X} ", byte);
97+
ascii_repr.push(to_ascii_repr(byte).to_string());
98+
}
7299

73-
if bytes_per_line >= 8 && (i + 1) % 8 == 0 {
74-
print!(" ");
75-
}
100+
if bytes_per_line >= 8 && (i + 1) % 8 == 0 {
101+
print!(" ");
102+
}
76103

77-
if (i + 1) % bytes_per_line == 0 {
78-
print_ascii_repr(&ascii_repr);
79-
ascii_repr.clear();
104+
if (i + 1) % bytes_per_line == 0 {
105+
print_ascii_repr(&ascii_repr);
106+
ascii_repr.clear();
107+
}
80108
}
81109
}
82110

83-
// fix alignment for ascii column when the data buffer lenght it's not multiple of 16
111+
// fix ascii column alignment
84112
if !ascii_repr.is_empty() {
85-
let remaining = bytes_per_line - ascii_repr.len();
86-
for _ in 0..remaining {
87-
print!(" ");
113+
let total_chars_in_line = bytes_per_line * 3 + 2;
114+
let total_chars_bytes_printed = if ascii_repr.len() > 8 {
115+
ascii_repr.len() * 3 + 1
116+
} else {
117+
ascii_repr.len() * 3
118+
};
119+
let total_spaces_to_print = total_chars_in_line - total_chars_bytes_printed;
120+
121+
for _ in 0..total_spaces_to_print {
122+
print!(" ");
88123
}
89-
print!(" ");
90124
print_ascii_repr(&ascii_repr);
91125
}
92126

93127
std::io::stdout().flush().unwrap();
94128
}
95129

96130
fn print_ascii_repr(ascii_repr: &[String]) {
97-
print!(" |");
131+
print!("|");
98132
for ascii in ascii_repr {
99133
print!("{}", ascii);
100134
}

src/utils/search.rs

Lines changed: 45 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,70 @@
1+
use std::collections::BTreeSet;
12
use std::fs::File;
23
use std::io::{BufReader, Read, Seek, SeekFrom};
3-
use std::ops::Range;
4+
5+
use super::CustomRange;
46

57
#[derive(Debug, Clone)]
68
pub struct Matches {
7-
indexes: Vec<Range<usize>>,
9+
indexes: Vec<usize>,
10+
context_bytes_indexes: BTreeSet<CustomRange>,
811
data: Vec<u8>,
9-
offset: Vec<usize>,
10-
pattern_len: usize,
1112
context_bytes_size: usize,
12-
curr_context_bytes_indexes: Option<Range<usize>>,
1313
}
1414

1515
impl Matches {
16-
pub fn new(pattern_len: usize, context_bytes_size: usize) -> Self {
16+
pub fn new(context_bytes_size: usize) -> Self {
1717
Self {
18-
pattern_len,
1918
context_bytes_size,
2019
indexes: Vec::new(),
20+
context_bytes_indexes: BTreeSet::new(),
2121
data: Vec::new(),
22-
offset: Vec::new(),
23-
curr_context_bytes_indexes: None,
2422
}
2523
}
2624

27-
pub fn offset(&self) -> &[usize] {
28-
&self.offset
25+
pub fn context_bytes_indexes(&self) -> &BTreeSet<CustomRange> {
26+
&self.context_bytes_indexes
2927
}
3028

3129
/// Get a reference to the match's indexes.
32-
pub fn indexes(&self) -> &[Range<usize>] {
30+
pub fn indexes(&self) -> &[usize] {
3331
&self.indexes
3432
}
3533

3634
/// Get the byte of the match's data stored at `index` (panics if `index` is out of bounds).
37-
pub fn data(&self) -> &[u8] {
38-
&self.data
35+
pub fn get_data(&self, index: usize) -> u8 {
36+
*self.data.get(index).unwrap()
37+
}
38+
39+
pub fn data_len(&self) -> usize {
40+
self.data.len()
3941
}
4042

4143
pub fn is_empty(&self) -> bool {
4244
self.data.is_empty() && self.indexes.is_empty() && self.indexes.is_empty()
4345
}
4446

45-
fn populate_matches(&mut self, index: usize, pos_in_file: usize, buffer: &[u8]) {
46-
// index where we should start collecting bytes for context
47-
let offset = index - (index % self.context_bytes_size);
48-
49-
// search_for_slice only return the index where the match start so we need to
50-
// create a range with all indexes from the match
51-
let match_indexes = index..index + self.pattern_len;
52-
53-
// Creates the index range for the context bytes.
54-
// context_bytes_size_indexes can contain all the indexes for the match or partially, depends on
55-
// context_bytes_size and the pattern size
56-
let mut context_bytes_indexes = if offset + self.context_bytes_size <= buffer.len() {
57-
offset..offset + self.context_bytes_size
58-
} else {
59-
offset..buffer.len()
60-
};
61-
62-
// In case context_bytes_size doesn't contain all of the match indexes we
63-
// need to extend the end of the range
64-
if context_bytes_indexes.end < match_indexes.end {
65-
context_bytes_indexes.end += self.context_bytes_size;
66-
}
67-
68-
let context_bytes_indexes = Some(context_bytes_indexes);
69-
if context_bytes_indexes != self.curr_context_bytes_indexes { // Check if context_bytes_indexes was already added
70-
self.curr_context_bytes_indexes = context_bytes_indexes.clone();
71-
72-
// The actual bytes for context + the matching bytes
73-
// needed for printing the result
74-
self.data.extend_from_slice(&buffer[context_bytes_indexes.unwrap()]);
75-
76-
// The index is relative to the position in the current buffer we are
77-
// reading from the file, but we need to store the position relative to the
78-
// whole file
79-
self.offset.push(index + pos_in_file);
80-
}
81-
82-
// Now we need to know the indexes of the match inside of context_bytes
83-
let mut match_indexes = match_indexes.start % self.data.len()
84-
..match_indexes.end % self.data.len();
85-
86-
if match_indexes.end < match_indexes.start {
87-
match_indexes.end = match_indexes.start + self.pattern_len;
47+
fn populate_matches(&mut self, indexes: &[usize], buffer: &[u8]) {
48+
for index in indexes {
49+
// index where we should start collecting bytes for context
50+
let offset = index - (index % self.context_bytes_size);
51+
52+
// Creates the index range for the context bytes.
53+
let context_bytes_indexes = if offset + self.context_bytes_size <= buffer.len() {
54+
CustomRange::new(offset..offset + self.context_bytes_size)
55+
} else {
56+
CustomRange::new(offset..buffer.len())
57+
};
58+
59+
let bytes = &buffer[context_bytes_indexes.range.start..context_bytes_indexes.range.end];
60+
if self.context_bytes_indexes.insert(context_bytes_indexes) {
61+
// The actual bytes for context + the matching bytes
62+
// needed for printing the result
63+
self.data.extend_from_slice(bytes);
64+
}
8865
}
8966

90-
self.indexes.push(match_indexes);
67+
self.indexes.extend_from_slice(indexes);
9168
}
9269
}
9370

@@ -104,7 +81,7 @@ impl<'a> Searcher<'a> {
10481
pub fn new(pattern: &'a [u8], context_bytes_size: usize, skip_bytes: u64) -> Self {
10582
Self {
10683
pattern,
107-
matches: Matches::new(pattern.len(), context_bytes_size),
84+
matches: Matches::new(context_bytes_size),
10885
context_bytes_size,
10986
skip_bytes,
11087
}
@@ -114,21 +91,19 @@ impl<'a> Searcher<'a> {
11491
let mut file = File::open(filepath)?;
11592
let file_size = file.metadata().unwrap().len() as usize;
11693

117-
let mut pos_in_file = file.seek(SeekFrom::Start(self.skip_bytes)).unwrap_or(0) as usize;
94+
let _pos_in_file = file.seek(SeekFrom::Start(self.skip_bytes)).unwrap_or(0) as usize;
11895
let mut reader = BufReader::new(file);
11996

12097
if file_size < self.context_bytes_size {
12198
self.context_bytes_size = file_size;
12299
}
123100

124101
if file_size <= Self::BUFFER_SIZE {
125-
let mut buffer = Vec::new();
102+
let mut buffer = Vec::with_capacity(Self::BUFFER_SIZE);
126103
reader.read_to_end(&mut buffer)?;
127104

128105
let result = Self::search_slice(&buffer, self.pattern);
129-
for index in result {
130-
self.matches.populate_matches(index, 0, &buffer);
131-
}
106+
self.matches.populate_matches(&result, &buffer);
132107
} else {
133108
let mut buffer = [0; Self::BUFFER_SIZE];
134109
loop {
@@ -139,10 +114,9 @@ impl<'a> Searcher<'a> {
139114
}
140115

141116
let result = Self::search_slice(&buffer, self.pattern);
142-
for index in result {
143-
self.matches.populate_matches(index, pos_in_file, &buffer);
144-
}
145-
pos_in_file += Self::BUFFER_SIZE;
117+
self.matches.populate_matches(&result, &buffer);
118+
119+
// pos_in_file += Self::BUFFER_SIZE;
146120
}
147121
}
148122

@@ -164,7 +138,9 @@ impl<'a> Searcher<'a> {
164138

165139
if slice[curr_pos_pattern] == ch {
166140
if curr_pos_pattern == slice.len() - 1 {
167-
match_indexes.push(i - curr_pos_pattern);
141+
let pos = i - curr_pos_pattern;
142+
match_indexes
143+
.extend_from_slice(&(pos..pos + slice.len()).collect::<Vec<usize>>());
168144
curr_pos_pattern = table_of_ocurrencies[curr_pos_pattern];
169145
} else {
170146
curr_pos_pattern += 1;

0 commit comments

Comments
 (0)