Skip to content

Commit 7fcc571

Browse files
cargo fmt
1 parent 20bdd24 commit 7fcc571

File tree

12 files changed

+577
-295
lines changed

12 files changed

+577
-295
lines changed

src/agc_index.rs

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,10 @@ impl fmt::Debug for AgcIndex {
2121
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2222
f.debug_struct("AgcIndex")
2323
.field("agc_paths", &self.agc_paths)
24-
.field("num_decompressors", &self.decompressors.lock().unwrap().len())
24+
.field(
25+
"num_decompressors",
26+
&self.decompressors.lock().unwrap().len(),
27+
)
2528
.finish_non_exhaustive()
2629
}
2730
}
@@ -43,7 +46,8 @@ impl AgcIndex {
4346
Arc::clone(arc)
4447
} else {
4548
let arc: Arc<str> = s.into();
46-
self.interned_strings.insert(s.to_string(), Arc::clone(&arc));
49+
self.interned_strings
50+
.insert(s.to_string(), Arc::clone(&arc));
4751
arc
4852
}
4953
}
@@ -100,22 +104,31 @@ impl AgcIndex {
100104
index.sample_contig_to_agc.insert(key, agc_idx);
101105

102106
// Key: contig alone (if unique)
103-
index.sample_contig_to_agc
107+
index
108+
.sample_contig_to_agc
104109
.entry(contig.clone())
105110
.or_insert(agc_idx);
106111

107112
// Contig -> (sample, contig, agc_idx) - values use Arc<str>
108-
index.contig_to_sample_info
113+
index
114+
.contig_to_sample_info
109115
.entry(contig.clone())
110116
.or_insert((Arc::clone(&sample_arc), Arc::clone(&contig_arc), agc_idx));
111117

112118
// Handle short contig name if different
113119
let short_contig = Self::extract_short_contig_name(&contig);
114120
if short_contig != contig {
115121
let short_key = format!("{short_contig}@{sample}");
116-
index.sample_contig_to_agc.entry(short_key).or_insert(agc_idx);
117-
index.sample_contig_to_agc.entry(short_contig.to_string()).or_insert(agc_idx);
118-
index.contig_to_sample_info
122+
index
123+
.sample_contig_to_agc
124+
.entry(short_key)
125+
.or_insert(agc_idx);
126+
index
127+
.sample_contig_to_agc
128+
.entry(short_contig.to_string())
129+
.or_insert(agc_idx);
130+
index
131+
.contig_to_sample_info
119132
.entry(short_contig.to_string())
120133
.or_insert((Arc::clone(&sample_arc), Arc::clone(&contig_arc), agc_idx));
121134
}
@@ -143,7 +156,9 @@ impl AgcIndex {
143156
if let Some((contig, sample)) = seq_name.split_once('@') {
144157
let agc_idx = self.sample_contig_to_agc.get(seq_name).copied();
145158
(sample.to_string(), contig.to_string(), agc_idx)
146-
} else if let Some((sample, full_contig, agc_idx)) = self.contig_to_sample_info.get(seq_name) {
159+
} else if let Some((sample, full_contig, agc_idx)) =
160+
self.contig_to_sample_info.get(seq_name)
161+
{
147162
(sample.to_string(), full_contig.to_string(), Some(*agc_idx))
148163
} else {
149164
(String::new(), seq_name.to_string(), None)
@@ -197,9 +212,7 @@ impl AgcIndex {
197212
let length = decompressors[agc_idx]
198213
.get_contig_length(&sample, &contig)
199214
.map_err(|e| {
200-
io::Error::other(format!(
201-
"Failed to get length for '{contig}@{sample}': {e}"
202-
))
215+
io::Error::other(format!("Failed to get length for '{contig}@{sample}': {e}"))
203216
})?;
204217

205218
Ok(length)

src/commands/align.rs

Lines changed: 46 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,9 @@ fn sketch_sequence(sequence: &[u8], k: usize, sketch_size: usize) -> Vec<u64> {
238238
let kmer = &sequence[i..i + k];
239239

240240
// Skip k-mers with non-ACGT characters
241-
if kmer.iter().any(|&b| !matches!(b, b'A' | b'C' | b'G' | b'T' | b'a' | b'c' | b'g' | b't'))
241+
if kmer
242+
.iter()
243+
.any(|&b| !matches!(b, b'A' | b'C' | b'G' | b'T' | b'a' | b'c' | b'g' | b't'))
242244
{
243245
continue;
244246
}
@@ -464,51 +466,57 @@ fn load_sequences(
464466
) -> io::Result<Vec<SequenceInfo>> {
465467
let sequences = Mutex::new(Vec::new());
466468

467-
fasta_files.par_iter().try_for_each(|path| -> io::Result<()> {
468-
let file = File::open(path)?;
469-
let (reader, _format) = niffler::get_reader(Box::new(file))
470-
.map_err(|e| io::Error::other(format!("Failed to open {}: {}", path, e)))?;
471-
let reader = BufReader::new(reader);
472-
473-
let mut current_name: Option<String> = None;
474-
let mut current_seq = Vec::new();
475-
476-
for line in reader.lines() {
477-
let line = line?;
478-
if let Some(header) = line.strip_prefix('>') {
479-
// Process previous sequence
480-
if let Some(name) = current_name.take() {
481-
let sketch = sketch_sequence(&current_seq, kmer_size, sketch_size);
482-
sequences.lock().unwrap().push(SequenceInfo {
483-
name,
484-
path: path.clone(),
485-
sketch,
486-
});
469+
fasta_files
470+
.par_iter()
471+
.try_for_each(|path| -> io::Result<()> {
472+
let file = File::open(path)?;
473+
let (reader, _format) = niffler::get_reader(Box::new(file))
474+
.map_err(|e| io::Error::other(format!("Failed to open {}: {}", path, e)))?;
475+
let reader = BufReader::new(reader);
476+
477+
let mut current_name: Option<String> = None;
478+
let mut current_seq = Vec::new();
479+
480+
for line in reader.lines() {
481+
let line = line?;
482+
if let Some(header) = line.strip_prefix('>') {
483+
// Process previous sequence
484+
if let Some(name) = current_name.take() {
485+
let sketch = sketch_sequence(&current_seq, kmer_size, sketch_size);
486+
sequences.lock().unwrap().push(SequenceInfo {
487+
name,
488+
path: path.clone(),
489+
sketch,
490+
});
491+
}
492+
current_name = Some(header.split_whitespace().next().unwrap_or("").to_string());
493+
current_seq.clear();
494+
} else {
495+
current_seq.extend(line.trim().as_bytes());
487496
}
488-
current_name = Some(header.split_whitespace().next().unwrap_or("").to_string());
489-
current_seq.clear();
490-
} else {
491-
current_seq.extend(line.trim().as_bytes());
492497
}
493-
}
494498

495-
// Don't forget last sequence
496-
if let Some(name) = current_name {
497-
let sketch = sketch_sequence(&current_seq, kmer_size, sketch_size);
498-
sequences.lock().unwrap().push(SequenceInfo {
499-
name,
500-
path: path.clone(),
501-
sketch,
502-
});
503-
}
499+
// Don't forget last sequence
500+
if let Some(name) = current_name {
501+
let sketch = sketch_sequence(&current_seq, kmer_size, sketch_size);
502+
sequences.lock().unwrap().push(SequenceInfo {
503+
name,
504+
path: path.clone(),
505+
sketch,
506+
});
507+
}
504508

505-
Ok(())
506-
})?;
509+
Ok(())
510+
})?;
507511

508512
let mut result = sequences.into_inner().unwrap();
509513

510514
if show_progress {
511-
info!("Loaded {} sequences from {} files", result.len(), fasta_files.len());
515+
info!(
516+
"Loaded {} sequences from {} files",
517+
result.len(),
518+
fasta_files.len()
519+
);
512520
}
513521

514522
// Sort by name for reproducibility

src/commands/graph.rs

Lines changed: 20 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -97,13 +97,13 @@ impl Default for GraphBuildConfig {
9797
// Filtering options
9898
no_filter: false,
9999
num_mappings: "1:1".to_string(),
100-
scaffold_jump: 50_000, // 50kb default scaffold gap
101-
scaffold_mass: 10_000, // 10kb minimum scaffold length
102-
scaffold_filter: "1:1".to_string(), // 1:1 scaffold filtering (now fixed in sweepga 608547a)
100+
scaffold_jump: 50_000, // 50kb default scaffold gap
101+
scaffold_mass: 10_000, // 10kb minimum scaffold length
102+
scaffold_filter: "1:1".to_string(), // 1:1 scaffold filtering (now fixed in sweepga 608547a)
103103
overlap: 0.95,
104104
min_identity: 0.0,
105-
scaffold_dist: 0, // No deviation limit by default
106-
min_mapping_length: 0, // No minimum mapping length by default
105+
scaffold_dist: 0, // No deviation limit by default
106+
min_mapping_length: 0, // No minimum mapping length by default
107107
}
108108
}
109109
}
@@ -119,8 +119,9 @@ fn count_sequences_and_genomes_in_fasta(fasta_paths: &[String]) -> io::Result<(u
119119
for path in fasta_paths {
120120
let file = File::open(path)?;
121121
// Use niffler to auto-detect compression
122-
let (reader, _format) = niffler::get_reader(Box::new(file))
123-
.map_err(|e| io::Error::other(format!("Failed to open reader for '{}': {}", path, e)))?;
122+
let (reader, _format) = niffler::get_reader(Box::new(file)).map_err(|e| {
123+
io::Error::other(format!("Failed to open reader for '{}': {}", path, e))
124+
})?;
124125
let reader = BufReader::new(reader);
125126

126127
for line in reader.lines() {
@@ -231,10 +232,7 @@ pub fn build_graph<W: Write>(
231232
// Also configure seqwish's internal temp file handling
232233
seqwish::tempfile::set_dir(temp_dir);
233234
if config.show_progress {
234-
info!(
235-
"[graph::temp] Using temp directory: {}",
236-
temp_dir
237-
);
235+
info!("[graph::temp] Using temp directory: {}", temp_dir);
238236
}
239237
}
240238

@@ -266,16 +264,15 @@ pub fn build_graph<W: Write>(
266264
// 2) Create combined FASTA for alignment
267265
// sweepga's FastGA needs a single combined FASTA file for all-vs-all alignment
268266
// FastGA requires .fa extension to recognize the file format
269-
let combined_fasta = tempfile::Builder::new()
270-
.suffix(".fa")
271-
.tempfile()?;
267+
let combined_fasta = tempfile::Builder::new().suffix(".fa").tempfile()?;
272268
{
273269
let mut writer = BufWriter::new(&combined_fasta);
274270
for path in fasta_files {
275271
let file = File::open(path)?;
276272
// Use niffler to auto-detect compression
277-
let (reader, _format) = niffler::get_reader(Box::new(file))
278-
.map_err(|e| io::Error::other(format!("Failed to open reader for '{}': {}", path, e)))?;
273+
let (reader, _format) = niffler::get_reader(Box::new(file)).map_err(|e| {
274+
io::Error::other(format!("Failed to open reader for '{}': {}", path, e))
275+
})?;
279276
let reader = BufReader::new(reader);
280277
for line in reader.lines() {
281278
let line: String = line?;
@@ -302,9 +299,7 @@ pub fn build_graph<W: Write>(
302299
input_paf
303300
);
304301
}
305-
let paf_temp = tempfile::Builder::new()
306-
.suffix(".paf")
307-
.tempfile()?;
302+
let paf_temp = tempfile::Builder::new().suffix(".paf").tempfile()?;
308303
std::fs::copy(input_paf, paf_temp.path())?;
309304
paf_temp
310305
} else {
@@ -606,8 +601,12 @@ pub fn build_graph<W: Write>(
606601
)?;
607602
}
608603

609-
let gfa_string = String::from_utf8(gfa_buffer)
610-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Invalid UTF-8 in GFA: {}", e)))?;
604+
let gfa_string = String::from_utf8(gfa_buffer).map_err(|e| {
605+
io::Error::new(
606+
io::ErrorKind::InvalidData,
607+
format!("Invalid UTF-8 in GFA: {}", e),
608+
)
609+
})?;
611610

612611
// 10) Sort GFA using gfasort's Ygs pipeline (path-guided SGD + grooming + topological sort)
613612
if config.show_progress {

src/commands/partition.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ pub fn partition_alignments(
239239
min_identity,
240240
sequence_index,
241241
approximate_mode,
242-
None, // No subset filter for partition
242+
None, // No subset filter for partition
243243
)
244244
} else {
245245
impg.query_transitive_bfs(
@@ -255,7 +255,7 @@ pub fn partition_alignments(
255255
min_identity,
256256
sequence_index,
257257
approximate_mode,
258-
None, // No subset filter for partition
258+
None, // No subset filter for partition
259259
)
260260
};
261261
//let query_time = query_start.elapsed();

src/graph.rs

Lines changed: 32 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -334,12 +334,15 @@ pub fn prepare_poa_graph_and_sequences(
334334
let mut processed_sequences: Vec<(String, SequenceMetadata)> = results
335335
.par_iter()
336336
.map(|interval| -> io::Result<(String, SequenceMetadata)> {
337-
let seq_name = impg.seq_index().get_name(interval.metadata).ok_or_else(|| {
338-
io::Error::new(
339-
io::ErrorKind::NotFound,
340-
format!("Sequence name not found for ID {}", interval.metadata),
341-
)
342-
})?;
337+
let seq_name = impg
338+
.seq_index()
339+
.get_name(interval.metadata)
340+
.ok_or_else(|| {
341+
io::Error::new(
342+
io::ErrorKind::NotFound,
343+
format!("Sequence name not found for ID {}", interval.metadata),
344+
)
345+
})?;
343346

344347
// Get total sequence length
345348
let total_length = impg
@@ -432,12 +435,15 @@ pub fn prepare_sequences(
432435
.par_iter()
433436
.map(|interval| -> std::io::Result<(String, SequenceMetadata)> {
434437
// Resolve sequence name
435-
let seq_name = impg.seq_index().get_name(interval.metadata).ok_or_else(|| {
436-
std::io::Error::new(
437-
std::io::ErrorKind::NotFound,
438-
format!("Sequence name not found for ID {}", interval.metadata),
439-
)
440-
})?;
438+
let seq_name = impg
439+
.seq_index()
440+
.get_name(interval.metadata)
441+
.ok_or_else(|| {
442+
std::io::Error::new(
443+
std::io::ErrorKind::NotFound,
444+
format!("Sequence name not found for ID {}", interval.metadata),
445+
)
446+
})?;
441447

442448
// Resolve total contig length
443449
let total_length = impg
@@ -695,11 +701,16 @@ pub fn sort_gfa(gfa_content: &str, num_threads: usize) -> io::Result<String> {
695701

696702
// Write sorted GFA to string
697703
let mut sorted_output = Vec::new();
698-
graph.write_gfa(&mut sorted_output)
704+
graph
705+
.write_gfa(&mut sorted_output)
699706
.map_err(|e| io::Error::other(format!("Failed to write sorted GFA: {}", e)))?;
700707

701-
String::from_utf8(sorted_output)
702-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Invalid UTF-8 in sorted GFA: {}", e)))
708+
String::from_utf8(sorted_output).map_err(|e| {
709+
io::Error::new(
710+
io::ErrorKind::InvalidData,
711+
format!("Invalid UTF-8 in sorted GFA: {}", e),
712+
)
713+
})
703714
}
704715

705716
/// Configuration for seqwish-based GFA generation
@@ -889,8 +900,12 @@ pub fn generate_gfa_seqwish_from_intervals(
889900
)?;
890901
}
891902

892-
let gfa_string = String::from_utf8(gfa_output)
893-
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, format!("Invalid UTF-8 in GFA: {}", e)))?;
903+
let gfa_string = String::from_utf8(gfa_output).map_err(|e| {
904+
io::Error::new(
905+
io::ErrorKind::InvalidData,
906+
format!("Invalid UTF-8 in GFA: {}", e),
907+
)
908+
})?;
894909

895910
// Clean up seqwish temp files before returning
896911
seqwish::tempfile::cleanup();

0 commit comments

Comments
 (0)