Skip to content

Commit 5166931

Browse files
committed
Allow indexing by names
1 parent 76de6a6 commit 5166931

File tree

2 files changed

+69
-3
lines changed

2 files changed

+69
-3
lines changed

src/main.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ enum Commands {
5757
#[arg(long, required_unless_present = "rows")]
5858
cols: Option<String>,
5959

60+
/// Index by row names rather than indices
61+
#[arg(long)]
62+
rownames: bool,
63+
64+
/// Index by column names rather than indices
65+
#[arg(long)]
66+
colnames: bool,
67+
6068
/// Do not reindex the output matrix (keep original indices)
6169
#[arg(long)]
6270
no_reindex: bool,
@@ -89,6 +97,8 @@ fn main() -> Result<(), Box<dyn Error>> {
8997
output,
9098
rows,
9199
cols,
100+
rownames,
101+
colnames,
92102
no_reindex,
93103
} => {
94104

@@ -97,6 +107,8 @@ fn main() -> Result<(), Box<dyn Error>> {
97107
&output,
98108
rows,
99109
cols,
110+
rownames,
111+
colnames,
100112
no_reindex,
101113
)?;
102114
}

src/subset.rs

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ pub fn subset_matrix(
1010
output: &str,
1111
rows_file: Option<String>,
1212
cols_file: Option<String>,
13+
rownames: bool,
14+
colnames: bool,
1315
no_reindex: bool,
1416
) -> Result<(), Box<dyn Error>> {
1517

@@ -21,15 +23,29 @@ pub fn subset_matrix(
2123
// check if input is a file or directory
2224
let is_directory = input.as_ref().map_or(false, |path| Path::new(path).is_dir());
2325

26+
if rownames || colnames {
27+
if is_directory {
28+
return Err("Input must be a file when using names".into());
29+
}
30+
}
31+
2432
// Read the indices to retain
2533
let rows_to_retain = if let Some(ref file_path) = rows_file {
26-
read_indices(file_path)?
34+
if rownames {
35+
map_names_to_indices(&input, true, file_path)?
36+
} else {
37+
read_indices(file_path)?
38+
}
2739
} else {
28-
Vec::new() // Empty vector indicates all rows are retained
40+
Vec::new()
2941
};
3042

3143
let cols_to_retain = if let Some(ref file_path) = cols_file {
32-
read_indices(file_path)?
44+
if colnames {
45+
map_names_to_indices(&input, false, file_path)?
46+
} else {
47+
read_indices(file_path)?
48+
}
3349
} else {
3450
Vec::new() // Empty vector indicates all columns are retained
3551
};
@@ -311,4 +327,42 @@ fn parse_header_line(header_lines: &Vec<String>) -> Result<(usize, usize, usize)
311327
}
312328
}
313329
Err("Error parsing header line for matrix dimensions.".into())
330+
}
331+
332+
fn map_names_to_indices(
333+
input_dir: &Option<String>,
334+
is_rows: bool,
335+
names_file: &str,
336+
) -> Result<Vec<usize>, Box<dyn Error>> {
337+
let dir = input_dir.as_ref().ok_or("Input directory required when using names")?;
338+
339+
// Determine which file to read based on whether we're mapping rows or columns
340+
let mapping_file = if is_rows {
341+
format!("{}/features.tsv.gz", dir)
342+
} else {
343+
format!("{}/barcodes.tsv.gz", dir)
344+
};
345+
346+
// Read the mapping file and create name->index mapping
347+
let mut name_to_index = HashMap::<String, usize>::new();
348+
let reader = io_utils::get_reader(&mapping_file)?;
349+
350+
for (idx, line_result) in reader.lines().enumerate() {
351+
let line = line_result?;
352+
let parts: Vec<&str> = line.split('\t').collect();
353+
if !parts.is_empty() {
354+
let name = parts[0].trim().to_string();
355+
name_to_index.insert(name, idx + 1);
356+
}
357+
}
358+
359+
// Read the names file and convert to indices
360+
let names = io_utils::get_reader(&names_file)?;
361+
let mut indices = Vec::new();
362+
for line_result in names.lines() {
363+
let line = line_result?;
364+
let name = line.trim().to_string();
365+
indices.push(name_to_index.get(&name).copied().unwrap_or(0));
366+
}
367+
Ok(indices)
314368
}

0 commit comments

Comments
 (0)