Skip to content

Commit f11789e

Browse files
KuthuruTejaswini, MacBook Air, and pre-commit-ci[bot]
authored
Added compressed parameter to read_10x_mtx to support STARsolo output (#3564)
Co-authored-by: MacBook Air <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 569bf05 commit f11789e

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

docs/release-notes/3564.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added a new `compressed` parameter to the `read_10x_mtx` function to support reading uncompressed matrix files produced by tools like STARsolo. Passing `compressed=False` lets users read output from tools that don't produce gzipped files by default.

src/scanpy/readwrite.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@ def read_10x_mtx(
553553
cache_compression: Literal["gzip", "lzf"] | None | Empty = _empty,
554554
gex_only: bool = True,
555555
prefix: str | None = None,
556+
compressed: bool = True,
556557
) -> AnnData:
557558
"""Read 10x-Genomics-formatted mtx directory.
558559
@@ -579,6 +580,11 @@ def read_10x_mtx(
579580
if the files are named `patientA_matrix.mtx`, `patientA_genes.tsv` and
580581
`patientA_barcodes.tsv` the prefix is `patientA_`.
581582
(Default: no prefix)
583+
compressed
584+
Whether to expect Cell Ranger v3+ files (.mtx, features.tsv, barcodes.tsv)
585+
to be gzipped. If True, '.gz' suffix is appended to filenames.
586+
Set to False for STARsolo output.
587+
Has no effect on legacy (v2-) files.
582588
583589
Returns
584590
-------
@@ -596,6 +602,7 @@ def read_10x_mtx(
596602
cache_compression=cache_compression,
597603
prefix=prefix,
598604
is_legacy=is_legacy,
605+
compressed=compressed,
599606
)
600607
if is_legacy or not gex_only:
601608
return adata
@@ -612,9 +619,11 @@ def _read_10x_mtx(
612619
cache_compression: Literal["gzip", "lzf"] | None | Empty = _empty,
613620
prefix: str = "",
614621
is_legacy: bool,
622+
compressed: bool = True,
615623
) -> AnnData:
616624
"""Read mex from output from Cell Ranger v2- or v3+."""
617-
suffix = "" if is_legacy else ".gz"
625+
# Only append .gz if not a legacy file AND compression is requested
626+
suffix = "" if is_legacy else (".gz" if compressed else "")
618627
adata = read(
619628
path / f"{prefix}matrix.mtx{suffix}",
620629
cache=cache,

tests/test_read_10x.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,28 @@ def test_10x_probe_barcode_read():
175175
assert set(probe_anndata.obs.columns) == {"filtered_barcodes"}
176176
assert probe_anndata.shape == (4987, 1000)
177177
assert probe_anndata.X.nnz == 858
178+
179+
180+
def test_read_10x_compressed_parameter(tmp_path):
    """Check that ``compressed=False`` reads non-gzipped 10x mtx files.

    Every ``*.gz`` file of the ``3.0.0/filtered_feature_bc_matrix`` fixture is
    decompressed into a temporary directory. That directory is then read with
    ``compressed=False`` and the result is compared against the original
    fixture read with ``compressed=True``.
    """
    # Imported once at function scope instead of on every loop iteration.
    import gzip

    mtx_path_v3 = ROOT / "3.0.0" / "filtered_feature_bc_matrix"
    test_path = tmp_path / "test_compressed"
    test_path.mkdir()

    # Write a decompressed copy of each gzipped fixture file.
    for gz_file in mtx_path_v3.glob("*.gz"):
        # ``stem`` drops only the final suffix, i.e. the trailing ``.gz``.
        dest_file = test_path / gz_file.stem
        with gzip.open(gz_file, "rb") as f_in:
            dest_file.write_bytes(f_in.read())

    adata_uncompressed = sc.read_10x_mtx(test_path, compressed=False)
    adata_compressed = sc.read_10x_mtx(mtx_path_v3, compressed=True)

    # Both reads must yield the same AnnData object.
    assert_anndata_equal(adata_uncompressed, adata_compressed)

0 commit comments

Comments
 (0)