|
4 | 4 | import logging |
5 | 5 | import os |
6 | 6 | import re |
7 | | -import tempfile |
8 | 7 | import warnings |
9 | | -import zipfile |
10 | 8 | from pathlib import Path |
11 | 9 | from types import MappingProxyType |
12 | 10 | from typing import TYPE_CHECKING, Any |
@@ -245,7 +243,10 @@ def xenium( |
245 | 243 | labels_models_kwargs=labels_models_kwargs, |
246 | 244 | ) |
247 | 245 | if cell_labels_indices_mapping is not None and table is not None: |
248 | | - if not pd.DataFrame.equals(cell_labels_indices_mapping["cell_id"], table.obs[str(XeniumKeys.CELL_ID)]): |
| 246 | + if not pd.DataFrame.equals( |
| 247 | + cell_labels_indices_mapping["cell_id"], |
| 248 | + table.obs[str(XeniumKeys.CELL_ID)], |
| 249 | + ): |
249 | 250 | warnings.warn( |
250 | 251 | "The cell_id column in the cell_labels_table does not match the cell_id column derived from the " |
251 | 252 | "cell labels data. This could be due to trying to read a new version that is not supported yet. " |
@@ -384,9 +385,6 @@ def filter(self, record: logging.LogRecord) -> bool: |
384 | 385 |
|
385 | 386 | if table is not None: |
386 | 387 | tables["table"] = table |
387 | | - # valid_nucleus_mask = ~table.obs[XeniumKeys.CELL_ID].isin(invalid_nuc_ids) |
388 | | - # valid_cell_mask = ~table.obs[XeniumKeys.CELL_ID].isin(invalid_cell_ids) |
389 | | - # tables["table"] = table[valid_nucleus_mask & valid_cell_mask].copy() |
390 | 388 |
|
391 | 389 | elements_dict = { |
392 | 390 | "images": images, |
@@ -424,27 +422,6 @@ def _get_polygons( |
424 | 422 | # seems to be faster than pd.read_parquet |
425 | 423 | df = pq.read_table(path / file).to_pandas() |
426 | 424 |
|
427 | | - # df[XeniumKeys.CELL_ID] = _decode_cell_id_column(df[XeniumKeys.CELL_ID]) |
428 | | - # # filter out cell ids with too few vertices to form a valid polygon. |
429 | | - # invalid_ids = df.groupby(XeniumKeys.CELL_ID).filter(lambda x: len(x) < 3)[ |
430 | | - # XeniumKeys.CELL_ID].unique() |
431 | | - # invalid_ids = [] if len(invalid_ids) == 0 else invalid_ids |
432 | | - # |
433 | | - # if len(invalid_ids) > 0: |
434 | | - # logging.warning( |
435 | | - # f"Found {len(invalid_ids)} invalid polygons for {file}, removing the masks corresponding to the IDs: {invalid_ids}" |
436 | | - # ) |
437 | | - # |
438 | | - # # Filter based on valid cell IDs if idx is provided |
439 | | - # if idx is not None: |
440 | | - # idx = idx[~idx.isin(invalid_ids)] |
441 | | - # if len(invalid_ids) > 0: |
442 | | - # idx = idx.reset_index(drop=True) |
443 | | - # df = df[df[XeniumKeys.CELL_ID].isin(idx)] |
444 | | - # else: |
445 | | - # # If no idx provided, just (potentially) filter out invalid IDs |
446 | | - # df = df[~df[XeniumKeys.CELL_ID].isin(invalid_ids)] |
447 | | - |
448 | 425 | group_by = df.groupby(XeniumKeys.CELL_ID) |
449 | 426 | index = pd.Series(group_by.indices.keys()) |
450 | 427 | # convert the index to str since we will compare it with an AnnData object, where the index is a str |
@@ -492,7 +469,12 @@ def _get_labels_and_indices_mapping( |
492 | 469 | z = zarr.open(store, mode="r") |
493 | 470 | # get the labels |
494 | 471 | masks = da.from_array(z["masks"][f"{mask_index}"]) |
495 | | - labels = Labels2DModel.parse(masks, dims=("y", "x"), transformations={"global": Identity()}, **labels_models_kwargs) |
| 472 | + labels = Labels2DModel.parse( |
| 473 | + masks, |
| 474 | + dims=("y", "x"), |
| 475 | + transformations={"global": Identity()}, |
| 476 | + **labels_models_kwargs, |
| 477 | + ) |
496 | 478 |
|
497 | 479 | # build the matching table |
498 | 480 | version = _parse_version_of_xenium_analyzer(specs) |
|
0 commit comments