Skip to content

Commit 53a8343

Browse files
committed
tax_finder methods
1 parent b1943e5 commit 53a8343

File tree

5 files changed

+332
-61
lines changed

5 files changed

+332
-61
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ Package of utilities for FAIR-Ease demo workflows.
1212

1313
These methods are currently used only for the FAIR-EASE [pilot demos](https://github.com/palec87/momics-demos), but eventually they can serve for general-purpose manipulation of metagenomic data, both locally and in VREs.
1414

15-
The idea is to provide testable methods to allow as much flexibility and remixing of the functionalities provided
15+
The idea is to provide testable methods to allow as much flexibility and remixing of the functionalities provided.
1616

1717
## Installation
1818

momics/diversity.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import pandas as pd
33
import numpy as np
4-
from typing import List, Dict
4+
from typing import Union, List, Dict
55

66
import skbio
77
from skbio.diversity import beta_diversity
@@ -11,7 +11,7 @@
1111
from .utils import (
1212
check_index_names,
1313
)
14-
14+
from momics.constants import TAXONOMY_RANKS
1515

1616
# logger setup
1717
FORMAT = "%(levelname)s | %(name)s | %(message)s"
@@ -131,9 +131,65 @@ def calculate_shannon_index(df: pd.DataFrame) -> pd.Series:
131131
return df.apply(shannon_index, axis=1)
132132

133133

134+
####################
135+
# Search functions #
136+
####################
137+
def find_taxa_in_table(
    table: pd.DataFrame,
    tax_level: str,
    search_term: Union[str, int],
    ncbi_tax_id: bool = False,
    exact_match: bool = False,
) -> pd.DataFrame:
    """
    Find rows of a taxonomic table that match a search term.

    Args:
        table (pd.DataFrame): DataFrame containing taxonomic data.
        tax_level (str): Column to search in, or 'all' to search every rank
            listed in ``TAXONOMY_RANKS``. Ignored when ``ncbi_tax_id`` is True.
        search_term (str|int): Term to search for. It is converted to ``str``
            before matching, so integer terms are accepted.
        ncbi_tax_id (bool): If True, select rows whose 'ncbi_tax_id' (column
            or index level) equals the search term; ``tax_level`` and
            ``exact_match`` are not used in that case.
        exact_match (bool): If True, require case-insensitive equality;
            otherwise perform a case-insensitive literal substring match.

    Returns:
        pd.DataFrame: The matching rows of ``table``, each row at most once,
        in their original order and with the original index untouched.

    Raises:
        ValueError: If ``ncbi_tax_id`` is True and the table has neither an
            'ncbi_tax_id' column nor an 'ncbi_tax_id' index level.
    """
    # Normalise once so that integer search terms work in every branch.
    term = str(search_term)

    if ncbi_tax_id:
        if "ncbi_tax_id" in table.columns:
            tax_ids = table["ncbi_tax_id"]
        elif "ncbi_tax_id" in table.index.names:
            # Read the level directly instead of reset_index()/set_index():
            # the round trip breaks on unnamed index levels.
            tax_ids = table.index.get_level_values("ncbi_tax_id")
        else:
            raise ValueError("The table does not contain 'ncbi_tax_id' column or index level.")
        return table[np.asarray(tax_ids.astype(str) == term, dtype=bool)]

    if tax_level == "all":
        # OR the per-rank masks so that a row matching in several ranks is
        # returned once rather than once per rank.
        mask = np.zeros(len(table), dtype=bool)
        for rank in TAXONOMY_RANKS:
            mask |= _taxa_match_mask(table[rank], term, exact_match)
    else:
        mask = _taxa_match_mask(table[tax_level], term, exact_match)

    return table[mask]


def _taxa_match_mask(column: pd.Series, term: str, exact_match: bool) -> np.ndarray:
    """Return a boolean array marking the entries of ``column`` that match ``term``."""
    if exact_match:
        hits = column.str.lower().fillna("") == term.lower()
    else:
        # regex=False: the term is literal text, so characters such as
        # '(' or '.' must not be interpreted as a regular expression.
        hits = column.str.contains(term, case=False, na=False, regex=False)
    return hits.to_numpy(dtype=bool)
188+
134189
#######################
135190
# diversity functions #
136191
#######################
192+
137193
def calculate_alpha_diversity(df: pd.DataFrame, factors: pd.DataFrame) -> pd.DataFrame:
138194
"""
139195
Calculates the alpha diversity (Shannon index) for a DataFrame.

momics/panel_utils.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,63 @@ def is_port_in_use(port: int) -> bool:
6464
return s.connect_ex(("localhost", port)) == 0
6565

6666

67+
def tax_finder_selector() -> Tuple[
    pn.widgets.Select,
    pn.widgets.Select,
    pn.widgets.TextInput,
    pn.widgets.Checkbox,
    pn.widgets.Checkbox,
]:
    """
    Build the panel widgets used by the taxonomy finder.

    Returns:
        Tuple of five widgets, in this order:
            - Select for the taxonomic table ("ssu" or "lsu", default "ssu"),
            - Select for the taxonomic search level (default "all"),
            - TextInput for the search term (initially empty),
            - Checkbox toggling exact matching (unchecked by default),
            - Checkbox toggling log-scale abundance coloring (checked by default).
    """
    # Choices offered by the level selector, in display order.
    level_choices = [
        "all",
        "ncbi_tax_id",
        "superkingdom",
        "kingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
    ]

    table_picker = pn.widgets.Select(
        name="Taxonomic table",
        value="ssu",
        options=["ssu", "lsu"],
        description="Select a table for taxonomic search",
    )
    level_picker = pn.widgets.Select(
        name="Taxonomic level",
        value="all",
        options=level_choices,
        description="Select a taxonomic search level",
    )
    term_input = pn.widgets.TextInput(
        name="Search term",
        value="",
        description="Enter a search term (string or NCBI tax ID)",
    )
    exact_match_box = pn.widgets.Checkbox(name="Exact match of the search term", value=False)
    log_scale_box = pn.widgets.Checkbox(name="Log scale for abundance coloring", value=True)

    return table_picker, level_picker, term_input, exact_match_box, log_scale_box
122+
123+
67124
def diversity_select_widgets(cat_columns: List[str], num_columns: List[str]) -> Tuple[
68125
pn.widgets.Select,
69126
pn.widgets.Select,

0 commit comments

Comments
 (0)