Skip to content

Commit ad05a97

Browse files
committed
removed the invitation code requirement for PIASOmarkerDB API and client
1 parent 41f049b commit ad05a97

File tree

1 file changed

+101
-94
lines changed

1 file changed

+101
-94
lines changed

piaso/tools/_markerdb.py

Lines changed: 101 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,29 @@
1313
import piaso
1414
1515
# Query marker genes
16-
df = piaso.tl.queryPIASOmarkerDB(gene="CD3E", species="Human")
16+
df = piaso.tl.queryPIASOmarkerDB(gene="Foxp2", species="Mouse")
1717
1818
# Get both DataFrame and marker dict
1919
df, marker_dict = piaso.tl.queryPIASOmarkerDB(
20-
study="AllenHumanImmuneHealthAtlas_L2",
21-
species="Human",
20+
study="AllenWholeMouseBrain_isocortex",
21+
species="Mouse",
2222
as_dict=True
2323
)
2424
2525
# List available studies
2626
studies = piaso.tl.queryPIASOmarkerDB(list_studies=True)
2727
2828
# Analyze gene lists for cell type inference
29-
df = piaso.tl.analyzeMarkers(["CD3E", "CD8A", "GZMK"])
29+
df = piaso.tl.analyzeMarkers(["Syt6", "Tle4", "Foxp2", "Fezf2"])
3030
3131
# Analyze COSG results (DataFrame input) with specific study
3232
import pandas as pd
33-
cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
33+
cosg_df = pd.DataFrame(adata.uns['cosg']['names']).head(50)
3434
results, top_hits = piaso.tl.analyzeMarkers(
3535
cosg_df,
3636
n_top_genes=50,
37-
studies="SEAAD2024_MTG_Subclass"
37+
species="Mouse",
38+
studies="AllenWholeMouseBrain_isocortex"
3839
)
3940
"""
4041

@@ -88,7 +89,7 @@ class MarkerDBConnectionError(PIASOmarkerDBError):
8889

8990

9091
class AuthenticationError(PIASOmarkerDBError):
91-
"""Authentication/invitation code error for download access."""
92+
"""Authentication error (kept for potential future use)."""
9293
pass
9394

9495

@@ -109,9 +110,6 @@ class PIASOmarkerDB:
109110
base_url : str, optional
110111
Base URL for the PIASOmarkerDB API.
111112
Default: "https://piaso.org/piasomarkerdb"
112-
invitation_code : str, optional
113-
Invitation code for bulk download access.
114-
Contact dai@broadinstitute.org to request.
115113
timeout : int, optional
116114
Request timeout in seconds. Default: 30
117115
cache_dir : str or Path, optional
@@ -126,10 +124,16 @@ class PIASOmarkerDB:
126124
>>> client = PIASOmarkerDB()
127125
>>>
128126
>>> # Query markers
129-
>>> df = client.getMarkers(gene="CD3E")
127+
>>> df = client.getMarkers(gene="Foxp2")
130128
>>>
131129
>>> # Get as dict
132-
>>> df, marker_dict = client.getMarkers(gene="CD3E", as_dict=True)
130+
>>> df, marker_dict = client.getMarkers(
131+
... study="AllenWholeMouseBrain_isocortex",
132+
... as_dict=True
133+
... )
134+
>>>
135+
>>> # Download markers
136+
>>> client.downloadMarkers("markers.csv", species="Mouse")
133137
134138
See Also
135139
--------
@@ -155,12 +159,10 @@ class PIASOmarkerDB:
155159
def __init__(
156160
self,
157161
base_url: str = None,
158-
invitation_code: str = None,
159162
timeout: int = None,
160163
cache_dir: str | Path | None = None,
161164
):
162165
self.base_url = (base_url or self.DEFAULT_BASE_URL).rstrip('/')
163-
self.invitation_code = invitation_code
164166
self.timeout = timeout or self.DEFAULT_TIMEOUT
165167
self._session = requests.Session()
166168

@@ -215,11 +217,6 @@ def _request(
215217
**kwargs
216218
)
217219

218-
if response.status_code == 403:
219-
raise AuthenticationError(
220-
"Invalid or missing invitation code"
221-
)
222-
223220
if response.status_code >= 400:
224221
try:
225222
error_data = response.json()
@@ -324,8 +321,11 @@ def getMarkers(
324321
325322
Examples
326323
--------
327-
>>> df = client.getMarkers(gene="CD3E", species="Human")
328-
>>> df, marker_dict = client.getMarkers(study="SEAAD2024_MTG_Subclass", as_dict=True)
324+
>>> df = client.getMarkers(gene="Foxp2", species="Mouse")
325+
>>> df, marker_dict = client.getMarkers(
326+
... study="AllenWholeMouseBrain_isocortex",
327+
... as_dict=True
328+
... )
329329
"""
330330
params = {}
331331

@@ -479,6 +479,12 @@ def listStudies(
479479
-------
480480
list of str
481481
List of study/publication identifiers.
482+
483+
Examples
484+
--------
485+
>>> studies = client.listStudies()
486+
>>> print(f"Total studies: {len(studies)}")
487+
>>> mouse_studies = client.listStudies(species="Mouse")
482488
"""
483489
params = {}
484490
if species:
@@ -628,7 +634,7 @@ def analyzeGenes(
628634
629635
- **list of str**: Single list of gene symbols. Returns DataFrame.
630636
- **pd.DataFrame**: Columns are clusters/cell types, rows are genes.
631-
E.g., COSG output: pd.DataFrame(adata.uns['cosg']['names']).
637+
E.g., COSG output: pd.DataFrame(adata.uns['cosg']['names']).head(50).
632638
Returns tuple (results_dict, top_hits_dict).
633639
- **dict**: {cluster_name: [gene_list]}.
634640
Returns tuple (results_dict, top_hits_dict).
@@ -670,34 +676,30 @@ def analyzeGenes(
670676
671677
Examples
672678
--------
673-
Single gene list:
674-
675-
>>> df = client.analyzeGenes(["CD3E", "CD8A", "GZMK"])
676-
677-
With specific study:
679+
Single gene list (mouse cortex L6 markers):
678680
679681
>>> df = client.analyzeGenes(
680-
... ["CD3E", "CD8A", "GZMK"],
681-
... studies="SEAAD2024_MTG_Subclass"
682+
... ["Syt6", "Tle4", "Hs3st4", "Fezf2", "Foxp2"],
683+
... species="Mouse"
682684
... )
683685
684-
COSG output (DataFrame) with multiple studies:
686+
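With specific study filter (cosg_marker_df is a COSG marker DataFrame, e.g. pd.DataFrame(adata.uns['cosg']['names']).head(50)):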
685687
686-
>>> cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
687688
>>> results, top_hits = client.analyzeGenes(
688-
... cosg_df,
689+
... cosg_marker_df,
689690
... n_top_genes=50,
690-
... studies=["SEAAD2024_MTG_Subclass", "SilettiLinnarssonWholeHumanBrain2023_class"]
691+
... min_genes=5,
692+
... studies=['AllenWholeMouseBrain_isocortex'],
693+
... species="Mouse"
691694
... )
692-
>>> print(top_hits)
693-
{'Lamp5': 'GABAergic neuron', 'Lhx6': 'Interneuron', ...}
694695
695696
Dictionary input:
696697
697-
>>> results, top_hits = client.analyzeGenes({
698-
... 'Cluster_0': ['CD3E', 'CD8A', 'GZMK'],
699-
... 'Cluster_1': ['MS4A1', 'CD19', 'CD79A'],
700-
... })
698+
>>> gene_sets = {
699+
... 'Microglia': ['Cx3cr1', 'P2ry12', 'Tmem119', 'Csf1r', 'Trem2'],
700+
... 'L6_CT': ['Syt6', 'Tle4', 'Foxp2', 'Fezf2'],
701+
... }
702+
>>> results, top_hits = client.analyzeGenes(gene_sets)
701703
"""
702704
# Validate and normalize studies parameter
703705
studies_list = None
@@ -1015,32 +1017,25 @@ def _to_marker_dict(
10151017
def downloadMarkers(
10161018
self,
10171019
filepath: str | Path,
1018-
invitation_code: str = None,
10191020
**kwargs
10201021
) -> None:
10211022
"""
10221023
Download markers to CSV file.
10231024
1024-
Requires an invitation code. Contact dai@broadinstitute.org to request.
1025-
10261025
Parameters
10271026
----------
10281027
filepath : str or Path
10291028
Path to save the CSV file.
1030-
invitation_code : str, optional
1031-
Invitation code. Uses client default if not provided.
10321029
**kwargs
1033-
Same filter parameters as getMarkers().
1034-
"""
1035-
code = invitation_code or self.invitation_code
1036-
if not code:
1037-
raise AuthenticationError(
1038-
"Invitation code required for download. "
1039-
"Request one from dai@broadinstitute.org"
1040-
)
1030+
Same filter parameters as getMarkers() (species, tissue, study, etc.).
10411031
1032+
Examples
1033+
--------
1034+
>>> client = PIASOmarkerDB()
1035+
>>> client.downloadMarkers("human_markers.csv", species="Human")
1036+
>>> client.downloadMarkers("mouse_brain.csv", species="Mouse", tissue="brain")
1037+
"""
10421038
params = dict(kwargs)
1043-
params['invitation_code'] = code
10441039
params['format'] = 'csv'
10451040

10461041
url = self._build_url('markers')
@@ -1052,9 +1047,6 @@ def downloadMarkers(
10521047
timeout=self.timeout * 2
10531048
)
10541049

1055-
if response.status_code == 403:
1056-
raise AuthenticationError("Invalid invitation code")
1057-
10581050
response.raise_for_status()
10591051

10601052
filepath = Path(filepath)
@@ -1135,6 +1127,13 @@ def getRecommendedStudy(
11351127
-------
11361128
str or None
11371129
Recommended study name, or None if no recommendation.
1130+
1131+
Examples
1132+
--------
1133+
>>> client.getRecommendedStudy("human", "blood")
1134+
'AllenHumanImmuneHealthAtlas_L2'
1135+
>>> client.getRecommendedStudy("mouse", "cortex")
1136+
'AllenWholeMouseBrain_isocortex'
11381137
"""
11391138
recommendations = {
11401139
("human", "blood"): "AllenHumanImmuneHealthAtlas_L2",
@@ -1151,6 +1150,7 @@ def getRecommendedStudy(
11511150
("human", "spleen"): "XuTeichmann2023_Spleen",
11521151
("human", "intestine"): "XuTeichmann2023_Intestine",
11531152
("mouse", "cortex"): "AllenWholeMouseBrain_isocortex",
1153+
("mouse", "brain"): "AllenWholeMouseBrain_Neuron",
11541154
}
11551155

11561156
key = (species.lower(), tissue.lower())
@@ -1245,28 +1245,27 @@ def queryPIASOmarkerDB(
12451245
Query marker genes:
12461246
12471247
>>> import piaso
1248-
>>> df = piaso.tl.queryPIASOmarkerDB(gene="CD3E", species="Human")
1248+
>>> df = piaso.tl.queryPIASOmarkerDB(gene="Foxp2", species="Mouse")
1249+
>>> df = piaso.tl.queryPIASOmarkerDB(gene=["Foxp2", "Syt6", "Tle4"])
12491250
12501251
Get both DataFrame and marker dictionary:
12511252
12521253
>>> df, marker_dict = piaso.tl.queryPIASOmarkerDB(
1253-
... study="SEAAD2024_MTG_Subclass",
1254-
... species="Human",
1254+
... study="AllenWholeMouseBrain_isocortex",
1255+
... species="Mouse",
12551256
... as_dict=True
12561257
... )
1258+
>>> print(f"DataFrame shape: {df.shape}")
1259+
>>> print(f"Cell types in dict: {len(marker_dict)}")
12571260
12581261
List available studies:
12591262
12601263
>>> studies = piaso.tl.queryPIASOmarkerDB(list_studies=True)
1261-
>>> studies = piaso.tl.queryPIASOmarkerDB(list_studies=True, species="Human")
1264+
>>> print(f"Total studies: {len(studies)}")
12621265
12631266
List cell types:
12641267
1265-
>>> cell_types = piaso.tl.queryPIASOmarkerDB(list_cell_types=True, species="Human")
1266-
1267-
List genes:
1268-
1269-
>>> genes = piaso.tl.queryPIASOmarkerDB(list_genes=True, cell_type="T-cell")
1268+
>>> cell_types = piaso.tl.queryPIASOmarkerDB(list_cell_types=True, species="Mouse")
12701269
12711270
See Also
12721271
--------
@@ -1344,7 +1343,7 @@ def analyzeMarkers(
13441343
Returns: pd.DataFrame with analysis results.
13451344
13461345
- **pd.DataFrame**: Columns are clusters/cell types, rows are genes.
1347-
Ideal for COSG output: ``pd.DataFrame(adata.uns['cosg']['names'])``.
1346+
Ideal for COSG output: ``pd.DataFrame(adata.uns['cosg']['names']).head(50)``.
13481347
Returns: tuple (results_dict, top_hits_dict).
13491348
13501349
- **dict**: ``{cluster_name: [gene_list]}``.
@@ -1391,48 +1390,56 @@ def analyzeMarkers(
13911390
13921391
Examples
13931392
--------
1394-
Single gene list:
1393+
Single gene list (mouse cortex L6 markers):
13951394
13961395
>>> import piaso
1397-
>>> df = piaso.tl.analyzeMarkers(["CD3E", "CD8A", "GZMK", "PRF1"])
1396+
>>> query_genes = ["Syt6", "Tle4", "Hs3st4", "Fezf2", "Foxp2", "Col12a1"]
1397+
>>> df = piaso.tl.analyzeMarkers(query_genes)
13981398
>>> print(df.head())
1399+
>>> # Top hit: EN-L6-CT from WangKriegstein2025
13991400
14001401
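With specific study filter (``cosg_marker_df`` is a COSG marker DataFrame, e.g. ``pd.DataFrame(adata.uns['cosg']['names']).head(50)``):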
14011402
1402-
>>> df = piaso.tl.analyzeMarkers(
1403-
... ["CD3E", "CD8A", "GZMK"],
1404-
... studies="SEAAD2024_MTG_Subclass"
1403+
>>> results, top_hits = piaso.tl.analyzeMarkers(
1404+
... cosg_marker_df,
1405+
... n_top_genes=50,
1406+
... min_genes=5,
1407+
... studies=['AllenWholeMouseBrain_isocortex'],
1408+
... species="Mouse"
14051409
... )
1410+
>>> print(top_hits)
1411+
{'L2-3 IT': '007 L2/3 IT CTX Glut', 'PV': '052 Pvalb Gaba', ...}
14061412
1407-
With multiple studies:
1413+
Dictionary input (microglia and L6 CT markers):
14081414
1409-
>>> df = piaso.tl.analyzeMarkers(
1410-
... ["CD3E", "CD8A", "GZMK"],
1411-
... studies=["SEAAD2024_MTG_Subclass", "SilettiLinnarssonWholeHumanBrain2023_class"]
1412-
... )
1415+
>>> gene_sets = {
1416+
... 'Cluster_0': ['Cx3cr1', 'P2ry12', 'Tmem119', 'Csf1r', 'Trem2'],
1417+
... 'Cluster_1': ['Syt6', 'Tle4', 'Hs3st4', 'Fezf2', 'Foxp2'],
1418+
... }
1419+
>>> results, top_hits = piaso.tl.analyzeMarkers(gene_sets)
1420+
>>> print(top_hits)
1421+
{'Cluster_0': 'Microglia', 'Cluster_1': 'EN-L6-CT'}
14131422
1414-
COSG output (DataFrame with columns as clusters):
1423+
COSG integration workflow:
14151424
1425+
>>> import cosg
14161426
>>> import pandas as pd
1417-
>>> cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
1427+
>>>
1428+
>>> # Run COSG
1429+
>>> cosg.cosg(adata, key_added='cosg', groupby='leiden')
1430+
>>>
1431+
>>> # Get top 50 markers per cluster
1432+
>>> cosg_marker_df = pd.DataFrame(adata.uns['cosg']['names']).head(50)
1433+
>>>
1434+
>>> # Analyze with PIASOmarkerDB
14181435
>>> results, top_hits = piaso.tl.analyzeMarkers(
1419-
... cosg_df,
1436+
... cosg_marker_df,
14201437
... n_top_genes=50,
1421-
... species="Human",
1422-
... studies="SEAAD2024_MTG_Subclass"
1438+
... species="Mouse"
14231439
... )
1424-
>>> print(top_hits)
1425-
{'Lamp5': 'Lamp5', 'Sst': 'Sst', 'Pvalb': 'Pvalb', ...}
1426-
1427-
Dictionary input:
1428-
1429-
>>> results, top_hits = piaso.tl.analyzeMarkers({
1430-
... 'Cluster_0': ['CD3E', 'CD8A', 'GZMK'],
1431-
... 'Cluster_1': ['MS4A1', 'CD19', 'CD79A'],
1432-
... 'Cluster_2': ['LYZ', 'CD14', 'FCGR3A'],
1433-
... }, species="Human")
1434-
>>> print(top_hits)
1435-
{'Cluster_0': 'CD8+ T cell', 'Cluster_1': 'B cell', 'Cluster_2': 'Monocyte'}
1440+
>>>
1441+
>>> # Add annotations to AnnData
1442+
>>> adata.obs['cell_type_predicted'] = adata.obs['leiden'].map(top_hits)
14361443
14371444
See Also
14381445
--------
@@ -1474,4 +1481,4 @@ def analyzeMarkers(
14741481
'PIASOmarkerDB',
14751482
]
14761483

1477-
__version__ = "1.0.3"
1484+
__version__ = "1.0.3"

0 commit comments

Comments
 (0)