1313 import piaso
1414
1515 # Query marker genes
16- df = piaso.tl.queryPIASOmarkerDB(gene="CD3E ", species="Human ")
16+ df = piaso.tl.queryPIASOmarkerDB(gene="Foxp2", species="Mouse")
1717
1818 # Get both DataFrame and marker dict
1919 df, marker_dict = piaso.tl.queryPIASOmarkerDB(
20- study="AllenHumanImmuneHealthAtlas_L2 ",
21- species="Human ",
20+ study="AllenWholeMouseBrain_isocortex",
21+ species="Mouse",
2222 as_dict=True
2323 )
2424
2525 # List available studies
2626 studies = piaso.tl.queryPIASOmarkerDB(list_studies=True)
2727
2828 # Analyze gene lists for cell type inference
29- df = piaso.tl.analyzeMarkers(["CD3E ", "CD8A ", "GZMK "])
29+ df = piaso.tl.analyzeMarkers(["Syt6", "Tle4", "Foxp2", "Fezf2"])
3030
3131 # Analyze COSG results (DataFrame input) with specific study
3232 import pandas as pd
33- cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
33+ cosg_df = pd.DataFrame(adata.uns['cosg']['names']).head(50)
3434 results, top_hits = piaso.tl.analyzeMarkers(
3535 cosg_df,
3636 n_top_genes=50,
37- studies="SEAAD2024_MTG_Subclass"
37+ species="Mouse",
38+ studies="AllenWholeMouseBrain_isocortex"
3839 )
3940"""
4041
@@ -88,7 +89,7 @@ class MarkerDBConnectionError(PIASOmarkerDBError):
8889
8990
9091class AuthenticationError (PIASOmarkerDBError ):
91- """Authentication/invitation code error for download access ."""
92+ """Authentication error (kept for potential future use)."""
9293 pass
9394
9495
@@ -109,9 +110,6 @@ class PIASOmarkerDB:
109110 base_url : str, optional
110111 Base URL for the PIASOmarkerDB API.
111112 Default: "https://piaso.org/piasomarkerdb"
112- invitation_code : str, optional
113- Invitation code for bulk download access.
114- Contact dai@broadinstitute.org to request.
115113 timeout : int, optional
116114 Request timeout in seconds. Default: 30
117115 cache_dir : str or Path, optional
@@ -126,10 +124,16 @@ class PIASOmarkerDB:
126124 >>> client = PIASOmarkerDB()
127125 >>>
128126 >>> # Query markers
129- >>> df = client.getMarkers(gene="CD3E ")
127+ >>> df = client.getMarkers(gene="Foxp2")
130128 >>>
131129 >>> # Get as dict
132- >>> df, marker_dict = client.getMarkers(gene="CD3E", as_dict=True)
130+ >>> df, marker_dict = client.getMarkers(
131+ ... study="AllenWholeMouseBrain_isocortex",
132+ ... as_dict=True
133+ ... )
134+ >>>
135+ >>> # Download markers
136+ >>> client.downloadMarkers("markers.csv", species="Mouse")
133137
134138 See Also
135139 --------
@@ -155,12 +159,10 @@ class PIASOmarkerDB:
155159 def __init__ (
156160 self ,
157161 base_url : str = None ,
158- invitation_code : str = None ,
159162 timeout : int = None ,
160163 cache_dir : str | Path | None = None ,
161164 ):
162165 self .base_url = (base_url or self .DEFAULT_BASE_URL ).rstrip ('/' )
163- self .invitation_code = invitation_code
164166 self .timeout = timeout or self .DEFAULT_TIMEOUT
165167 self ._session = requests .Session ()
166168
@@ -215,11 +217,6 @@ def _request(
215217 ** kwargs
216218 )
217219
218- if response .status_code == 403 :
219- raise AuthenticationError (
220- "Invalid or missing invitation code"
221- )
222-
223220 if response .status_code >= 400 :
224221 try :
225222 error_data = response .json ()
@@ -324,8 +321,11 @@ def getMarkers(
324321
325322 Examples
326323 --------
327- >>> df = client.getMarkers(gene="CD3E", species="Human")
328- >>> df, marker_dict = client.getMarkers(study="SEAAD2024_MTG_Subclass", as_dict=True)
324+ >>> df = client.getMarkers(gene="Foxp2", species="Mouse")
325+ >>> df, marker_dict = client.getMarkers(
326+ ... study="AllenWholeMouseBrain_isocortex",
327+ ... as_dict=True
328+ ... )
329329 """
330330 params = {}
331331
@@ -479,6 +479,12 @@ def listStudies(
479479 -------
480480 list of str
481481 List of study/publication identifiers.
482+
483+ Examples
484+ --------
485+ >>> studies = client.listStudies()
486+ >>> print(f"Total studies: {len(studies)}")
487+ >>> mouse_studies = client.listStudies(species="Mouse")
482488 """
483489 params = {}
484490 if species :
@@ -628,7 +634,7 @@ def analyzeGenes(
628634
629635 - **list of str**: Single list of gene symbols. Returns DataFrame.
630636 - **pd.DataFrame**: Columns are clusters/cell types, rows are genes.
631- E.g., COSG output: pd.DataFrame(adata.uns['cosg']['names']).
637+ E.g., COSG output: pd.DataFrame(adata.uns['cosg']['names']).head(50).
632638 Returns tuple (results_dict, top_hits_dict).
633639 - **dict**: {cluster_name: [gene_list]}.
634640 Returns tuple (results_dict, top_hits_dict).
@@ -670,34 +676,30 @@ def analyzeGenes(
670676
671677 Examples
672678 --------
673- Single gene list:
674-
675- >>> df = client.analyzeGenes(["CD3E", "CD8A", "GZMK"])
676-
677- With specific study:
679+ Single gene list (mouse cortex L6 markers):
678680
679681 >>> df = client.analyzeGenes(
680- ... ["CD3E ", "CD8A ", "GZMK "],
681- ... studies="SEAAD2024_MTG_Subclass "
682+ ... ["Syt6", "Tle4", "Hs3st4", "Fezf2", "Foxp2"],
683+ ... species="Mouse"
682684 ... )
683685
684- COSG output (DataFrame) with multiple studies :
686+ With specific study filter :
685687
686- >>> cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
687688 >>> results, top_hits = client.analyzeGenes(
688- ... cosg_df,
689+ ... cosg_marker_df,
689690 ... n_top_genes=50,
690- ... studies=["SEAAD2024_MTG_Subclass", "SilettiLinnarssonWholeHumanBrain2023_class"]
691+ ... min_genes=5,
692+ ... studies=['AllenWholeMouseBrain_isocortex'],
693+ ... species="Mouse"
691694 ... )
692- >>> print(top_hits)
693- {'Lamp5': 'GABAergic neuron', 'Lhx6': 'Interneuron', ...}
694695
695696 Dictionary input:
696697
697- >>> results, top_hits = client.analyzeGenes({
698- ... 'Cluster_0': ['CD3E', 'CD8A', 'GZMK'],
699- ... 'Cluster_1': ['MS4A1', 'CD19', 'CD79A'],
700- ... })
698+ >>> gene_sets = {
699+ ... 'Microglia': ['Cx3cr1', 'P2ry12', 'Tmem119', 'Csf1r', 'Trem2'],
700+ ... 'L6_CT': ['Syt6', 'Tle4', 'Foxp2', 'Fezf2'],
701+ ... }
702+ >>> results, top_hits = client.analyzeGenes(gene_sets)
701703 """
702704 # Validate and normalize studies parameter
703705 studies_list = None
@@ -1015,32 +1017,25 @@ def _to_marker_dict(
10151017 def downloadMarkers (
10161018 self ,
10171019 filepath : str | Path ,
1018- invitation_code : str = None ,
10191020 ** kwargs
10201021 ) -> None :
10211022 """
10221023 Download markers to CSV file.
10231024
1024- Requires an invitation code. Contact dai@broadinstitute.org to request.
1025-
10261025 Parameters
10271026 ----------
10281027 filepath : str or Path
10291028 Path to save the CSV file.
1030- invitation_code : str, optional
1031- Invitation code. Uses client default if not provided.
10321029 **kwargs
1033- Same filter parameters as getMarkers().
1034- """
1035- code = invitation_code or self .invitation_code
1036- if not code :
1037- raise AuthenticationError (
1038- "Invitation code required for download. "
1039- "Request one from dai@broadinstitute.org"
1040- )
1030+ Same filter parameters as getMarkers() (species, tissue, study, etc.).
10411031
1032+ Examples
1033+ --------
1034+ >>> client = PIASOmarkerDB()
1035+ >>> client.downloadMarkers("human_markers.csv", species="Human")
1036+ >>> client.downloadMarkers("mouse_brain.csv", species="Mouse", tissue="brain")
1037+ """
10421038 params = dict (kwargs )
1043- params ['invitation_code' ] = code
10441039 params ['format' ] = 'csv'
10451040
10461041 url = self ._build_url ('markers' )
@@ -1052,9 +1047,6 @@ def downloadMarkers(
10521047 timeout = self .timeout * 2
10531048 )
10541049
1055- if response .status_code == 403 :
1056- raise AuthenticationError ("Invalid invitation code" )
1057-
10581050 response .raise_for_status ()
10591051
10601052 filepath = Path (filepath )
@@ -1135,6 +1127,13 @@ def getRecommendedStudy(
11351127 -------
11361128 str or None
11371129 Recommended study name, or None if no recommendation.
1130+
1131+ Examples
1132+ --------
1133+ >>> client.getRecommendedStudy("human", "blood")
1134+ 'AllenHumanImmuneHealthAtlas_L2'
1135+ >>> client.getRecommendedStudy("mouse", "cortex")
1136+ 'AllenWholeMouseBrain_isocortex'
11381137 """
11391138 recommendations = {
11401139 ("human" , "blood" ): "AllenHumanImmuneHealthAtlas_L2" ,
@@ -1151,6 +1150,7 @@ def getRecommendedStudy(
11511150 ("human" , "spleen" ): "XuTeichmann2023_Spleen" ,
11521151 ("human" , "intestine" ): "XuTeichmann2023_Intestine" ,
11531152 ("mouse" , "cortex" ): "AllenWholeMouseBrain_isocortex" ,
1153+ ("mouse" , "brain" ): "AllenWholeMouseBrain_Neuron" ,
11541154 }
11551155
11561156 key = (species .lower (), tissue .lower ())
@@ -1245,28 +1245,27 @@ def queryPIASOmarkerDB(
12451245 Query marker genes:
12461246
12471247 >>> import piaso
1248- >>> df = piaso.tl.queryPIASOmarkerDB(gene="CD3E", species="Human")
1248+ >>> df = piaso.tl.queryPIASOmarkerDB(gene="Foxp2", species="Mouse")
1249+ >>> df = piaso.tl.queryPIASOmarkerDB(gene=["Foxp2", "Syt6", "Tle4"])
12491250
12501251 Get both DataFrame and marker dictionary:
12511252
12521253 >>> df, marker_dict = piaso.tl.queryPIASOmarkerDB(
1253- ... study="SEAAD2024_MTG_Subclass ",
1254- ... species="Human ",
1254+ ... study="AllenWholeMouseBrain_isocortex",
1255+ ... species="Mouse",
12551256 ... as_dict=True
12561257 ... )
1258+ >>> print(f"DataFrame shape: {df.shape}")
1259+ >>> print(f"Cell types in dict: {len(marker_dict)}")
12571260
12581261 List available studies:
12591262
12601263 >>> studies = piaso.tl.queryPIASOmarkerDB(list_studies=True)
1261- >>> studies = piaso.tl.queryPIASOmarkerDB(list_studies=True, species="Human ")
1264+ >>> print(f"Total studies: {len(studies)}")
12621265
12631266 List cell types:
12641267
1265- >>> cell_types = piaso.tl.queryPIASOmarkerDB(list_cell_types=True, species="Human")
1266-
1267- List genes:
1268-
1269- >>> genes = piaso.tl.queryPIASOmarkerDB(list_genes=True, cell_type="T-cell")
1268+ >>> cell_types = piaso.tl.queryPIASOmarkerDB(list_cell_types=True, species="Mouse")
12701269
12711270 See Also
12721271 --------
@@ -1344,7 +1343,7 @@ def analyzeMarkers(
13441343 Returns: pd.DataFrame with analysis results.
13451344
13461345 - **pd.DataFrame**: Columns are clusters/cell types, rows are genes.
1347- Ideal for COSG output: ``pd.DataFrame(adata.uns['cosg']['names'])``.
1346+ Ideal for COSG output: ``pd.DataFrame(adata.uns['cosg']['names']).head(50)``.
13481347 Returns: tuple (results_dict, top_hits_dict).
13491348
13501349 - **dict**: ``{cluster_name: [gene_list]}``.
@@ -1391,48 +1390,56 @@ def analyzeMarkers(
13911390
13921391 Examples
13931392 --------
1394- Single gene list:
1393+ Single gene list (mouse cortex L6 markers):
13951394
13961395 >>> import piaso
1397- >>> df = piaso.tl.analyzeMarkers(["CD3E", "CD8A", "GZMK", "PRF1"])
1396+ >>> query_genes = ["Syt6", "Tle4", "Hs3st4", "Fezf2", "Foxp2", "Col12a1"]
1397+ >>> df = piaso.tl.analyzeMarkers(query_genes)
13981398 >>> print(df.head())
1399+ # Top hit: EN-L6-CT from WangKriegstein2025
13991400
14001401 With specific study filter:
14011402
1402- >>> df = piaso.tl.analyzeMarkers(
1403- ... ["CD3E", "CD8A", "GZMK"],
1404- ... studies="SEAAD2024_MTG_Subclass"
1403+ >>> results, top_hits = piaso.tl.analyzeMarkers(
1404+ ... cosg_marker_df,
1405+ ... n_top_genes=50,
1406+ ... min_genes=5,
1407+ ... studies=['AllenWholeMouseBrain_isocortex'],
1408+ ... species="Mouse"
14051409 ... )
1410+ >>> print(top_hits)
1411+ {'L2-3 IT': '007 L2/3 IT CTX Glut', 'PV': '052 Pvalb Gaba', ...}
14061412
1407- With multiple studies :
1413+ Dictionary input (microglia and L6 CT markers) :
14081414
1409- >>> df = piaso.tl.analyzeMarkers(
1410- ... ["CD3E", "CD8A", "GZMK"],
1411- ... studies=["SEAAD2024_MTG_Subclass", "SilettiLinnarssonWholeHumanBrain2023_class"]
1412- ... )
1415+ >>> gene_sets = {
1416+ ... 'Cluster_0': ['Cx3cr1', 'P2ry12', 'Tmem119', 'Csf1r', 'Trem2'],
1417+ ... 'Cluster_1': ['Syt6', 'Tle4', 'Hs3st4', 'Fezf2', 'Foxp2'],
1418+ ... }
1419+ >>> results, top_hits = piaso.tl.analyzeMarkers(gene_sets)
1420+ >>> print(top_hits)
1421+ {'Cluster_0': 'Microglia', 'Cluster_1': 'EN-L6-CT'}
14131422
1414- COSG output (DataFrame with columns as clusters) :
1423+ COSG integration workflow :
14151424
1425+ >>> import cosg
14161426 >>> import pandas as pd
1417- >>> cosg_df = pd.DataFrame(adata.uns['cosg']['names'])
1427+ >>>
1428+ >>> # Run COSG
1429+ >>> cosg.cosg(adata, key_added='cosg', groupby='leiden')
1430+ >>>
1431+ >>> # Get top 50 markers per cluster
1432+ >>> cosg_marker_df = pd.DataFrame(adata.uns['cosg']['names']).head(50)
1433+ >>>
1434+ >>> # Analyze with PIASOmarkerDB
14181435 >>> results, top_hits = piaso.tl.analyzeMarkers(
1436+ ... cosg_marker_df,
1436+ ... cosg_marker_df ,
14201437 ... n_top_genes=50,
1421- ... species="Human",
1422- ... studies="SEAAD2024_MTG_Subclass"
1438+ ... species="Mouse"
14231439 ... )
1424- >>> print(top_hits)
1425- {'Lamp5': 'Lamp5', 'Sst': 'Sst', 'Pvalb': 'Pvalb', ...}
1426-
1427- Dictionary input:
1428-
1429- >>> results, top_hits = piaso.tl.analyzeMarkers({
1430- ... 'Cluster_0': ['CD3E', 'CD8A', 'GZMK'],
1431- ... 'Cluster_1': ['MS4A1', 'CD19', 'CD79A'],
1432- ... 'Cluster_2': ['LYZ', 'CD14', 'FCGR3A'],
1433- ... }, species="Human")
1434- >>> print(top_hits)
1435- {'Cluster_0': 'CD8+ T cell', 'Cluster_1': 'B cell', 'Cluster_2': 'Monocyte'}
1440+ >>>
1441+ >>> # Add annotations to AnnData
1442+ >>> adata.obs['cell_type_predicted'] = adata.obs['leiden'].map(top_hits)
14361443
14371444 See Also
14381445 --------
@@ -1474,4 +1481,4 @@ def analyzeMarkers(
14741481 'PIASOmarkerDB' ,
14751482]
14761483
1477- __version__ = "1.0.3"
1484+ __version__ = "1.0.3"
0 commit comments