diff --git a/.gitignore b/.gitignore
index 99f167443..0d679c07f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
site/
.DS_Store
-venv/
+venv*/
+.vscode/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..8dbb36353
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,6 @@
+repos:
+- repo: git@github.com:Yelp/detect-secrets
+ rev: v0.13.1
+ hooks:
+ - id: detect-secrets
+ args: ['--baseline', '.secrets.baseline']
\ No newline at end of file
diff --git a/.secrets.baseline b/.secrets.baseline
new file mode 100644
index 000000000..a22082a8b
--- /dev/null
+++ b/.secrets.baseline
@@ -0,0 +1,220 @@
+{
+ "exclude": {
+ "files": null,
+ "lines": null
+ },
+ "generated_at": "2023-11-30T17:33:04Z",
+ "plugins_used": [
+ {
+ "name": "AWSKeyDetector"
+ },
+ {
+ "name": "ArtifactoryDetector"
+ },
+ {
+ "base64_limit": 4.5,
+ "name": "Base64HighEntropyString"
+ },
+ {
+ "name": "BasicAuthDetector"
+ },
+ {
+ "name": "CloudantDetector"
+ },
+ {
+ "hex_limit": 3,
+ "name": "HexHighEntropyString"
+ },
+ {
+ "name": "IbmCloudIamDetector"
+ },
+ {
+ "name": "IbmCosHmacDetector"
+ },
+ {
+ "name": "JwtTokenDetector"
+ },
+ {
+ "keyword_exclude": null,
+ "name": "KeywordDetector"
+ },
+ {
+ "name": "MailchimpDetector"
+ },
+ {
+ "name": "PrivateKeyDetector"
+ },
+ {
+ "name": "SlackDetector"
+ },
+ {
+ "name": "SoftlayerDetector"
+ },
+ {
+ "name": "StripeDetector"
+ },
+ {
+ "name": "TwilioKeyDetector"
+ }
+ ],
+ "results": {
+ "docs/API/Users_Guide/Additional_Examples.md": [
+ {
+ "hashed_secret": "d9bbc424159a2c5ef89902e02caa2be6cff1817c",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 89,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "34d2afab6dc6f76855d5f83cc2dbed2efd99ddfb",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 110,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/API/Users_Guide/Getting_Started.md": [
+ {
+ "hashed_secret": "05b339a29ce9a548e1efbe032131cfcdde6727d8",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 91,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/API/Users_Guide/Search_and_Retrieval.md": [
+ {
+ "hashed_secret": "8b0471397a6dec83405ee2ae28edde87d02271a0",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 386,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "14f469780554b9dd75a3d03a267b3be725582499",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 409,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "6b505580b5bc4fab5bd4b2e0e10d621fea0614ee",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 447,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "98895ba87fa2e568b0f48b1afbddb9b45d8c8ec3",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 491,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "20b3183bf913ad4ef0d5ecd59c1de5437a7e8a04",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 528,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/API/Users_Guide/Submission.md": [
+ {
+ "hashed_secret": "93f5b94e262e685fee4a419438d60e82fafaf491",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 2468,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "313355a8530a54c23567f7bbedd9f804bb269820",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 2561,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "b47ceb76f45ab4e8b52da270875d85fdd9b7fc33",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 2623,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/API/Users_Guide/System_Information.md": [
+ {
+ "hashed_secret": "ecb0642a6305ce066c2675dac1562535b530e5b0",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 70,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/Data_Submission_Portal/Users_Guide/Best_Practices.md": [
+ {
+ "hashed_secret": "87d7b59b5af7c86ea71b60ed042d1e62175136fb",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 86,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/Data_Submission_Portal/Users_Guide/Data_Submission_Walkthrough.md": [
+ {
+ "hashed_secret": "87d7b59b5af7c86ea71b60ed042d1e62175136fb",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 651,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "docs/Data_Submission_Portal/Users_Guide/Data_Upload_UG.md": [
+ {
+ "hashed_secret": "dbaf99f4789432509c1313aba5256a6ea4ddb986",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 482,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "471568dffba5b4873ca000b88049046d5aa687d4",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 553,
+ "type": "Hex High Entropy String"
+ },
+ {
+ "hashed_secret": "87d7b59b5af7c86ea71b60ed042d1e62175136fb",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 765,
+ "type": "Hex High Entropy String"
+ }
+ ],
+ "theme/css/font-awesome-4.5.css": [
+ {
+ "hashed_secret": "51de2b835bd35a67eb32dbcd3d77d4b96e5aa39d",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 1734,
+ "type": "Secret Keyword"
+ }
+ ],
+ "theme/css/font-awesome.min-4.5.css": [
+ {
+ "hashed_secret": "3e4128ccd5d7d597667230af6326b3387fd18545",
+ "is_secret": false,
+ "is_verified": false,
+ "line_number": 4,
+ "type": "Secret Keyword"
+ }
+ ]
+ },
+ "version": "0.13.1",
+ "word_list": {
+ "file": null,
+ "hash": null
+ }
+}
diff --git a/API_UG.yml b/API_UG.yml
index 83d08700a..91b8c60d0 100644
--- a/API_UG.yml
+++ b/API_UG.yml
@@ -16,6 +16,7 @@ pages:
- BAM Slicing: 'API/Users_Guide/BAM_Slicing.md'
- Submission: 'API/Users_Guide/Submission.md'
- Python Examples: 'API/Users_Guide/Python_Examples.md'
+ - GraphQL Examples: 'API/Users_Guide/GraphQL_Examples.md'
- System Information: 'API/Users_Guide/System_Information.md'
- Additional Examples: 'API/Users_Guide/Additional_Examples.md'
- "Appendix A: Available Fields": 'API/Users_Guide/Appendix_A_Available_Fields.md'
diff --git a/Adding_write_to_file.md b/Adding_write_to_file.md
new file mode 100644
index 000000000..39a5cb741
--- /dev/null
+++ b/Adding_write_to_file.md
@@ -0,0 +1,14 @@
+# Changes made 2-5-2021
+
+An extra script was added to the following scripts, to redirect a copy of the output to a file:
+
+```
+- A Basic Query
+
+- A Filtered Query
+
+- Complex Filters
+
+- Basic Troubleshooting
+```
+Also the ```.PY``` files in ```scripts``` folder were updated accordingly.
diff --git a/Data_Portal_UG.yml b/Data_Portal_UG.yml
index b74dd45d9..7cab3b7e4 100644
--- a/Data_Portal_UG.yml
+++ b/Data_Portal_UG.yml
@@ -9,18 +9,22 @@ copyright: "© 2015-2016"
theme_dir: theme
pages:
- Data Portal:
- - Getting Started: 'Data_Portal/Users_Guide/Getting_Started.md'
+ - Getting Started: 'Data_Portal/Users_Guide/getting_started.md'
+ - Quick Start: 'Data_Portal/Users_Guide/quick_start.md'
+ - Cohort Builder: 'Data_Portal/Users_Guide/cohort_builder.md'
+ - Analysis Center: 'Data_Portal/Users_Guide/analysis_center.md'
+ - Repository: 'Data_Portal/Users_Guide/Repository.md'
- Projects: 'Data_Portal/Users_Guide/Projects.md'
- - Exploration: Data_Portal/Users_Guide/Exploration.md
- - Repository: Data_Portal/Users_Guide/Repository.md
- - Genes and Mutations: Data_Portal/Users_Guide/Genes_and_Mutations.md
- - Custom Set Analysis: Data_Portal/Users_Guide/Custom_Set_Analysis.md
- - Annotations: 'Data_Portal/Users_Guide/Annotations.md'
- - Advanced Search: 'Data_Portal/Users_Guide/Advanced_Search.md'
- - Authentication: 'Data_Portal/Users_Guide/Authentication.md'
- - File Cart: 'Data_Portal/Users_Guide/Cart.md'
- - Image Viewer: 'Data_Portal/Users_Guide/Image_viewer.md'
- - Legacy Archive: 'Data_Portal/Users_Guide/Legacy_Archive.md'
+ - BAM Slicing: 'Data_Portal/Users_Guide/BAMslicing.md'
+ - Clinical Data Analysis: 'Data_Portal/Users_Guide/clinical_data_analysis.md'
+ - Cohort Comparison: 'Data_Portal/Users_Guide/cohort_comparison.md'
+ - Gene Expression Clustering: 'Data_Portal/Users_Guide/gene_expression_clustering.md'
+ - Mutation Frequency: 'Data_Portal/Users_Guide/mutation_frequency.md'
+ - OncoMatrix: 'Data_Portal/Users_Guide/oncomatrix.md'
+ - ProteinPaint: 'Data_Portal/Users_Guide/proteinpaint_lollipop.md'
+ - Sequence Reads: 'Data_Portal/Users_Guide/proteinpaint_bam.md'
+ - Set Operations: 'Data_Portal/Users_Guide/set_operations.md'
+ - For Developers: 'Data_Portal/Users_Guide/Developers_Guide.md'
- Release Notes: 'Data_Portal/Release_Notes/Data_Portal_Release_Notes.md'
extra:
project_root_dir: '/'
diff --git a/Data_Portal_V1_UG.yml b/Data_Portal_V1_UG.yml
new file mode 100644
index 000000000..46e8e8061
--- /dev/null
+++ b/Data_Portal_V1_UG.yml
@@ -0,0 +1,27 @@
+#
+# GDC Docs - Config
+#
+
+site_name: GDC Docs
+site_url: http://docs.gdc.cancer.gov
+repo_url: https://github.com/NCI-GDC/gdc-docs
+copyright: "© 2015-2016"
+theme_dir: theme
+pages:
+- Data Portal v1.0:
+ - Getting Started: 'Data_Portal_V1/Users_Guide/Getting_Started.md'
+ - Projects: 'Data_Portal_V1/Users_Guide/Projects.md'
+ - Exploration: Data_Portal_V1/Users_Guide/Exploration.md
+ - Repository: Data_Portal_V1/Users_Guide/Repository.md
+ - Genes and Mutations: Data_Portal_V1/Users_Guide/Genes_and_Mutations.md
+ - Annotations: 'Data_Portal_V1/Users_Guide/Annotations.md'
+ - Advanced Search: 'Data_Portal_V1/Users_Guide/Advanced_Search.md'
+ - Authentication: 'Data_Portal_V1/Users_Guide/Authentication.md'
+ - File Cart: 'Data_Portal_V1/Users_Guide/Cart.md'
+ - Image Viewer: 'Data_Portal_V1/Users_Guide/Image_viewer.md'
+ - Release Notes: 'Data_Portal_V1/Release_Notes/Data_Portal_V1_Release_Notes.md'
+extra:
+ project_root_dir: '/'
+ project_org: 'GDC'
+ project_description: 'GDC Docs'
+ version: 1.0
diff --git a/Data_Submission_Portal_UG.yml b/Data_Submission_Portal_UG.yml
index b00ffc3d2..0f9f5f4c5 100644
--- a/Data_Submission_Portal_UG.yml
+++ b/Data_Submission_Portal_UG.yml
@@ -9,19 +9,13 @@ copyright: "© 2015-2016"
theme_dir: theme
pages:
- Data Submission Portal:
- - Getting Started: 'Data_Submission_Portal/Users_Guide/Getting_Started.md'
- - Submission Workflow: 'Data_Submission_Portal/Users_Guide/Submission_Workflow.md'
- - Authentication: 'Data_Submission_Portal/Users_Guide/Authentication.md'
- - Homepage: 'Data_Submission_Portal/Users_Guide/Homepage.md'
- - Dashboard: 'Data_Submission_Portal/Users_Guide/Dashboard.md'
- - Upload Data: 'Data_Submission_Portal/Users_Guide/Data_Upload_UG.md'
- - Submit Data: 'Data_Submission_Portal/Users_Guide/Submit_Data.md'
- - Release Data: 'Data_Submission_Portal/Users_Guide/Release_Data.md'
- - Transactions: 'Data_Submission_Portal/Users_Guide/Transactions.md'
- - Browse Data: 'Data_Submission_Portal/Users_Guide/Browse_Data.md'
- - Pre-Release Data Review: 'Data_Submission_Portal/Users_Guide/Pre_Release_QC.md'
- - Best Practices: 'Data_Submission_Portal/Users_Guide/Best_Practices.md'
+ - Before Submitting Data to the GDC Portal: 'Data_Submission_Portal/Users_Guide/Checklist.md'
+ - Data Submission Overview: 'Data_Submission_Portal/Users_Guide/Data_Submission_Overview.md'
+ - Data Submission Portal: 'Data_Submission_Portal/Users_Guide/Data_Submission_Process.md'
+ - Data Upload Walkthrough: 'Data_Submission_Portal/Users_Guide/Data_Submission_Walkthrough.md'
+ - Pre-Release Data Portal: 'Data_Submission_Portal/Users_Guide/Pre_Release_QC.md'
- Release Notes: 'Data_Submission_Portal/Release_Notes/Data_Submission_Portal_Release_Notes.md'
+
extra:
project_root_dir: '/'
project_org: 'GDC'
diff --git a/Data_Transfer_Tool_UG.yml b/Data_Transfer_Tool_UG.yml
index d8e7a47f4..a474a63af 100644
--- a/Data_Transfer_Tool_UG.yml
+++ b/Data_Transfer_Tool_UG.yml
@@ -10,12 +10,10 @@ theme_dir: theme
pages:
- Data Transfer Tool:
- Getting Started: 'Data_Transfer_Tool/Users_Guide/Getting_Started.md'
- - Accessing Built-in Help: 'Data_Transfer_Tool/Users_Guide/Accessing_Built-in_Help.md'
- Preparing for Data Download and Upload: 'Data_Transfer_Tool/Users_Guide/Preparing_for_Data_Download_and_Upload.md'
- - Data Download and Upload - Command Line: 'Data_Transfer_Tool/Users_Guide/Data_Download_and_Upload.md'
- - Data Download - UI: 'Data_Transfer_Tool/Users_Guide/Data_Download_Upload_UI.md'
- - Key Terms: 'Data_Transfer_Tool/Users_Guide/Appendix_A_-_Key_Terms.md'
+ - Data Transfer Tool Command Line Documentation: 'Data_Transfer_Tool/Users_Guide/Data_Download_and_Upload.md'
- Release Notes - Command Line: 'Data_Transfer_Tool/Release_Notes/DTT_Release_Notes.md'
+ - Data Transfer Tool UI Documentation: 'Data_Transfer_Tool/Users_Guide/Data_Download_DTT_UI.md'
- Release Notes - UI: 'Data_Transfer_Tool/Release_Notes/DTT_UI_Release_Notes.md'
extra:
diff --git a/Data_UG.yml b/Data_UG.yml
index 449054797..58e0bfbe8 100644
--- a/Data_UG.yml
+++ b/Data_UG.yml
@@ -18,7 +18,9 @@ pages:
- "Bioinformatics Pipeline: mRNA Analysis": 'Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline.md'
- "Bioinformatics Pipeline: miRNA Analysis": 'Data/Bioinformatics_Pipelines/miRNA_Pipeline.md'
- "Bioinformatics Pipeline: Copy Number Variation": 'Data/Bioinformatics_Pipelines/CNV_Pipeline.md'
- - "Bioinformatics Pipeline: Methylation Liftover": 'Data/Bioinformatics_Pipelines/Methylation_LO_Pipeline.md'
+ - "Bioinformatics Pipeline: Methylation Analysis": 'Data/Bioinformatics_Pipelines/Methylation_Pipeline.md'
+ - "Bioinformatics Pipeline: Protein Expression": 'Data/Bioinformatics_Pipelines/RPPA_intro.md'
+ - Aligned Reads Summary Metrics: 'Data/Bioinformatics_Pipelines/Aligned_reads_summary_metrics.md'
- Release Notes: 'Data/Release_Notes/Data_Release_Notes.md'
extra:
project_root_dir: '/'
diff --git a/Notebooks/API_April_2021/Webinar_April_2021.ipynb b/Notebooks/API_April_2021/Webinar_April_2021.ipynb
new file mode 100644
index 000000000..c9f46b508
--- /dev/null
+++ b/Notebooks/API_April_2021/Webinar_April_2021.ipynb
@@ -0,0 +1,953 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# GDC April 2021 Webinar: Using the GDC API\n",
+ "\n",
+ "### Monday, April 26, 2021<br>2:00 PM - 3:00 PM (EST)<br>Bill Wysocki, Lead for GDC User Services<br>University of Chicago\n",
+ "\n",
+ "## Table of Contents\n",
+ "\n",
+ "- [API User's Guide and Other Helpful Links](#links)\n",
+ "- [Notebook Overview](#overview)\n",
+ " - [About this notebook](#about_notebook)\n",
+ " - [Using the Python requests package and interpreting request response messages](#requests_package)\n",
+ "- [GDC API Overview](#api_overview)\n",
+ " - [GDC API Format](#api_format)\n",
+ " \n",
+ "- [Using the GDC API to Query Data in GDC](#query_data)\n",
+ " - [Search and Retrieval Endpoints Examples](#search_retrieve)\n",
+ " - [Data Analysis Endpoints Examples](#analysis)\n",
+ "- [Using the GDC API to Submit Data to GDC](#submit)\n",
+ "\n",
+ "## API User's Guide and Other Helpful Links\n",
+ "\n",
+ "[GDC API User's Guide](https://docs.gdc.cancer.gov/API/Users_Guide/Getting_Started/)\n",
+ "\n",
+ "[GDC Support Website](https://gdc.cancer.gov/support)\n",
+ "\n",
+ "support@nci-gdc.datacommons.io - GDC Helpdesk E-mail\n",
+ "\n",
+ "[Requests Python Package User's Guide](https://2.python-requests.org/en/master/)\n",
+ "\n",
+ "[Python Documentation Website](https://www.python.org)\n",
+ "\n",
+ "[Jupyter Notebook Documentation](https://jupyter.org/documentation)\n",
+ "\n",
+ "# Notebook Overview\n",
+ "\n",
+ "\n",
+ "### About this notebook\n",
+ "\n",
+ "- This notebook serves to be a resource for GDC users to familiarize themselves with GDC API endpoints and allow users to edit and create custom queries \"in-place\" with provided template functions or submission tasks\n",
+ "- The provided functional templates can facilitate downstream data analyses and visualizations within the Jupyter Notebook interface and other Python packages\n",
+ "- Commands and functions in this notebook will rely on the following Python packages:\n",
+ " - `requests` - if not already installed on your system, can install with command `pip install requests` from command line or using a new code cell in this notebook\n",
+ " - `json` - part of Python standard library, should already be installed on system\n",
+ " - `urllib` - part of Python standard library, should already be installed on system\n",
+ "- To execute code in a code cell, press either 'Cmd + Enter' or 'Control + Enter' depending on operating system and keyboard layout\n",
+ "- If using notebook to aid in submission requests, will need to download token file from the [GDC Submission Portal](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Process/#authentication)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#import packages to use in this notebook\n",
+ "\n",
+ "import requests\n",
+ "import json\n",
+ "import urllib"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Using the Python `requests` package and interpreting request response messages\n",
+ "\n",
+ "- The `requests` package allows users to communicate with the GDC API to make standard `POST`, `PUT`, `GET` and `DELETE` HTTP methods\n",
+ "- Need to specify request method as part of function (i.e. `requests.get()` for `GET` method, `requests.post()` for `POST` method etc.)\n",
+ "- When making a request with `requests` package, can save results of request as variable, i.e.:\n",
+ " - `response = requests.get(url)`\n",
+ "- Example `GET` request:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "response = requests.get('https://api.gdc.cancer.gov/cases?filters=%7B%22op%22%3A%20%22%3D%22%2C%20%22content%22%3A%20%7B%22field%22%3A%20%22cases.project.program.name%22%2C%20%22value%22%3A%20%22TCGA%22%7D%7D&fields=submitter_slide_ids&size=1&format=json&pretty=true')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- When returning the contents of the `response` variable, will only return HTTP status code of request, such as `<Response [200]>` or `<Response [400]>`; need to specify `response.text` method to get return message or data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "response"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "json.loads(response.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Typically, successful responses begin with `'2'`, like `200` or `201` and unsuccessful requests begin with `'4'`, like `400` (bad request) or `403` ('forbidden' error, result of bad or insufficient credentials)\n",
+ "- A list and accompanying explanations of HTTP status codes can be [found here](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# GDC API Overview\n",
+ "\n",
+ "\n",
+ "- The GDC Application Programming Interface (API) is the external facing REpresentational State Transfer (REST) interface for the GDC\n",
+ "- The GDC API supports user interactions with the GDC Submission and Data Portals, as well as provides developers with a programmatic interface to query and download GDC data, metadata and annotations and submit data to the GDC.\n",
+ "- The [GDC Data Transfer Tool](https://gdc.cancer.gov/access-data/gdc-data-transfer-tool) client also relies on the GDC API for user authentication, reading manifests, and for download and upload features\n",
+ "\n",
+ "\n",
+ "### GDC API Format\n",
+ "\n",
+ "- The HTTP URL that corresponds to the GDC API is: https://api.gdc.cancer.gov/\n",
+ "- GDC API format for search and retrieval use is: API_URL + ENDPOINT + QUERY_PARAMETERS\n",
+ "- In order to utilize the GDC API, calls to specific API 'endpoints' for a given query need to be made, i.e. for retrieving data about cases in the GDC, will make calls to `cases` endpoint, https://api.gdc.cancer.gov/cases/\n",
+ "- For search and retrieval API calls, query parameters can be included, such as filters on endpoint fields, and the fields parameter to specify fields to return from query\n",
+ " - List of all indexed data fields to specify as filters or fields for search and retrieval endpoints can be found at https://docs.gdc.cancer.gov/API/Users_Guide/Appendix_A_Available_Fields/\n",
+ " - Can also view available fields for both Search and Retrieval and Data Analysis endpoints by [using the `_mapping` endpoint for a given endpoint](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#_mapping-endpoint) or at the corresponding pages at the [GDC API Documentation site](https://docs.gdc.cancer.gov/API/Users_Guide/Getting_Started/)\n",
+ " - Formatting parameters can be specified such as format (TSV or JSON format) and size (number of hits to return)\n",
+ "- For submitting data using the GDC API, the format for using the GDC API Submission endpoint uses the project ID: https://api.gdc.cancer.gov/submission/program_name/project_code, i.e. https://api.gdc.cancer.gov/submission/TCGA/LUAD or https://api.gdc.cancer.gov/submission/CPTAC/3 \n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Using the GDC API to Query Data in GDC\n",
+ "\n",
+ "### Overview\n",
+ "\n",
+ "- Submitters can make use of several GDC API endpoints to retrieve various data indexed in the GDC API, including biospecimen, clinical and annotation metadata\n",
+ "- The HTTP `GET` method will be used to retrieve data\n",
+ "- Additional parameters can be specified to tailor the returned data, such as number of returned entries and filters on data at endpoint\n",
+ "- Data can be retrieved in `JSON` or `TSV` format by specifying in the request the format desired (see below)\n",
+ "- Additional features and more information regarding using the GDC API can be found at this link: https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/\n",
+ "\n",
+ "\n",
+ "\n",
+ "### Endpoints\n",
+ "\n",
+ "There are two 'types' of endpoints that can be used to query data in the GDC:\n",
+ "\n",
+ "\n",
+ "[GDC Search and Retrieval Endpoints](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#endpoints) - includes endpoints that index project, file and case information, including clinical and biospecimen metadata, as well as file version and history\n",
+ "\n",
+ "[GDC Analysis Endpoints](https://docs.gdc.cancer.gov/API/Users_Guide/Data_Analysis/) - endpoints that are used by the GDC data analysis, visualization and exploration (DAVE) tools in the Exploration tab of the GDC Data Portal to access indexed data including gene, mutation, copy number variation and survival data. \n",
+ "\n",
+ "\n",
+ "### Steps\n",
+ "\n",
+ "1. Specify and percent-encode `filters`\n",
+ "2. Specify `fields` to be returned\n",
+ "3. Specify additional parameters (`size`, `format` of results etc.)\n",
+ "4. Concatenate parameters to build query url\n",
+ "5. Submit query and save response text to file\n",
+ "\n",
+ "Note: specifying parameters is optional; not specifying `filters` will return all instances at a given endpoint, and not specifying `fields` will return all fields at endpoint, while other parameters will be set to default value (i.e. `size` = 10, `format` = JSON)\n",
+ "\n",
+ "### Template queryBuilder() function\n",
+ "\n",
+ "- `GET` requests can be built as a URL with the endpoint and other parameters specified using a Python function\n",
+ "- In notebook, need to first run code for queryBuilder() function to instantiate the function\n",
+ "- Parameters must be passed into parentheses in the order that they are specified in the function\n",
+ "- To specify default parameters, users can simply input two quotation marks, i.e. `''`, for a given variable when using the queryBuilder() function\n",
+ "- Users can edit the template queryBuilder() function to build url request for querying data in GDC API to include other parameters, such as `facets`, `expand`, `from` (pagination) and `sort`: \n",
+ "https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#request-parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#format is specified as 'frmat' in function as format is an already declared object in python [the format() function]\n",
+ "\n",
+ "def queryBuilder(endpoint, filters, fields, size, frmat):\n",
+ " api_url = 'https://api.gdc.cancer.gov/'\n",
+ " \n",
+ " if frmat.lower() == 'json':\n",
+ " request_query = api_url + endpoint + '?filters=' + filters + '&fields=' + fields + '&size=' + size + '&format=' + frmat + '&pretty=true'\n",
+ " else:\n",
+ " request_query = api_url + endpoint + '?filters=' + filters + '&fields=' + fields + '&size=' + size + '&format=' + frmat\n",
+ " return request_query"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Templates for query `filters`\n",
+ "\n",
+ "- `Filters` are used to specify which hits to return from an endpoint, such as cases of a certain project or files from a certain workflow\n",
+ "- Filters need to be created in JSON format that then will need to be [percent-encoded](https://en.wikipedia.org/wiki/Percent-encoding) to be sent in the URL request (can use the `urllib` Python package for percent-encoded formatting)\n",
+ "- JSON filters use [operators](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#filters-specifying-the-query) to specify relationships between a field and their possible values\n",
+ "- For a given endpoint, need to use indexed fields at that endpoint\n",
+ " - For Search and Retrieval endpoints, can reference [Appendix A at GDC API Documentation site](https://docs.gdc.cancer.gov/API/Users_Guide/Appendix_A_Available_Fields/)\n",
+ " - Can also view available fields for both Search and Retrieval and Data Analysis endpoints by [using the `_mapping` endpoint for a given endpoint](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#_mapping-endpoint)\n",
+ "- Specifying no filters will return all instances for a given endpoint (default)\n",
+ "- Below are several examples users can edit to build filters for a `GET` request"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#one filter applied to endpoint\n",
+ "\n",
+ "#one filter \n",
+ "one_filter = {\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.project_id\", \n",
+ " \"value\": \"TCGA-BRCA\"\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#combination of two filters applied to endpoint, i.e. (x AND/OR y) must be met\n",
+ "\n",
+ "combination_two = {\n",
+ " \"op\" : \"and\",\n",
+ " \"content\":[{\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.project_id\", \n",
+ " \"value\": \"TCGA-BRCA\"\n",
+ " }\n",
+ " }, \n",
+ " {\n",
+ " \"op\":\"=\", \n",
+ " \"content\":{\n",
+ " \"field\":\"cases.disease_type\",\n",
+ " \"value\": \"ductal and lobular neoplasms\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#combination of three filters applied to endpoint, i.e. (x AND/OR y AND/OR z) must be met\n",
+ "\n",
+ "combination_three = {\n",
+ " \"op\" : \"and\",\n",
+ " \"content\":[{\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.project_id\", \n",
+ " \"value\": \"TCGA-BRCA\"\n",
+ " }\n",
+ " }, \n",
+ " {\n",
+ " \"op\":\"=\", \n",
+ " \"content\":{\n",
+ " \"field\":\"cases.disease_type\",\n",
+ " \"value\": \"ductal and lobular neoplasms\"\n",
+ " }\n",
+ " },\n",
+ " {\n",
+ " \"op\":\">\", \n",
+ " \"content\":{\n",
+ " \"field\":\"diagnoses.age_at_diagnosis\",\n",
+ " \"value\": \"15000\"\n",
+ " }\n",
+ " }\n",
+ " \n",
+ " ]\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#complex combination of three filters applied to endpoint, i.e. (x AND/OR [y AND/OR z]) must be met\n",
+ "\n",
+ "combination_three_2 = {\n",
+ " \"op\": \"and\",\n",
+ " \"content\": [{\n",
+ " \"op\": \"=\",\n",
+ " \"content\": {\n",
+ " \"field\": \"cases.project.project_id\",\n",
+ " \"value\": \"TCGA-BRCA\"\n",
+ " }\n",
+ " },\n",
+ " {\n",
+ " \"op\": \"or\",\n",
+ " \"content\": [{\n",
+ " \"op\": \"=\",\n",
+ " \"content\": {\n",
+ " \"field\": \"cases.disease_type\",\n",
+ " \"value\": \"cystic, mucinous and serous neoplasms\"\n",
+ " }\n",
+ " },\n",
+ " {\n",
+ " \"op\": \"=\",\n",
+ " \"content\": {\n",
+ " \"field\": \"cases.disease_type\",\n",
+ " \"value\": \"ductal and lobular neoplasms\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " }\n",
+ " ]\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Template commands for formatting filter parameters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#percent encoding of filters\n",
+ "json_string=str(json.dumps(one_filter)) #replace one_filter with input filter variable here\n",
+ "example_filter = urllib.parse.quote(json_string.encode('utf-8'))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Template for formatting `fields` to be returned by query\n",
+ "\n",
+ "- The `fields` parameter is passed to the API request URL as a comma-delimited list of fields to be returned\n",
+ "- For a given endpoint, can only specify indexed fields at that endpoint\n",
+ " - For Search and Retrieval endpoints, can reference [Appendix A at GDC API Documentation site](https://docs.gdc.cancer.gov/API/Users_Guide/Appendix_A_Available_Fields/)\n",
+ " - Can also view available fields for both Search and Retrieval and Data Analysis endpoints by [using the `_mapping` endpoint for a given endpoint](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/#_mapping-endpoint)\n",
+ "- Specifying no fields will return all available fields for entities that match `filters` for a given endpoint (default)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#specify fields to be returned\n",
+ "example_fields = \",\".join([\n",
+ " \"submitter_id\",\n",
+ " \"disease_type\",\n",
+ " \"samples.submitter_id\",\n",
+ " \"samples.sample_type\", \n",
+ " \"samples.tissue_type\",\n",
+ " \"diagnoses.age_at_diagnosis\"\n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Template API `GET` Request "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#build API query: queryBuilder(endpoint, filters, fields, size, frmat)\n",
+ "\n",
+ "#to specify no filters and/or no fields to return, replace variable with ''\n",
+ "\n",
+ "template_request = queryBuilder('cases', example_filter, example_fields, '11315', \"json\")\n",
+ "\n",
+ "template_request"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Note: You can also copy and paste formatted request URL into browser url bar to return results in browser"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#send request\n",
+ "result = requests.get(template_request)\n",
+ "\n",
+ "#write request results to file, edit file name and type \n",
+ "with open(\"ffpe.json\", \"w+\") as output: \n",
+ " output.write(result.text)\n",
+ "output.close()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Search and Retrieval Endpoints Examples\n",
+ "\n",
+ "### Example 1: Retrieve case barcode, sample type and primary diagnosis data for DNA-seq files in TCGA-BRCA project\n",
+ "\n",
+ "- For this example, we would like to retrieve whether BAM files in the TCGA-BRCA project are for normal or tumor samples, as well as what disease cases were diagnosed as\n",
+ "- Use 'files' endpoint, as this endpoint contains metadata related to files in the GDC (such as experimental strategy and data category)\n",
+ "- Need to filter down to files that are of the data category \"sequencing reads\" and experimental strategy type \"WXS\" (whole exome) to filter out other categories (like copy number variation, gene expression) and other experimental strategies (like RNA-Seq). "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "\n",
+ "filters = {\n",
+ " \"op\" : \"and\",\n",
+ " \"content\":[{\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.project_id\", \n",
+ " \"value\": \"TCGA-BRCA\"\n",
+ " }\n",
+ " }, \n",
+ " {\n",
+ " \"op\":\"=\", \n",
+ " \"content\":{\n",
+ " \"field\":\"files.data_category\",\n",
+ " \"value\": \"sequencing reads\"\n",
+ " }\n",
+ " },\n",
+ " {\n",
+ " \"op\":\"=\", \n",
+ " \"content\":{\n",
+ " \"field\":\"files.experimental_strategy\",\n",
+ " \"value\": \"WXS\"\n",
+ " }\n",
+ " },\n",
+ " {\n",
+ " \"op\":\"=\", \n",
+ " \"content\":{\n",
+ " \"field\":\"files.data_format\",\n",
+ " \"value\": \"BAM\"\n",
+ " }\n",
+ " }\n",
+ " \n",
+ " ]\n",
+ "}\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify fields to be returned\n",
+ "fields = \",\".join([\n",
+ " \"cases.submitter_id\",\n",
+ " \"file_name\",\n",
+ " \"cases.samples.sample_type\",\n",
+ " \"cases.diagnoses.primary_diagnosis\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=1 and format=tsv, build query url with 'files' endpoint\n",
+ "brca_request = queryBuilder('files', filters_format, fields, '1', \"tsv\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "brca_result = requests.get(brca_request)\n",
+ "\n",
+ "print(brca_result.text)\n",
+ "brca_request"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example 2: Retrieve FFPE data for samples and portions for TCGA projects\n",
+ "\n",
+ "- In this example, we will retrieve whether case samples and portions taken from cases in TCGA projects were Formalin-Fixed Paraffin-Embedded (FFPE) specimens or not\n",
+ "- Use the 'cases' endpoint, as this endpoint contains biospecimen and clinical information related to cases and samples in the GDC"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "filters = {\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.program.name\", \n",
+ " \"value\": \"TCGA\"\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify fields to be returned\n",
+ "fields = \",\".join([\n",
+ " \"submitter_id\",\n",
+ " \"samples.submitter_id\",\n",
+ " \"samples.is_ffpe\",\n",
+ " \"samples.portions.submitter_id\",\n",
+ " \"samples.portions.is_ffpe\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=1 and format=json, build query url with 'cases' endpoint\n",
+ "ffpe_request = queryBuilder('cases', filters_format, fields, '1', \"json\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "ffpe_result = requests.get(ffpe_request)\n",
+ "\n",
+ "print(ffpe_result.text)\n",
+ "ffpe_request"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example 3: Age at Diagnosis, Days to Death after Diagnosis, Vital Status and other clinical data for cases in TCGA-KIRC project\n",
+ "\n",
+ "- In this example, we will retrieve age, survival and other clinical data for cases in the TCGA-KIRC project\n",
+ "- Use the 'cases' endpoint, as this endpoint contains biospecimen and clinical information related to cases and samples in the GDC\n",
+ "- Results will only show data for `demographic.days_to_death` if case is deceased"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "filters = {\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"cases.project.project_id\", \n",
+ " \"value\": \"TCGA-KIRC\"\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify fields to be returned\n",
+ "fields = \",\".join([\n",
+ " \"submitter_id\",\n",
+ " \"diagnoses.age_at_diagnosis\",\n",
+ " \"demographic.days_to_death\",\n",
+ " \"demographic.vital_status\", \n",
+ " \"demographic.ethnicity\",\n",
+ " \"demographic.race\",\n",
+ " \"demographic.gender\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=2 and format=tsv, build query url with 'cases' endpoint\n",
+ "age_request = queryBuilder('cases', filters_format, fields, '2', \"tsv\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "age_result = requests.get(age_request)\n",
+ "\n",
+ "print(age_result.text)\n",
+ "age_request"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Analysis Endpoints Examples\n",
+ "\n",
+ "### Example 4: Gene information\n",
+ "\n",
+ "- In this example, we will retrieve gene IDs and positions of genes present on chromosome 8 of the human genome\n",
+ "- Use the 'genes' endpoint, as this endpoint contains gene information indexed in the GDC API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "filters = {\n",
+ " \"op\":\"=\",\n",
+ " \"content\":{\n",
+ " \"field\": \"gene_chromosome\", \n",
+ " \"value\": \"8\"\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify fields to be returned\n",
+ "fields = \",\".join([\n",
+ " \"id\",\n",
+ " \"symbol\",\n",
+ " \"gene_start\",\n",
+ " \"gene_end\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=10 and format=tsv, build query url with 'genes' endpoint\n",
+ "genes_request = queryBuilder('genes', filters_format, fields, '10', \"tsv\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "genes_result = requests.get(genes_request)\n",
+ "\n",
+ "print(genes_result.text)\n",
+ "genes_request"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#The gene_id can also be used to query information about an individual gene from the genes endpoint\n",
+ "#by appending the gene_id at the end of the 'genes' endpoint and specifying parameters\n",
+ "\n",
+ "individual_gene_request = requests.get('https://api.gdc.cancer.gov/genes/ENSG00000160948?pretty=true')\n",
+ "\n",
+ "print(individual_gene_request.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example 5: Simple Somatic Mutation Information \n",
+ "\n",
+ "- In this example, we will retrieve information on a specific mutation using its COSMIC ID\n",
+ "- Use the 'ssms' endpoint, as this endpoint contains mutation information indexed in the GDC API"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "filters = {\n",
+ " \"op\":\"in\",\n",
+ " \"content\":{\n",
+ " \"field\":\"cosmic_id\",\n",
+ " \"value\":[\n",
+ " \"COSM4860838\"\n",
+ " ]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify all fields to be returned (default = \"\")\n",
+ "fields = \",\".join([\n",
+ " \"\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=1 and format=json, build query url with 'ssms' endpoint\n",
+ "mutation_request = queryBuilder('ssms', filters_format, fields, '1', \"json\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "mutation_result = requests.get(mutation_request)\n",
+ "\n",
+ "print(mutation_result.text)\n",
+ "mutation_request"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example 6: Compare survival data for TCGA-SKCM cases with and without the `chr7:g.140753336A>T` mutation \n",
+ "\n",
+ "- For this example we wish to use the survival analysis endpoint to compare two survival plots for TCGA-SKCM cases: one plot for cases having the `chr7:g.140753336A>T` mutation, and the other plot for cases without the mutation. \n",
+ "- Can retrieve the `ssm_id` for a mutation from the [GDC Data Portal > Exploration](https://portal.gdc.cancer.gov/exploration) tab. \n",
+ "- The API query will also print the results of a chi-squared analysis between the two subsets of cases\n",
+ " - Note that results of chi-square test are dependent on number of cases returned for each plot (`size` parameter); to choose all cases, use total number of cases in project for `size` parameter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#step 1: specify and encode filters\n",
+ "filters = [ \n",
+ " { \n",
+ " \"op\":\"and\",\n",
+ " \"content\":[ \n",
+ " { \n",
+ " \"op\":\"=\",\n",
+ " \"content\":{ \n",
+ " \"field\":\"cases.project.project_id\",\n",
+ " \"value\":\"TCGA-SKCM\"\n",
+ " }\n",
+ " },\n",
+ " { \n",
+ " \"op\":\"=\",\n",
+ " \"content\":{ \n",
+ " \"field\":\"gene.ssm.ssm_id\",\n",
+ " \"value\":\"84aef48f-31e6-52e4-8e05-7d5b9ab15087\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " },\n",
+ " { \n",
+ " \"op\":\"and\",\n",
+ " \"content\":[ \n",
+ " { \n",
+ " \"op\":\"=\",\n",
+ " \"content\":{ \n",
+ " \"field\":\"cases.project.project_id\",\n",
+ " \"value\":\"TCGA-SKCM\"\n",
+ " }\n",
+ " },\n",
+ " { \n",
+ " \"op\":\"excludeifany\",\n",
+ " \"content\":{ \n",
+ " \"field\":\"gene.ssm.ssm_id\",\n",
+ " \"value\":\"84aef48f-31e6-52e4-8e05-7d5b9ab15087\"\n",
+ " }\n",
+ " }\n",
+ " ]\n",
+ " }\n",
+ "]\n",
+ "\n",
+ "json_string=str(json.dumps(filters))\n",
+ "filters_format = urllib.parse.quote(json_string.encode('utf-8'))\n",
+ "\n",
+ "#step 2: specify that all fields are to be returned (default = \"\") \n",
+ "fields = \",\".join([\n",
+ " \"\"\n",
+ "])\n",
+ "\n",
+ "#step 3+4: specify size=10 and format=JSON, build query url with 'analysis/survival' endpoint,\n",
+ "survival_request = queryBuilder('analysis/survival', filters_format, \"\", '10', \"JSON\")\n",
+ "\n",
+ "#step 5: send request\n",
+ "survival_result = requests.get(survival_request)\n",
+ "\n",
+ "#print(survival_result.text)\n",
+ "survival_request"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#can parse out the chi-squared test from results\n",
+ "#by loading results as a JSON object and selecting\n",
+ "#overallStats from results\n",
+ "\n",
+ "json.loads(survival_result.text)['overallStats']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Using the GDC API to Submit Data to GDC\n",
+ "\n",
+ "### Overview\n",
+ "\n",
+ "- For projects that have been approved to be included in the GDC, submitters can make use of the `submission` GDC API endpoint to submit node entities to submission projects\n",
+ "- Submission will require a token downloaded from the [GDC Submission Portal](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Process/#authentication)\n",
+ "- Data can be submitted in `JSON` or `TSV` format; depending on the data format, users will need to edit the `\"Content-Type\"` in the request command (see below)\n",
+ "- Additionally, `JSON` and `TSV` templates for nodes to be submitted can be downloaded from the GDC Data Dictionary Viewer webpage: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?_top=1\n",
+ "- Submittable files (such as FASTQ or BAM files) should be uploaded with the [GDC Data Transfer Tool](https://gdc.cancer.gov/access-data/gdc-data-transfer-tool)\n",
+ "- Additional features and more information regarding submission using the GDC API can be found here: https://docs.gdc.cancer.gov/API/Users_Guide/Submission/ \n",
+ "- [Strategies for Submitting in Bulk](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Walkthrough/#strategies-for-submitting-in-bulk)\n",
+ "\n",
+ "### Endpoint\n",
+ "\n",
+ "- The GDC API Submission endpoint is built from the program and project information, i.e. `https://api.gdc.cancer.gov/submission/<program>/<project>`\n",
+ "- For example: https://api.gdc.cancer.gov/submission/TCGA/LUAD or https://api.gdc.cancer.gov/submission/CPTAC/3 \n",
+ "\n",
+ "### Steps\n",
+ "\n",
+ "1. Read in token file\n",
+ "2. Read in submission file\n",
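+ "3. Edit endpoint with project ID information and submit data using a `POST` (create new entities) or `PUT` (create new or update existing entities) request; either method accepts JSON or TSV as long as the `\"Content-Type\"` header matches the file format\n",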
+ "\n",
+ "### Example 7: Submitting a JSON Data File"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#1. Read in token file\n",
+ "\n",
+ "token = open(\"../gdc-user-token.txt\").read().strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Read in submission file\n",
+ "\n",
+ "example_file_json = json.load(open(\"example_file.json\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#3. Edit endpoint and submit data using POST request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL\"\n",
+ "\n",
+ "#submission request if data is in JSON format\n",
+ "response = requests.post(url = ENDPT, json = example_file_json, headers={'X-Auth-Token': token, \"Content-Type\": \"application/json\"})\n",
+ "print(response.text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Example 8: Submitting a TSV Data File"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#1. Read in token file\n",
+ "\n",
+ "token = open(\"../gdc-user-token.txt\").read().strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Read in submission file\n",
+ "\n",
+ "example_file_tsv = open(\"example_file.txt\", \"rb\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#3. Edit endpoint and submit data using PUT request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL/_dry_run\"\n",
+ "\n",
+ "#submission request if data is in TSV format\n",
+ "res = requests.put(url = ENDPT, data = example_file_tsv, headers={'X-Auth-Token': token, \"Content-Type\": \"text/tsv\"})\n",
+ "\n",
+ "res.text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Notebooks/API_April_2021/example_file.json b/Notebooks/API_April_2021/example_file.json
new file mode 100644
index 000000000..bad702575
--- /dev/null
+++ b/Notebooks/API_April_2021/example_file.json
@@ -0,0 +1,5 @@
+{
+"type":"case",
+"projects":{"code":"INTERNAL"},
+"submitter_id":"GDC-INTERNAL-000099"
+}
diff --git a/Notebooks/API_April_2021/example_file.txt b/Notebooks/API_April_2021/example_file.txt
new file mode 100644
index 000000000..4bcb3d614
--- /dev/null
+++ b/Notebooks/API_April_2021/example_file.txt
@@ -0,0 +1,2 @@
+type submitter_id projects.code
+case GDC-INTERNAL-000098 INTERNAL
diff --git a/Notebooks/Submission_June_2021/SAR.json b/Notebooks/Submission_June_2021/SAR.json
new file mode 100644
index 000000000..24c7a1e79
--- /dev/null
+++ b/Notebooks/Submission_June_2021/SAR.json
@@ -0,0 +1,12 @@
+{
+"type": "submitted_aligned_reads",
+"submitter_id": "BAM_file_internal_",
+"read_groups" : [{"submitter_id":"RG_INTERNAL_2"}],
+"file_name":"WXS-file.bam",
+"md5sum":"c2d1f25918af7609975370320632f24a",
+"file_size": 20,
+"data_category":"Sequencing Reads",
+"data_type": "Aligned Reads",
+"data_format":"BAM",
+"experimental_strategy":"WXS"
+}
diff --git a/Notebooks/Submission_June_2021/WXS-file.bam b/Notebooks/Submission_June_2021/WXS-file.bam
new file mode 100644
index 000000000..b3108f263
--- /dev/null
+++ b/Notebooks/Submission_June_2021/WXS-file.bam
@@ -0,0 +1 @@
+not a real BAM file
diff --git a/Notebooks/Submission_June_2021/Webinar_June_2021.ipynb b/Notebooks/Submission_June_2021/Webinar_June_2021.ipynb
new file mode 100644
index 000000000..1fa0b5637
--- /dev/null
+++ b/Notebooks/Submission_June_2021/Webinar_June_2021.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# GDC June 2021 Webinar: GDC Data Submission Overview\n",
+ "\n",
+ "### Monday, June 28, 2021<br>2:00 PM - 3:00 PM (EST)<br>Bill Wysocki, Lead for GDC User Services<br>University of Chicago\n",
+ "\n",
+ "\n",
+ "# Notebook Overview\n",
+ "\n",
+ "\n",
+ "### About this notebook\n",
+ "\n",
+ "- This notebook functions as a step-by-step set of instructions to submit a BAM file to the GDC using Python. Submitters who have a completely empty project or who have just started submitting with Python might find this useful.\n",
+ "\n",
+ "- Commands and functions in this notebook will rely on the following Python packages:\n",
+ " - `requests` - if not already installed on your system, can install with command `pip install requests` from command line or using a new code cell in this notebook\n",
+ " - `json` - part of Python standard library, should already be installed on system\n",
+ "- To execute code in a code cell, press either 'Cmd + Enter' or 'Control + Enter' depending on operating system and keyboard layout\n",
+ "- A token file will need to be downloaded from the [GDC Submission Portal](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Process/#authentication)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Overview\n",
+ "\n",
+ "- For projects that have been approved to be included in the GDC, submitters can make use of the `submission` GDC API endpoint to submit node entities to submission projects\n",
+ "- Submission will require a token downloaded from the [GDC Submission Portal](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Process/#authentication)\n",
+ "- Data can be submitted in `JSON` or `TSV` format; depending on the data format, users will need to edit the `\"Content-Type\"` in the request command (see below)\n",
+ "- Additionally, `JSON` and `TSV` templates for nodes to be submitted can be downloaded from the GDC Data Dictionary Viewer webpage: https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?_top=1\n",
+ "- Submittable files (such as FASTQ or BAM files) should be uploaded with the [GDC Data Transfer Tool](https://gdc.cancer.gov/access-data/gdc-data-transfer-tool)\n",
+ "- Additional features and more information regarding submission using the GDC API can be found here: https://docs.gdc.cancer.gov/API/Users_Guide/Submission/ \n",
+ "- [Strategies for Submitting in Bulk](https://docs.gdc.cancer.gov/Data_Submission_Portal/Users_Guide/Data_Submission_Walkthrough/#strategies-for-submitting-in-bulk)\n",
+ "\n",
+ "### Endpoint\n",
+ "\n",
+ "- The GDC API Submission endpoint is built from the program and project information, i.e. `https://api.gdc.cancer.gov/submission/<program>/<project>`\n",
+ "- For example: https://api.gdc.cancer.gov/submission/TCGA/LUAD or https://api.gdc.cancer.gov/submission/CPTAC/3 \n",
+ "\n",
+ "### Steps\n",
+ "\n",
+ "1. Read in token file\n",
+ "2. Read in submission file\n",
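+ "3. Edit endpoint with project ID information and submit data using a `POST` (create new entities) or `PUT` (create new or update existing entities) request; either method accepts JSON or TSV as long as the `\"Content-Type\"` header matches the file format\n",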
+ "\n",
+ "### 1. Submitting a Case (JSON)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#1. Import Python packages and read in token file\n",
+ "\n",
+ "import json\n",
+ "import requests\n",
+ "\n",
+ "token = open(\"../gdc-user-token.txt\").read().strip()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Read in submission file\n",
+ "\n",
+ "case_json = json.load(open(\"case.json\"))\n",
+ "\n",
+ "print(json.dumps(case_json, indent=4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#3. Edit endpoint and submit data using PUT request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL/_dry_run\"\n",
+ "\n",
+ "#submission request if data is in JSON format\n",
+ "response = requests.put(url = ENDPT, json = case_json, headers={'X-Auth-Token': token, \"Content-Type\": \"application/json\"})\n",
+ "print(json.dumps(json.loads(response.text), indent = 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 2: Submitting a Sample"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "#1. Read in submission file\n",
+ "\n",
+ "sample_tsv = open(\"sample.tsv\", \"rb\")\n",
+ "sample_tsv_display = open(\"sample.tsv\", \"r\")\n",
+ "\n",
+ "for x in sample_tsv_display.readlines():\n",
+ " print(x.strip().split(\"\\t\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Edit endpoint and submit data using PUT request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL/\"\n",
+ "\n",
+ "#submission request if data is in TSV format\n",
+ "response = requests.put(url = ENDPT, data = sample_tsv, headers={'X-Auth-Token': token, \"Content-Type\": \"text/tsv\"})\n",
+ "\n",
+ "print(json.dumps(json.loads(response.text), indent = 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3: Submitting the Aliquot and Read_Group"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#1. Read in submission file\n",
+ "\n",
+ "aliquot_rg_json = json.load(open(\"aliquot_readgroup.json\"))\n",
+ "\n",
+ "print(json.dumps(aliquot_rg_json, indent=4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Submit data using PUT request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL\"\n",
+ "\n",
+ "#submission request if data is in JSON format\n",
+ "response = requests.put(url = ENDPT, json = aliquot_rg_json, headers={'X-Auth-Token': token, \"Content-Type\": \"application/json\"})\n",
+ "print(json.dumps(json.loads(response.text), indent = 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4: Register the Submitted Aligned Reads File"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#1. Read in submission file\n",
+ "\n",
+ "sar_json = json.load(open(\"SAR.json\"))\n",
+ "\n",
+ "print(json.dumps(sar_json, indent=4))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#2. Submit data using PUT request\n",
+ "\n",
+ "ENDPT = \"https://api.gdc.cancer.gov/submission/GDC/INTERNAL\"\n",
+ "\n",
+ "#submission request if data is in JSON format\n",
+ "response = requests.put(url = ENDPT, json = sar_json, headers={'X-Auth-Token': token, \"Content-Type\": \"application/json\"})\n",
+ "print(json.dumps(json.loads(response.text), indent = 4))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5: Upload the Submitted Aligned Reads Data File Using Data Transfer Tool\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "## ./gdc-client upload <file UUID> -t token_file.txt"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Notebooks/Submission_June_2021/aliquot_readgroup.json b/Notebooks/Submission_June_2021/aliquot_readgroup.json
new file mode 100644
index 000000000..16948c3cd
--- /dev/null
+++ b/Notebooks/Submission_June_2021/aliquot_readgroup.json
@@ -0,0 +1,22 @@
+[
+{
+"type":"aliquot",
+"submitter_id":"ALIQUOT_INTERNAL_2",
+"samples":{"submitter_id":"SAMPLE_INTERNAL_2"}
+},
+{
+"type":"read_group",
+"submitter_id":"RG_INTERNAL_2",
+"aliquots":{"submitter_id": "ALIQUOT_INTERNAL_2"},
+"experiment_name": "INTERNAL",
+"library_name" : "INTERNAL",
+"is_paired_end": true,
+"library_selection": "PCR",
+"library_strategy":"WXS",
+"platform": "Illumina",
+"read_group_name": "Read_group_1",
+"read_length": 100,
+"sequencing_center": "UChicago",
+"target_capture_kit": "SeqCap EZ Human Exome v2.0"
+}
+]
diff --git a/Notebooks/Submission_June_2021/case.json b/Notebooks/Submission_June_2021/case.json
new file mode 100644
index 000000000..4e1eb9344
--- /dev/null
+++ b/Notebooks/Submission_June_2021/case.json
@@ -0,0 +1,5 @@
+{
+"type":"case",
+"projects":{"code":"INTERNAL1"},
+"submitter_id":"GDC-INTERNAL-000097"
+}
diff --git a/Notebooks/Submission_June_2021/sample.tsv b/Notebooks/Submission_June_2021/sample.tsv
new file mode 100644
index 000000000..97d0da777
--- /dev/null
+++ b/Notebooks/Submission_June_2021/sample.tsv
@@ -0,0 +1,2 @@
+type submitter_id cases.submitter_id sample_type tissue_type
+sample SAMPLE_INTERNAL_2 GDC-INTERNAL-000097 Primary Tumor Genomic Tumor
diff --git a/README.md b/README.md
index 132ed9c3f..e82cab829 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,97 @@

-# THIS PROJECT HAS MOVED
+# GDC Open Source code
-Please join us at https://github.com/NCI-GDC/gdc-docs
+
+GDC is open source. GitHub repositories containing the source code of GDC applications can be found on the [GDC GitHub Organization page](https://github.com/NCI-GDC/).
+- GDC Data Portal: https://github.com/NCI-GDC/portal-ui
+- GDC Legacy Archive: https://github.com/NCI-GDC/portal-ui-legacy
+- GDC Data Transfer Tool: https://github.com/NCI-GDC/gdc-client
+- GDC Data Dictionary: https://github.com/NCI-GDC/gdcdictionary
+- GDC Data Model: https://github.com/NCI-GDC/gdcdatamodel
+- GDC Psqlgraph: https://github.com/NCI-GDC/psqlgraph
+
+# Support
+
+Please direct technical questions to [GDC Support](https://gdc.cancer.gov/support).
+
+# GDC Documentation Site
+
+### Technology
+
+ - Python 2.6, 2.7, 3.3, 3.4 and 3.5.
+ - [mkdocs](http://www.mkdocs.org/)
+ - [BSCodeTabs for mkdocs](https://github.com/mikecules/MarkdownBSCodeTabs#for-use-in-mkdocs)
+
+### Install & Run
+
+(Optional) Set up virtualenv:
+
+- [Install virtualenv](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/)
+- `python -m virtualenv venv`
+- `source venv/bin/activate`
+- Run the installation commands below
+- To leave the virtual environment: `deactivate`
+
+Install GDC-docs:
+
+ - `pip install -r requirements.txt`
+ - `mkdocs serve` (optionally set port `--dev-addr=0.0.0.0:<port>`)
+
+### Build
+
+ - `mkdocs build --clean`
+
+### Repository Conventions
+
+- All Shared content in the "Commons" directory
+- One Directory per GDC product (API, Data_Portal, Data_Submission_Portal, Data_Transfer_Tool)
+- Each GDC product has a Users_Guide and Release_Notes directory
+
+### Linking
+
+To another documentation page
+```
+[Authentication and Authorization](../../Commons/Authentication.md)
+```
+
+Inside another documentation page
+
+```
+[Authentication and Authorization](../../Commons/Authentication.md#internal-section)
+```
+
+### Adding icons and PDFs
+The convention for this, when updating mkdocs.yml is the following:
+- <icon> <title> <link>: 'index.md'
+example:
+- fa-file-pdf-o Download PDF /API/PDF/API_UG.pdf: 'index.md'
+
+### Documentation Conventions
+
+A detailed list of all conventions is available on [GDC Website](https://gdc.cancer.gov/conventions-page)
+
+
+### Build PDF
+
+Install mkdocs2pandoc, following instructions available here:
+```
+https://github.com/jgrassler/mkdocs-pandoc
+```
+
+Prepare a yml file dedicated to your User Guide, using Data_Portal_UG.yml as an example.
+
+Run the following commands to:
+* Convert the User Guide to Pandoc:
+* Tweak the pandoc file
+* Build a PDF
+
+```
+mkdocs2pandoc -f Data_Portal_UG.yml -o docs/Data_Portal/PDF/Data_portal_UG.pd
+sed -i -e 's/# / /g' docs/Data_Portal/PDF/Data_portal_UG.pd
+sed -i -e 's/### /## /g' docs/Data_Portal/PDF/Data_portal_UG.pd
+sed -i -e 's/\/site\//\/docs\//g' docs/Data_Portal/PDF/Data_portal_UG.pd
+pandoc --toc -V documentclass=report -V geometry:"top=2cm, bottom=1.5cm, left=1cm, right=1cm" -f markdown+grid_tables+table_captions -o docs/Data_Portal/PDF/Data_portal_UG.pdf docs/Data_Portal/PDF/Data_portal_UG.pd
+```
diff --git a/docs/API/Release_Notes/API_Release_Notes.md b/docs/API/Release_Notes/API_Release_Notes.md
index 232ccf13c..ec0d2d490 100644
--- a/docs/API/Release_Notes/API_Release_Notes.md
+++ b/docs/API/Release_Notes/API_Release_Notes.md
@@ -3,6 +3,21 @@
| Version | Date |
|---|---|
+| [v4.0.0](API_Release_Notes.md#v400) | July 31, 2023 |
+| [v3.28.0](API_Release_Notes.md#v3280) | May 11, 2023 |
+| [v3.5.0](API_Release_Notes.md#v350) | July 8, 2022 |
+| [v3.3.0](API_Release_Notes.md#v330) | May 17, 2021 |
+| [v3.0.0](API_Release_Notes.md#v300) | August 14, 2020 |
+| [v2.1.2](API_Release_Notes.md#v212) | May 7, 2020 |
+| [v2.1.0](API_Release_Notes.md#v210) | March 10, 2020 |
+| [v2.0.0](API_Release_Notes.md#v200) | January 30, 2020 |
+| [v1.23.0](API_Release_Notes.md#v1230) | November 6, 2019 |
+| [v1.22.0](API_Release_Notes.md#v1220) | July 31, 2019 |
+| [v1.21.0](API_Release_Notes.md#v1210) | June 5, 2019 |
+| [v1.20.0](API_Release_Notes.md#v1200) | April 17, 2019 |
+| [v1.19.0](API_Release_Notes.md#v1190) | February 20, 2019 |
+| [v1.18.0](API_Release_Notes.md#v1180) | December 18, 2018 |
+| [v1.17.0](API_Release_Notes.md#v1170) | November 7, 2018 |
| [v1.16.0](API_Release_Notes.md#v1160) | September 27, 2018 |
| [v1.15.0](API_Release_Notes.md#v1150) | August 23, 2018 |
| [v1.14.1](API_Release_Notes.md#v1141) | May 21, 2018 |
@@ -21,6 +36,349 @@
| [v1.1.0](API_Release_Notes.md#v110) | May 25, 2016 |
| [v1.0.1](API_Release_Notes.md#v101) | May 16, 2016 |
+## v4.0.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: July 31, 2023
+
+### New Features and Changes
+
+* The GDC API no longer supports download of Legacy Archive files.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v3.28.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: May 11, 2023
+
+### New Features and Changes
+
+* The `/legacy/files` endpoint has been deprecated and is no longer available.
+* Features to support Data Portal bugfixes were added.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v3.5.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: July 8, 2022
+
+### New Features and Changes
+
+* Features to support Data Portal performance were added.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+## v3.3.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: May 17, 2021
+
+### New Features and Changes
+
+* Features to better support batch tracking for submitted data were added.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+## v3.0.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: August 14, 2020
+
+### New Features and Changes
+
+* Enhancements were made to increase performance.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v2.1.2
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: May 7, 2020
+
+### New Features and Changes
+
+* An update to improve usability in the homepage quicksearch
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v2.1.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: March 10, 2020
+
+### New Features and Changes
+
+* New data dictionary changes.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v2.0.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: January 30, 2020
+
+### New Features and Changes
+
+* API code now uses Python 3.
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+## v1.23.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: November 6, 2019
+
+### New Features and Changes
+
+* QC Tests added for Submission
+* BAM slicing is now supported for unmapped reads
+* API now includes data from molecular_test and follow_up nodes. This impacts what is displayed on the GDC Data Portal
+* Better handling of concurrent transactions
+* CIViC annotations now included on the ssms endpoint. You can read more about CIViC annotations [here](https://civicdb.org/home)
+
+### Bugs Fixed Since Last Release
+
+* Fixed API memory leak
+* Fixed data offset issue returned by clinical.tar endpoint
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+
+## v1.22.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: July 31, 2019
+
+### New Features and Changes
+
+* Updated BCR Clinical XML parser code to support future indexing of additional clinical data. Parser code can be found [here](https://github.com/NCI-GDC/gdcdatamodel/tree/develop/gdcdatamodel/xml_mappings)
+
+ - Added a mapping for:
+ - `ann_arbor_b_symptoms`
+ - `ann_arbor_extranodal_involvement`
+ - `ajcc_pathologic_t`
+ - `ajcc_pathologic_n`
+ - `ajcc_pathologic_m`
+ - `ajcc_clinical_t`
+ - `ajcc_clinical_n`
+ - `ajcc_clinical_m`
+ - `ajcc_staging_system_edition`
+ - `figo_stage`
+ - `ajcc_clinical_stage`
+ - `primary_gleason_grade`
+ - `secondary_gleason_grade`
+ - `igcccg_stage`
+ - `masaoka_stage`
+
+ - Updated the mapping for:
+ - `primary_diagnosis`, `morphology`, `tissue_or_organ_of_origin`, and `site_of_resection_or_biopsy`
+
+### Bugs Fixed Since Last Release
+
+* Fixed a bug preventing multipart uploads
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+## v1.21.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: June 5, 2019
+
+### New Features and Changes
+
+* Mutation indexer update to accommodate data model changes
+* Updates to when in the release cycle downloaders and submitters have access to files
+
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+## v1.20.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: April 17, 2019
+
+### New Features and Changes
+
+* Disallowed creation of multiple file versions in the same data release
+* Improved API concurrency performance and addressed random failures/4XX/5XX responses
+* Improved API/Portal performances for querying large sets of data from the Elasticsearch indices
+* Updated BCR Clinical XML parser code to support future indexing of additional clinical data. Parser code can be found [here](https://github.com/NCI-GDC/gdcdatamodel/tree/develop/gdcdatamodel/xml_mappings)
+ - Updated the mapping for:
+ - `days_to_death`
+ - `days_to_birth`
+ - `vital_status`
+ - `prior_malignancy` and `synchronous_malignancy`
+ - Added a mapping for:
+ - `pack_years_smoked`
+ - `prior_treatment`
+ - `age_at_index`
+ - `days_to_diagnosis`
+ - `icd_10_code`
+ - `year_of_diagnosis`
+ - Remove calculation for:
+ - `cigarettes_per_day`
+ - `year_of_birth`
+ - `year_of_death`
+ - `bmi`
+ - Updated XML Parser to pull the most Up-to-Date Survival Information
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+
+## v1.19.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: February 20, 2019
+
+### New Features and Changes
+
+* Added API features to support controlled access DAVE
+* Updated API query endpoints to handle filtering of queries based on tokens
+* Created login notification Endpoint
+* Added hashing and logging for similar ES queries
+
+### Bugs Fixed Since Last Release
+
+* Fixed bug where quick search ES query grows with each request
+* Fixed bug where new file versions could be created when exactly the same existing metadata is uploaded
+* Fixed bug where submitting to specific projects produced error that data already existed
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+## v1.18.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: December 18, 2018
+
+### New Features and Changes
+
+* Update to auth for GDC Pre-Release Data Portal
+
+### Bugs Fixed Since Last Release
+
+* None
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+
+## v1.17.0
+
+* __GDC Product__: Application Programming Interface (API)
+* __Release Date__: November 7, 2018
+
+### New Features and Changes
+
+* Created new index cnv_centric
+* Created new index cnv_occurrence_centric
+* Created new REST API endpoints for CNV
+* Created mapping from aliquot to case for occurrence on cnv_centric
+* Created new graphql endpoints for CNV
+* Updated index case_centric to add cnv
+* Updated index gene_centric to add cnv
+
+### Bugs Fixed Since Last Release
+
+* Fixed bug to prevent users from deleting files in state submitted or released
+
+### Known Issues and Workarounds
+
+* Fields are not counted as missing if parent field is also missing. This may occur with queries of nested fields in the Data Portal Advanced Search or an API query using a filter. This behavior could impact results reported using search parameters of "IS MISSING" or "NOT MISSING".
+* Certain very large API requests will time out. It is recommended to break up very large requests into a series of smaller requests.
+
+
+
## v1.16.0
* __GDC Product__: Application Programming Interface (API)
diff --git a/docs/API/Users_Guide/Additional_Examples.md b/docs/API/Users_Guide/Additional_Examples.md
index 86057cbf0..8b852f61b 100644
--- a/docs/API/Users_Guide/Additional_Examples.md
+++ b/docs/API/Users_Guide/Additional_Examples.md
@@ -668,7 +668,7 @@ curl 'https://api.gdc.cancer.gov/cases?filters=%7b%0d%0a+++%22op%22+%3a+%22%3d%2
#### Example: Filter using a range
-This is an example of filtering for age at diagnosis. The request is for cases where the age at diagnosis is between 40 and 70 years. *Note:* `age_at_diagnosis` is expressed in days.
+This is an example of filtering for age at diagnosis. The request is for cases where the age at diagnosis is between 40 and 70 years. **Note:** `age_at_diagnosis` is expressed in days.
```Filter
{
diff --git a/docs/API/Users_Guide/Appendix_C_Format_of_Submission_Requests_and_Responses.md b/docs/API/Users_Guide/Appendix_C_Format_of_Submission_Requests_and_Responses.md
index e831cc09b..9de5b8060 100644
--- a/docs/API/Users_Guide/Appendix_C_Format_of_Submission_Requests_and_Responses.md
+++ b/docs/API/Users_Guide/Appendix_C_Format_of_Submission_Requests_and_Responses.md
@@ -123,4 +123,4 @@ API responses will contain a status for each entity specified in the request:
**`error`**: The desired transaction was not sucessful, and the transaction was aborted because of this entity. This entity did not pass validation or an internal error occured when attempting to complete the transaction. The error state will be accompanied by a list of errors recorded about the entity (see label-error-messages).
-**Note:** GDC API requests are transactional. An error with processing a node specified in the transaction will abort the transaction and will result in no changes being applied for any node involved in the transaction.
+>**Note:** GDC API requests are transactional. An error with processing a node specified in the transaction will abort the transaction and will result in no changes being applied for any node involved in the transaction.
diff --git a/docs/API/Users_Guide/BAM_Slicing.md b/docs/API/Users_Guide/BAM_Slicing.md
index 4462549c3..56b96f5cf 100644
--- a/docs/API/Users_Guide/BAM_Slicing.md
+++ b/docs/API/Users_Guide/BAM_Slicing.md
@@ -13,7 +13,7 @@ Please note the following:
* The functionality of this API differs from the usual functionality of `samtools` in that alignment records that overlap multiple regions will not be returned multiple times.
* A request with no region or gene specified will return the BAM header, which makes it easy to inspect the references to which the alignment records were aligned.
* A request for regions that are not included in the source BAM is not considered an error, and is treated the same as if no records existed for the region.
-* Examples provided for BAM slicing functionality are intended for use with GDC harmonized data (i.e. BAM files available in the GDC Data Portal). Slicing of unharmonized BAM files (i.e. BAM files in the GDC Legacy Portal) is not supported.
+* Examples provided for BAM slicing functionality are intended for use with GDC harmonized data (i.e. BAM files available in the GDC Data Portal).
* Bam slicing does not create an associated bam index (.bai) file. For applications requiring a .bai file users will need to generate this file from the bam slice using a tool and command such as `samtools index`.
### Query Parameters
@@ -21,11 +21,11 @@ Please note the following:
The following query parameters and JSON fields are supported:
| Description | Query Parameter | JSON Field | Query format |
-|---|---|---|
+|---|---|---|---|
| entire chromosome, or a position or region on the chromosome, specified using chromosomal coordinates | region | regions | region=(:(-)?)? |
-| region specified using a [HGNC](http://www.genenames.org/) / [GENCODE v22](http://www.gencodegenes.org/) gene name | gencode | gencode | gencode= |
+| region specified using a [HGNC](http://www.genenames.org/) / [GENCODE v36](http://www.gencodegenes.org/) gene name | gencode | gencode | gencode= |
-**NOTE:** The successfully sliced BAM will contain all reads that overlap (entirely or partially) with the specified region or gene. It is possible to specify an open-ended region, e.g. `chr2:10000`, which would return all reads that (completely or partially) overlap with the region of chromosome 2 from position 10,000 to the end of the chromosome.
+>**NOTE:** The successfully sliced BAM will contain all reads that overlap (entirely or partially) with the specified region or gene. It is possible to specify an open-ended region, e.g. `chr2:10000`, which would return all reads that (completely or partially) overlap with the region of chromosome 2 from position 10,000 to the end of the chromosome.
### JSON Schema
@@ -61,7 +61,7 @@ The following two requests are examples of BAM slicing using region(s).
token=$(
```
+## Examples: Specifying unmapped reads
+
+Unmapped reads are found in GDC BAM files. You may request these reads by using the following commands.
+
+```GET
+token=$(
+```
+
After downloading, the sliced BAM file can be converted to SAM using the following command if `samtools` is installed on the user's system:
diff --git a/docs/API/Users_Guide/Data_Analysis.md b/docs/API/Users_Guide/Data_Analysis.md
index d987f538c..0b7fa2e4c 100644
--- a/docs/API/Users_Guide/Data_Analysis.md
+++ b/docs/API/Users_Guide/Data_Analysis.md
@@ -1,27 +1,36 @@
# Data Analysis
-The GDC DAVE tools use the same API as the rest of the Data Portal and takes advantage of several new endpoints. Similar to the [GDC Data Portal Exploration](http://docs.gdc.cancer.gov/Data_Portal/Users_Guide/Exploration/) feature, the GDC data analysis endpoints allow API users to programmatically explore data in the GDC using advanced filters at a gene and mutation level. Survival analysis data is also available.
+The GDC data analysis endpoints allow API users to programmatically explore data in the GDC using advanced filters at a gene and mutation level. Survival analysis data is also available.
## Endpoints
The following data analysis endpoints are available from the GDC API:
-| __Endpoint__ | __Description__ |
-|---|---|
-| __/genes__ | Allows users to access summary information about each gene using its Ensembl ID. |
-| __/ssms__ | Allows users to access information about each somatic mutation. For example, a `ssm` would represent the transition of C to T at position 52000 of chromosome 1. |
-| __/ssm_occurrences__ | A `ssm` entity as applied to a single instance (case). An example of a `ssm occurrence` would be that the transition of C to T at position 52000 of chromosome 1 occurred in patient TCGA-XX-XXXX. |
-|__/analysis/top_cases_counts_by_genes__| Returns the number of cases with a mutation in each gene listed in the gene_ids parameter for each project. Note that this endpoint cannot be used with the `format` or `fields` parameters.|
-|__/analysis/top_mutated_genes_by_project__| Returns a list of genes that have the most mutations within a given project. |
-|__/analysis/top_mutated_cases_by_gene__| Generates information about the cases that are most affected by mutations in a given number of genes |
-|__/analysis/mutated_cases_count_by_project__| Returns counts for the number of cases that have associated `ssm` data in each project. The number of affected cases can be found under "case_with_ssm": {"doc_count": $case_count}.|
-|__/analysis/survival__| Survival plots can be generated in the Data Portal for different subsets of data, based upon many query factors such as variants, disease type and projects. This endpoint can be used to programmatically retrieve the raw data to generate these plots and apply different filters to the data. (see Survival Example)|
+|__Node__| __Endpoint__ | __Description__ |
+|---|---|---|
+|__Genes__| __/genes__ | Allows users to access summary information about each gene using its Ensembl ID. |
+|__SSMS__| __/ssms__ | Allows users to access information about each somatic mutation. For example, a `ssm` would represent the transition of C to T at position 52000 of chromosome 1. |
+||__/ssms/`<ssm_id>`__|Get information about a specific ssm using a `<ssm_id>`, often supplemented with the `expand` option to show fields of interest. |
+|| __/ssm_occurrences__ | A `ssm` entity as applied to a single instance (case). An example of a `ssm occurrence` would be that the transition of C to T at position 52000 of chromosome 1 occurred in patient TCGA-XX-XXXX. |
+||__/ssm_occurrences/`<ssm_occurrence_id>`__|Get information about a specific ssm occurrence using a `<ssm_occurrence_id>`, often supplemented with the `expand` option to show fields of interest. |
+|__CNVS__|__/cnvs__|Allows users to access data about copy number variations (cnvs). This data will be specific to cnvs and not a specific case. |
+||__/cnvs/`<cnv_id>`__|Get information about a specific copy number variation using a `<cnv_id>`, often supplemented with the `expand` option to show fields of interest. |
+||__/cnvs/ids__|This endpoint will retrieve nodes that contain the queried cnv_id. This is accomplished by adding the query parameter: /cnvs/ids?query=`<cnv_id>`.|
+||__/cnv_occurrences__|A `cnv` entity as applied to a single case.|
+||__/cnv_occurrences/`<cnv_occurrence_id>`__|Get information about a specific copy number variation occurrence using a `<cnv_occurrence_id>`, often supplemented with the `expand` option to show fields of interest. |
+||__/cnv_occurrences/ids__|This endpoint will retrieve nodes that contain the queried cnv_occurrence_id. This is accomplished by adding the query parameter: /cnv_occurrences/ids?query=`<cnv_occurrence_id>`|
+|__Analysis__|__/analysis/top_cases_counts_by_genes__| Returns the number of cases with a mutation in each gene listed in the gene_ids parameter for each project. Note that this endpoint cannot be used with the `format` or `fields` parameters.|
+||__/analysis/top_mutated_genes_by_project__| Returns a list of genes that have the most mutations within a given project. |
+||__/analysis/top_mutated_cases_by_gene__| Generates information about the cases that are most affected by mutations in a given number of genes |
+||__/analysis/mutated_cases_count_by_project__| Returns counts for the number of cases that have associated `ssm` data in each project. The number of affected cases can be found under "case_with_ssm": {"doc_count": $case_count}.|
+||__/analysis/survival__| Survival plots can be generated in the Data Portal for different subsets of data, based upon many query factors such as variants, disease type and projects. This endpoint can be used to programmatically retrieve the raw data to generate these plots and apply different filters to the data. (see Survival Example)|
+
The methods for retrieving information from these endpoints are very similar to those used for the `cases` and `files` endpoints. These methods are explored in depth in the [API Search and Retrieval](https://docs.gdc.cancer.gov/API/Users_Guide/Search_and_Retrieval/) documentation. The `_mapping` parameter can also be used with each of these endpoints to generate a list of potential fields. For example:
`https://api.gdc.cancer.gov/ssms/_mapping`
-Note: While it is not an endpoint, the `observation` entity is featured in the visualization section of the API. The `observation` entity provides information from the MAF file, such as read depth and normal genotype, that supports the validity of the associated `ssm`. An example is demonstrated below:
+While it is not an endpoint, the `observation` entity is featured in the visualization section of the API. The `observation` entity provides information from the MAF file, such as read depth and normal genotype, that supports the validity of the associated `ssm`. An example is demonstrated below:
```Shell
curl "https://api.gdc.cancer.gov/ssms/57bb3f2e-ec05-52c2-ab02-7065b7d24849?expand=occurrence.case.observation.read_depth&pretty=true"
@@ -143,22 +152,22 @@ gene_start gene_end symbol id
## Simple Somatic Mutation Endpoint Examples
-__Example 1__: Similar to the `/genes` endpoint, a user would like to retrieve information about the mutation based on its COSMIC ID. This would be accomplished by creating a JSON filter such as:
+__Example 1__: Similar to the `/genes` endpoint, a user would like to retrieve information about the mutation based on its COSMIC ID. This would be accomplished by creating a JSON filter, which is then URL-encoded for use in the `curl` command.
-```Query
+```Filter
{
"op":"in",
"content":{
"field":"cosmic_id",
"value":[
- "COSM4860838"
+ "COSM1135366"
]
}
}
```
```Shell
-curl 'https://api.gdc.cancer.gov/ssms?pretty=true&filters=%7B%0A%22op%22%3A%22in%22%2C%0A%22content%22%3A%7B%0A%22field%22%3A%22cosmic_id%22%2C%0A%22value%22%3A%5B%0A%22COSM4860838%22%0A%5D%0A%7D%0A%7D%0A'
+curl 'https://api.gdc.cancer.gov/ssms?pretty=true&filters=%7B%0A%22op%22%3A%22in%22%2C%0A%22content%22%3A%7B%0A%22field%22%3A%22cosmic_id%22%2C%0A%22value%22%3A%5B%0A%22COSM1135366%22%0A%5D%0A%7D%0A%7D%0A'
```
```Response
@@ -166,52 +175,117 @@ curl 'https://api.gdc.cancer.gov/ssms?pretty=true&filters=%7B%0A%22op%22%3A%22in
"data": {
"hits": [
{
- "ncbi_build": "GRCh38",
- "mutation_type": "Simple Somatic Mutation",
- "mutation_subtype": "Single base substitution",
- "end_position": 62438203,
+ "id": "edd1ae2c-3ca9-52bd-a124-b09ed304fcc2",
+ "start_position": 25245350,
+ "gene_aa_change": [
+ "KRAS G12D"
+ ],
"reference_allele": "C",
- "ssm_id": "8b3c1a7a-e4e0-5200-9d46-5767c2982145",
- "start_position": 62438203,
+ "ncbi_build": "GRCh38",
"cosmic_id": [
- "COSM4860838",
- "COSM731764",
- "COSM731765"
+ "COSM1135366",
+ "COSM521"
],
- "id": "8b3c1a7a-e4e0-5200-9d46-5767c2982145",
+ "mutation_subtype": "Single base substitution",
+ "mutation_type": "Simple Somatic Mutation",
+ "chromosome": "chr12",
+ "ssm_id": "edd1ae2c-3ca9-52bd-a124-b09ed304fcc2",
+ "genomic_dna_change": "chr12:g.25245350C>T",
"tumor_allele": "T",
- "gene_aa_change": [
- "CADPS G1147G",
- "CADPS G1187G",
- "CADPS G1217G",
- "CADPS G1226G",
- "CADPS G127G",
- "CADPS G218G",
- "CADPS G95G"
- ],
- "chromosome": "chr3",
- "genomic_dna_change": "chr3:g.62438203C>T"
+ "end_position": 25245350
}
],
"pagination": {
"count": 1,
- "sort": "",
+ "total": 1,
+ "size": 10,
"from": 0,
+ "sort": "",
"page": 1,
- "total": 1,
- "pages": 1,
- "size": 10
+ "pages": 1
}
},
"warnings": {}
}
```
+__Example 2:__ Based on the previous example's `ssm_id` (`edd1ae2c-3ca9-52bd-a124-b09ed304fcc2`), a user would like to look at the consequences and the VEP impact due to this ssm.
+
+```Shell
+curl 'https://api.gdc.cancer.gov/ssms/edd1ae2c-3ca9-52bd-a124-b09ed304fcc2?pretty=true&expand=consequence.transcript&fields=consequence.transcript.annotation.vep_impact'
+```
+
+```JSON
+{
+ "data": {
+ "consequence": [
+ {
+ "transcript": {
+ "annotation": {
+ "vep_impact": "MODERATE"
+ },
+ "transcript_id": "ENST00000557334",
+ "aa_end": 12,
+ "consequence_type": "missense_variant",
+ "aa_start": 12,
+ "is_canonical": false,
+ "aa_change": "G12D",
+ "ref_seq_accession": ""
+ }
+ },
+ {
+ "transcript": {
+ "annotation": {
+ "vep_impact": "MODERATE"
+ },
+ "transcript_id": "ENST00000256078",
+ "aa_end": 12,
+ "consequence_type": "missense_variant",
+ "aa_start": 12,
+ "is_canonical": true,
+ "aa_change": "G12D",
+ "ref_seq_accession": "NM_001369786.1&NM_033360.4"
+ }
+ },
+ {
+ "transcript": {
+ "annotation": {
+ "vep_impact": "MODERATE"
+ },
+ "transcript_id": "ENST00000311936",
+ "aa_end": 12,
+ "consequence_type": "missense_variant",
+ "aa_start": 12,
+ "is_canonical": false,
+ "aa_change": "G12D",
+ "ref_seq_accession": "NM_001369787.1&NM_004985.5"
+ }
+ },
+ {
+ "transcript": {
+ "annotation": {
+ "vep_impact": "MODERATE"
+ },
+ "transcript_id": "ENST00000556131",
+ "aa_end": 12,
+ "consequence_type": "missense_variant",
+ "aa_start": 12,
+ "is_canonical": false,
+ "aa_change": "G12D",
+ "ref_seq_accession": ""
+ }
+ }
+ ]
+ },
+ "warnings": {}
+}
+```
+
## Simple Somatic Mutation Occurrence Endpoint Examples
__Example 1:__ A user wants to determine the chromosome in case `TCGA-DU-6407` that contains the greatest number of `ssms`. As this relates to mutations that are observed in a case, the `ssm_occurrences` endpoint is used.
-```
+```Filter
{
"op":"in",
"content":{
@@ -223,721 +297,2405 @@ __Example 1:__ A user wants to determine the chromosome in case `TCGA-DU-6407` t
```Shell
curl "https://api.gdc.cancer.gov/ssm_occurrences?format=tsv&fields=ssm.chromosome&size=5000&filters=%7B%0D%0A%22op%22%3A%22in%22%2C%0D%0A%22content%22%3A%7B%0D%0A%22field%22%3A%22case.submitter_id%22%2C%0D%0A%22value%22%3A%5B%0D%0A%22TCGA-DU-6407%22%0D%0A%5D%0D%0A%7D%0D%0A%7D"
```
-```Response
-ssm.chromosome id
-chr3 552c09d1-69b1-5c04-b543-524a6feae3eb
-chr10 391011ff-c1fd-5e2a-a128-652bc660f64c
-chr10 1378cbc4-af88-55bb-b2e5-185bb4246d7a
-chr10 3a2b3870-a395-5bc3-8c8f-0d40b0f2202c
-chr1 4a93d7a5-988d-5055-80da-999dc3b45d80
-chrX 22a07c7c-16ba-51df-a9a9-1e41e2a45225
-chr12 dbc5eafa-ea26-5f1c-946c-b6974a345b69
-chr11 02ae553d-1f27-565d-96c5-2c3cfca7264a
-chr2 faee73a9-4804-58ea-a91f-18c3d901774f
-chr6 97c5b38b-fc96-57f5-8517-cc702b3aa70a
-chr17 0010a89d-9434-5d97-8672-36ee394767d0
-chr19 f08dcc53-eadc-5ceb-bf31-f6b38629e4cb
-chrX 19ca262d-b354-54a0-b582-c4719e37e91d
-chr19 c44a93a1-5c73-5cff-b40e-98ce7e5fe57b
-chr3 b67f31b5-0341-518e-8fcc-811cd2e36af1
-chr1 94abd5fd-d539-5a4a-8719-9615cf7cec5d
-chr17 1476a543-2951-5ec4-b165-67551b47d810
-chr2 b4822fc9-f0cc-56fd-9d97-f916234e309d
-chr2 3548ecfe-5186-51e7-8f40-37f4654cd260
-chr16 105e7811-4601-5ccb-ae93-e7107923599e
-chr2 99b3aad4-d368-506d-99d6-047cbe5dff0f
-chr13 9dc3f7cd-9efa-530a-8524-30d067e49d54
-chr21 1267330b-ae6d-5e25-b19e-34e98523679e
-chr16 c77f7ce5-fbe6-5da4-9a7b-b528f8e530cb
-chr10 2cb06277-993e-5502-b2c5-263037c45d18
-chr17 d25129ad-3ad7-584f-bdeb-fba5c3881d32
-chr17 a76469cb-973c-5d4d-bf82-7cf4e8f6c129
-chr10 727c9d57-7b74-556f-aa5b-e1ca1f76d119
-chr15 b4a86ffd-e60c-5c9c-aaa1-9e9f02d86116
-chr5 3a023e72-da92-54f7-aa18-502c1076b2b0
+```tsv
+id ssm.chromosome
+105e7811-4601-5ccb-ae93-e7107923599e chr16
+faee73a9-4804-58ea-a91f-18c3d901774f chr2
+99b3aad4-d368-506d-99d6-047cbe5dff0f chr2
+2cb06277-993e-5502-b2c5-263037c45d18 chr10
+f08dcc53-eadc-5ceb-bf31-f6b38629e4cb chr19
+97c5b38b-fc96-57f5-8517-cc702b3aa70a chr6
+19ca262d-b354-54a0-b582-c4719e37e91d chrX
+b4822fc9-f0cc-56fd-9d97-f916234e309d chr2
+22a07c7c-16ba-51df-a9a9-1e41e2a45225 chrX
+0010a89d-9434-5d97-8672-36ee394767d0 chr17
+3a023e72-da92-54f7-aa18-502c1076b2b0 chr5
+391011ff-c1fd-5e2a-a128-652bc660f64c chr10
+3548ecfe-5186-51e7-8f40-37f4654cd260 chr2
+b67f31b5-0341-518e-8fcc-811cd2e36af1 chr3
+4a93d7a5-988d-5055-80da-999dc3b45d80 chr1
+9dc3f7cd-9efa-530a-8524-30d067e49d54 chr13
+552c09d1-69b1-5c04-b543-524a6feae3eb chr3
+dbc5eafa-ea26-5f1c-946c-b6974a345b69 chr12
+d25129ad-3ad7-584f-bdeb-fba5c3881d32 chr17
+1378cbc4-af88-55bb-b2e5-185bb4246d7a chr10
+c44a93a1-5c73-5cff-b40e-98ce7e5fe57b chr19
+1267330b-ae6d-5e25-b19e-34e98523679e chr21
+1476a543-2951-5ec4-b165-67551b47d810 chr17
+727c9d57-7b74-556f-aa5b-e1ca1f76d119 chr10
+94abd5fd-d539-5a4a-8719-9615cf7cec5d chr1
+a76469cb-973c-5d4d-bf82-7cf4e8f6c129 chr17
```
-
-## Analysis Endpoints
-
-In addition to the `ssms`, `ssm_occurrences`, and `genes` endpoints mentioned previously, several `/analysis` endpoints were designed to quickly retrieve specific datasets used for visualization display.
-
-__Example 1:__ The `/analysis/top_cases_counts_by_genes` endpoint gives the number of cases with a mutation in each gene listed in the `gene_ids` parameter for each project. Note that this endpoint cannot be used with the `format` or `fields` parameters. In this instance, the query will produce the number of cases in each projects with mutations in the gene `ENSG00000155657`.
+__Example 2:__ A user has retrieved an `ssm_occurrence`, and would like to determine whether that case also has diagnostic information.
```Shell
-curl "https://api.gdc.cancer.gov/analysis/top_cases_counts_by_genes?gene_ids=ENSG00000155657&pretty=true"
+curl 'https://api.gdc.cancer.gov/ssm_occurrences/6fd8527d-5c40-5604-8fa9-0ce798eec231?pretty=true&expand=case.diagnoses'
```
-
-This JSON-formatted output is broken up by project. For an example, see the following text:
-
-```json
- "genes": {
- "my_genes": {
- "gene_id": {
- "buckets": [
- {
- "key": "ENSG00000155657",
- "doc_count": 45
- }
- ],
- "sum_other_doc_count": 0,
- "doc_count_error_upper_bound": 0
- },
- "doc_count": 45
- },
- "doc_count": 12305
- },
- "key": "TCGA-GBM",
- "doc_count": 45
+```Json
+{
+ "data": {
+ "ssm_occurrence_id": "6fd8527d-5c40-5604-8fa9-0ce798eec231",
+ "case": {
+ "diagnoses": [
+ {
+ "ajcc_pathologic_t": "T3b",
+ "synchronous_malignancy": "No",
+ "morphology": "8720/3",
+ "ajcc_pathologic_stage": "Stage IIB",
+ "ajcc_pathologic_n": "N0",
+ "ajcc_pathologic_m": "M0",
+ "submitter_id": "TCGA-Z2-A8RT_diagnosis",
+ "days_to_diagnosis": 0,
+ "last_known_disease_status": "not reported",
+ "tissue_or_organ_of_origin": "Skin, NOS",
+ "days_to_last_follow_up": 839.0,
+ "age_at_diagnosis": 15342,
+ "primary_diagnosis": "Malignant melanoma, NOS",
+ "classification_of_tumor": "not reported",
+ "prior_malignancy": "no",
+ "year_of_diagnosis": 2012,
+ "diagnosis_id": "1d06a202-c51a-52e2-805f-eeb5f7fac14e",
+ "icd_10_code": "C44.6",
+ "site_of_resection_or_biopsy": "Skin of upper limb and shoulder",
+ "prior_treatment": "No",
+ "state": "released",
+ "tumor_grade": "Not Reported",
+ "progression_or_recurrence": "not reported",
+ "ajcc_staging_system_edition": "7th"
}
+ ]
+ }
+ },
+ "warnings": {}
+}
```
-This portion of the output shows TCGA-GBM including 45 cases that have `ssms` in the gene `ENSG00000155657`.
+## Copy Number Variation Endpoint Examples
-__Example 2:__ The following demonstrates a use of the `/analysis/top_mutated_genes_by_project` endpoint. This will output the genes that are mutated in the most cases in "TCGA-DLBC" and will count the mutations that have a `HIGH` or `MODERATE` impact on gene function. Note that the `score` field does not represent the number of mutations in a given gene, but a calculation that is used to determine which genes have the greatest number of unique mutations.
+__Example 1:__ A user is interested in finding the first 30 cnvs found on chromosome 4 that have a cnv loss.
-```json
-{
- "op":"AND",
- "content":[
- {
- "op":"in",
- "content":{
- "field":"case.project.project_id",
- "value":[
- "TCGA-DLBC"
- ]
- }
- },
- {
- "op":"in",
- "content":{
- "field":"case.ssm.consequence.transcript.annotation.impact",
- "value":[
- "HIGH",
- "MODERATE"
- ]
- }
- }
- ]
+```Filter
+{
+ "op": "and",
+ "content": [
+ {
+ "op": "in",
+ "content": {
+ "field": "chromosome",
+ "value": [
+ "4"
+ ]
+ }
+ },
+ {
+ "op": "in",
+ "content": {
+ "field": "cnv_change",
+ "value": [
+ "Loss"
+ ]
+ }
+ }
+ ]
}
```
+
```Shell
-curl "https://api.gdc.cancer.gov/analysis/top_mutated_genes_by_project?fields=gene_id,symbol&filters=%7B%22op%22%3A%22AND%22%2C%22content%22%3A%5B%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22case.project.project_id%22%2C%22value%22%3A%5B%22TCGA-DLBC%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22case.ssm.consequence.transcript.annotation.impact%22%2C%22value%22%3A%5B%22HIGH%22%2C%22MODERATE%22%5D%7D%7D%5D%7D&pretty=true"
+curl 'https://api.gdc.cancer.gov/cnvs?filters=%7B%0D%0A+++%22op%22%3A+%22and%22%2C%0D%0A++++%22content%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22chromosome%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%224%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22cnv_change%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22Loss%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D&size=30&sort=start_position&format=tsv'
```
-```Response
+
+```tsv
+chromosome cnv_change cnv_id end_position gene_level_cn id ncbi_build start_position
+4 Loss 11381600-f064-5c42-90d2-a5c79c8b23e1 88208 True 11381600-f064-5c42-90d2-a5c79c8b23e1 GRCh38 53286
+4 Loss edef0f2f-c1a7-507c-842f-e1f8a568df9d 202303 True edef0f2f-c1a7-507c-842f-e1f8a568df9d GRCh38 124501
+4 Loss eba92f9a-b045-54a8-948a-451e439ed418 305474 True eba92f9a-b045-54a8-948a-451e439ed418 GRCh38 270675
+4 Loss 89319453-2a3f-5ebe-be30-8af0426e0343 384868 True 89319453-2a3f-5ebe-be30-8af0426e0343 GRCh38 337814
+4 Loss 6567929c-4b6f-582b-aedf-acde2c0ec736 499156 True 6567929c-4b6f-582b-aedf-acde2c0ec736 GRCh38 425815
+4 Loss 2daff58b-5065-50cd-8239-253180eaee81 540200 True 2daff58b-5065-50cd-8239-253180eaee81 GRCh38 499210
+4 Loss 2b42c8d4-6d85-5352-96e1-9e52e722c248 576295 True 2b42c8d4-6d85-5352-96e1-9e52e722c248 GRCh38 573880
+4 Loss 2646cdc7-7602-59a4-ae4f-d171352bae88 670782 True 2646cdc7-7602-59a4-ae4f-d171352bae88 GRCh38 625573
+4 Loss c11ad392-949f-593f-a3ab-d834b2f82809 674330 True c11ad392-949f-593f-a3ab-d834b2f82809 GRCh38 672436
+4 Loss f31be658-4de0-549e-81be-e79759879acf 682033 True f31be658-4de0-549e-81be-e79759879acf GRCh38 673580
+4 Loss d72c62f2-fc29-5b83-9839-7f6b03970aff 689271 True d72c62f2-fc29-5b83-9839-7f6b03970aff GRCh38 681829
+4 Loss 45448d47-6e13-5d30-824d-96150a7f55c6 770640 True 45448d47-6e13-5d30-824d-96150a7f55c6 GRCh38 705748
+4 Loss 517e65ea-9084-54c2-abe0-b1b47e9f872c 826129 True 517e65ea-9084-54c2-abe0-b1b47e9f872c GRCh38 784957
+4 Loss b5a09c9b-d842-5b76-a500-56f18252c29d 932373 True b5a09c9b-d842-5b76-a500-56f18252c29d GRCh38 849276
+4 Loss e3a3b61d-2881-5ad4-90bf-58ef29ae9ecb 958656 True e3a3b61d-2881-5ad4-90bf-58ef29ae9ecb GRCh38 932387
+4 Loss 8630a1b6-3215-5b71-903a-ad9845505afc 986895 True 8630a1b6-3215-5b71-903a-ad9845505afc GRCh38 958887
+4 Loss f748b06f-1fb7-53a9-a7d6-2c22a3ae6de5 993440 True f748b06f-1fb7-53a9-a7d6-2c22a3ae6de5 GRCh38 979073
+4 Loss a5e4a63f-c5f6-5f0f-a6b6-f51bfb643533 1004564 True a5e4a63f-c5f6-5f0f-a6b6-f51bfb643533 GRCh38 986997
+4 Loss 73f6fbbe-6fd9-524c-a7c8-a7cf3f08ada4 1026898 True 73f6fbbe-6fd9-524c-a7c8-a7cf3f08ada4 GRCh38 1009936
+4 Loss adad579a-b002-5022-823a-570c59549065 1113564 True adad579a-b002-5022-823a-570c59549065 GRCh38 1056250
+4 Loss d5a5c45e-594b-5cbc-97d5-75fc5155d021 1208962 True d5a5c45e-594b-5cbc-97d5-75fc5155d021 GRCh38 1166932
+4 Loss 6c910993-faa8-5abc-b433-b3afcc5e9e11 1249953 True 6c910993-faa8-5abc-b433-b3afcc5e9e11 GRCh38 1211445
+4 Loss 4453b4cb-7d8a-5e26-a856-eac62eec287a 1340147 True 4453b4cb-7d8a-5e26-a856-eac62eec287a GRCh38 1289887
+4 Loss 6db1001a-a41b-518d-9491-2bf41544d90f 1395989 True 6db1001a-a41b-518d-9491-2bf41544d90f GRCh38 1345691
+4 Loss 6bef981a-ead1-5aa7-8a69-8d38e576e5c0 1406442 True 6bef981a-ead1-5aa7-8a69-8d38e576e5c0 GRCh38 1402932
+4 Loss af6e0b49-922a-587e-b353-4b9414605cf1 1684261 True af6e0b49-922a-587e-b353-4b9414605cf1 GRCh38 1617915
+4 Loss 400352ad-8526-562a-bbf4-29b90a48f46f 1712344 True 400352ad-8526-562a-bbf4-29b90a48f46f GRCh38 1692731
+4 Loss 8811414d-2434-56c6-afe5-a998c9b18d47 1745171 True 8811414d-2434-56c6-afe5-a998c9b18d47 GRCh38 1712891
+4 Loss 169c4409-0256-5841-9314-f1a4dd2bcc38 1721358 True 169c4409-0256-5841-9314-f1a4dd2bcc38 GRCh38 1715952
+4 Loss 1712ccac-6e70-5fb3-b71e-1a029eaf047c 1808872 True 1712ccac-6e70-5fb3-b71e-1a029eaf047c GRCh38 1793293
+```
+
+__Example 2:__ A user wants to determine the location and identity of the gene affected by the cnv `544c4896-0152-5787-8d77-894a16f0ded0`, and determine whether the gene is found within the Cancer Gene Census.
+
+```Shell
+curl 'https://api.gdc.cancer.gov/cnvs/544c4896-0152-5787-8d77-894a16f0ded0?pretty=true&expand=consequence.gene'
+```
+
+```Json
{
"data": {
- "hits": [
- {
- "_score": 14.0,
- "symbol": "IGHV2-70",
- "gene_id": "ENSG00000274576"
- },
- {
- "_score": 14.0,
- "symbol": "IGLV3-1",
- "gene_id": "ENSG00000211673"
- },
- {
- "_score": 14.0,
- "symbol": "IGHM",
- "gene_id": "ENSG00000211899"
- },
- {
- "_score": 11.0,
- "symbol": "KMT2D",
- "gene_id": "ENSG00000167548"
- },
- {
- "_score": 11.0,
- "symbol": "IGLL5",
- "gene_id": "ENSG00000254709"
- },
- {
- "_score": 11.0,
- "symbol": "BTG2",
- "gene_id": "ENSG00000159388"
- },
- {
- "_score": 9.0,
- "symbol": "CARD11",
- "gene_id": "ENSG00000198286"
- },
- {
- "_score": 9.0,
- "symbol": "IGHG1",
- "gene_id": "ENSG00000211896"
- },
- {
- "_score": 9.0,
- "symbol": "IGLC2",
- "gene_id": "ENSG00000211677"
- },
+ "start_position": 27100354,
+ "consequence": [
{
- "_score": 9.0,
- "symbol": "LRP1B",
- "gene_id": "ENSG00000168702"
+ "gene": {
+ "biotype": "protein_coding",
+ "symbol": "HOXA2",
+ "gene_id": "ENSG00000105996"
+ }
}
],
- "pagination": {
- "count": 10,
- "sort": "None",
- "from": 0,
- "page": 1,
- "total": 3214,
- "pages": 322,
- "size": 10
- }
+ "gene_level_cn": true,
+ "cnv_change": "Gain",
+ "ncbi_build": "GRCh38",
+ "chromosome": "7",
+ "cnv_id": "544c4896-0152-5787-8d77-894a16f0ded0",
+ "end_position": 27102686
},
"warnings": {}
}
```
-__Example 3:__ The `/analysis/top_mutated_cases_by_gene` endpoint will generate information about the cases that are most affected by mutations in a given number of genes. Below, the file count for each category is given for the cases most affected by mutations in these 50 genes. The size of the output is limited to two cases with the `size=2` parameter, but a higher value can be set by the user.
+## Copy Number Variation Occurrence Endpoint Examples
-```Shell
-curl "https://api.gdc.cancer.gov/analysis/top_mutated_cases_by_gene?fields=diagnoses.days_to_death,diagnoses.age_at_diagnosis,diagnoses.vital_status,diagnoses.primary_diagnosis,demographic.gender,demographic.race,demographic.ethnicity,case_id,summary.data_categories.file_count,summary.data_categories.data_category&filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22%3D%22%2C%22content%22%3A%7B%22field%22%3A%22cases.project.project_id%22%2C%22value%22%3A%22TCGA-DLBC%22%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22genes.gene_id%22%2C%22value%22%3A%5B%22ENSG00000166710%22%2C%22ENSG00000005339%22%2C%22ENSG00000083857%22%2C%22ENSG00000168769%22%2C%22ENSG00000100906%22%2C%22ENSG00000184677%22%2C%22ENSG00000101680%22%2C%22ENSG00000101266%22%2C%22ENSG00000028277%22%2C%22ENSG00000140968%22%2C%22ENSG00000181827%22%2C%22ENSG00000116815%22%2C%22ENSG00000275221%22%2C%22ENSG00000139083%22%2C%22ENSG00000112851%22%2C%22ENSG00000112697%22%2C%22ENSG00000164134%22%2C%22ENSG00000009413%22%2C%22ENSG00000071626%22%2C%22ENSG00000135407%22%2C%22ENSG00000101825%22%2C%22ENSG00000104814%22%2C%22ENSG00000166415%22%2C%22ENSG00000142867%22%2C%22ENSG00000254585%22%2C%22ENSG00000139718%22%2C%22ENSG00000077721%22%2C%22ENSG00000130294%22%2C%22ENSG00000117245%22%2C%22ENSG00000117318%22%2C%22ENSG00000270550%22%2C%22ENSG00000163637%22%2C%22ENSG00000166575%22%2C%22ENSG00000065526%22%2C%22ENSG00000156453%22%2C%22ENSG00000128191%22%2C%22ENSG00000055609%22%2C%22ENSG00000204469%22%2C%22ENSG00000187605%22%2C%22ENSG00000185875%22%2C%22ENSG00000110888%22%2C%22ENSG00000007341%22%2C%22ENSG00000173198%22%2C%22ENSG00000115568%22%2C%22ENSG00000163714%22%2C%22ENSG00000125772%22%2C%22ENSG00000080815%22%2C%22ENSG00000189079%22%2C%22ENSG00000120837%22%2C%22ENSG00000143951%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22ssms.consequence.transcript.annotation.impact%22%2C%22value%22%3A%5B%22HIGH%22%5D%7D%7D%5D%7D&pretty=true&size=2"
-```
-```Response
+__Example 1:__ A user is interested in finding cases that have both cnv and ssm data for females diagnosed with Squamous Cell Neoplasms and have a cnv gain change on chromosome 9. It is important to note that for a case like this, where multiple arguments are needed for one filtered field, it is easier for the API to have multiple filters for the same field, `case.available_variation_data` in this example, than having one filter with multiple arguments.
+
+```Filter
{
- "data": {
- "hits": [
- {
- "_score": 7.0,
- "diagnoses": [
- {
- "days_to_death": null,
- "vital_status": "alive",
- "age_at_diagnosis": 18691,
- "primary_diagnosis": "c83.3"
- }
- ],
- "case_id": "eda9496e-be80-4a13-bf06-89f0cc9e937f",
- "demographic": {
- "gender": "male",
- "race": "white",
- "ethnicity": "hispanic or latino"
+ "op": "and",
+ "content": [
+ {
+ "op": "in",
+ "content": {
+ "field": "cnv.cnv_change",
+ "value": [
+ "Gain"
+ ]
+ }
},
- "summary": {
- "data_categories": [
- {
- "file_count": 1,
- "data_category": "DNA Methylation"
- },
- {
- "file_count": 5,
- "data_category": "Transcriptome Profiling"
- },
- {
- "file_count": 1,
- "data_category": "Biospecimen"
- },
- {
- "file_count": 16,
- "data_category": "Simple Nucleotide Variation"
- },
- {
- "file_count": 1,
- "data_category": "Clinical"
- },
- {
- "file_count": 4,
- "data_category": "Copy Number Variation"
- },
- {
- "file_count": 4,
- "data_category": "Raw Sequencing Data"
+ {
+ "op": "in",
+ "content": {
+ "field": "case.demographic.gender",
+ "value": [
+ "female"
+ ]
}
- ]
- }
- },
- {
- "_score": 4.0,
- "diagnoses": [
- {
- "days_to_death": null,
- "vital_status": "alive",
- "age_at_diagnosis": 27468,
- "primary_diagnosis": "c83.3"
- }
- ],
- "case_id": "a43e5f0e-a21f-48d8-97e0-084d413680b7",
- "demographic": {
- "gender": "male",
- "race": "white",
- "ethnicity": "not hispanic or latino"
},
- "summary": {
- "data_categories": [
- {
- "file_count": 1,
- "data_category": "DNA Methylation"
- },
- {
- "file_count": 5,
- "data_category": "Transcriptome Profiling"
- },
- {
- "file_count": 1,
- "data_category": "Biospecimen"
- },
- {
- "file_count": 16,
- "data_category": "Simple Nucleotide Variation"
- },
- {
- "file_count": 1,
- "data_category": "Clinical"
- },
- {
- "file_count": 4,
- "data_category": "Copy Number Variation"
- },
- {
- "file_count": 4,
- "data_category": "Raw Sequencing Data"
+ {
+ "op": "in",
+ "content": {
+ "field": "case.available_variation_data",
+ "value": [
+ "cnv"
+ ]
+ }
+ },
+ {
+ "op": "in",
+ "content": {
+ "field": "case.available_variation_data",
+ "value": [
+ "ssm"
+ ]
+ }
+ },
+ {
+ "op": "in",
+ "content": {
+ "field": "cnv.chromosome",
+ "value": [
+ "9"
+ ]
+ }
+ },
+ {
+ "op": "in",
+ "content": {
+ "field": "case.disease_type",
+ "value": [
+ "Squamous Cell Neoplasms"
+ ]
}
- ]
}
- }
- ],
- "pagination": {
- "count": 2,
- "sort": "None",
- "from": 0,
- "page": 1,
- "total": 27,
- "pages": 14,
- "size": 2
- }
- },
- "warnings": {}
+ ]
}
-```
-__Example 4:__ The `/analysis/mutated_cases_count_by_project` endpoint produces counts for the number of cases that have associated `ssm` data in each project. The number of affected cases can be found under `"case_with_ssm": {"doc_count": $case_count}`.
+```
```Shell
-curl "https://api.gdc.cancer.gov/analysis/mutated_cases_count_by_project?size=0&pretty=true"
+curl 'https://api.gdc.cancer.gov/cnv_occurrences?filters=%7B%0D%0A++++%22op%22%3A+%22and%22%2C%0D%0A++++%22content%22%3A+%5B%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22cnv.cnv_change%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22Gain%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22case.demographic.gender%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22female%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22case.available_variation_data%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22cnv%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22case.available_variation_data%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22ssm%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22cnv.chromosome%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%229%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%2C%0D%0A++++++++%7B%0D%0A++++++++++++%22op%22%3A+%22in%22%2C%0D%0A++++++++++++%22content%22%3A+%7B%0D%0A++++++++++++++++%22field%22%3A+%22case.disease_type%22%2C%0D%0A++++++++++++++++%22value%22%3A+%5B%0D%0A++++++++++++++++++++%22Squamous+Cell+Neoplasms%22%0D%0A++++++++++++++++%5D%0D%0A++++++++++++%7D%0D%0A++++++++%7D%0D%0A++++%5D%0D%0A%7D&fields=case.available_variation_data,case.case_id&format=tsv'
+```
+
+```tsv
+case.available_variation_data.0 case.available_variation_data.1 case.case_id id
+cnv ssm da30a845-c4d3-4c78-b8b0-210239224f8f 3caf6e3b-024f-57b6-bdd9-3b67e423cc11
+cnv ssm 0809ba8b-4ab6-4f43-934c-c1ccbc014a7e e6afe58e-c99c-5c8d-920e-8ba4daad4d89
+cnv ssm 8e0e456e-85ee-4de5-8f0b-72393d6acde0 9d983d9c-8320-53f1-9054-e46926c5b834
+cnv ssm 64a195f6-2212-4e81-bccc-e39c77a10908 8caeaecc-ad68-539d-8b3c-8320b3684763
+cnv ssm 2f6a0e87-1e6c-41f3-93e0-3e505fa654b0 4862c166-0f37-5e3c-ae4e-a2964de01cea
+cnv ssm f0daf315-8909-4cda-886d-a2770b08db94 099ff6cd-bd28-56f4-a181-6b02f3ba7503
+cnv ssm ff3808e4-eece-4046-819b-fe1019317f8e 0c936aa2-393e-5463-a431-3613b4510021
+cnv ssm 9205dc07-93f5-4b5e-924e-8e097616160f 133d27a7-fdc6-5082-a1f3-022b89f4e851
+cnv ssm 79ae5209-f476-4d65-a6c0-ebc18d7c8942 7a5e6bb1-8af3-5964-a3cc-c53602c8b099
+cnv ssm ff7099e1-8ff9-48e4-842d-46e98076e7e6 fb27fa8f-aa31-5e20-84da-8f45bb675405
+```
+
+__Example 2:__ A user is interested in the first cnv occurrence (`3b9f7ecc-2280-5b89-80f9-ec8d6c5e604e`) from the previous example, and would like to know more about the case exposures and demographics.
+
+```Shell
+curl 'https://api.gdc.cancer.gov/cnv_occurrences/3b9f7ecc-2280-5b89-80f9-ec8d6c5e604e?pretty=true&expand=cnv,case,case.exposures,case.demographic'
+```
+
+```Json
+{
+ "data": {
+ "cnv": {
+ "start_position": 68815994,
+ "gene_level_cn": true,
+ "cnv_change": "Gain",
+ "ncbi_build": "GRCh38",
+ "chromosome": "4",
+ "variant_status": "Tumor Only",
+ "cnv_id": "1a889109-30d5-51e3-848f-9f615c69f407",
+ "end_position": 68832023
+ },
+ "cnv_occurrence_id": "3b9f7ecc-2280-5b89-80f9-ec8d6c5e604e",
+ "case": {
+ "exposures": [
+ {
+ "cigarettes_per_day": 5.47945205479452,
+ "alcohol_history": "Not Reported",
+ "exposure_id": "f7b08a8e-d22b-5cb0-be9f-b922c9ca87d2",
+ "submitter_id": "TCGA-38-4629_exposure",
+ "state": "released",
+ "pack_years_smoked": 100.0
+ }
+ ],
+ "primary_site": "Bronchus and lung",
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "available_variation_data": [
+ "cnv",
+ "ssm"
+ ],
+ "case_id": "127bf818-f7e5-46b5-a9de-39f6d96b8b83",
+ "submitter_id": "TCGA-38-4629",
+ "state": "released",
+ "demographic": {
+ "demographic_id": "9ea1f795-9510-5acc-a9a5-bf1379e6635a",
+ "ethnicity": "not hispanic or latino",
+ "gender": "male",
+ "race": "white",
+ "vital_status": "Dead",
+ "age_at_index": 68,
+ "submitter_id": "TCGA-38-4629_demographic",
+ "days_to_death": 864,
+ "days_to_birth": -25104,
+ "state": "released",
+ "year_of_death": 2005,
+ "year_of_birth": 1935
+ }
+ }
+ },
+ "warnings": {}
+}
+```
+
+## Analysis Endpoints
+
+In addition to the `ssms`, `ssm_occurrences`, and `genes` endpoints mentioned previously, several `/analysis` endpoints were designed to quickly retrieve specific datasets used for visualization display.
+
+__Example 1:__ The `/analysis/top_cases_counts_by_genes` endpoint gives the number of cases with a mutation in each gene listed in the `gene_ids` parameter for each project. Note that this endpoint cannot be used with the `format` or `fields` parameters. In this instance, the query will produce the number of cases in each project with mutations in the gene `ENSG00000155657`.
+
+```Shell
+curl "https://api.gdc.cancer.gov/analysis/top_cases_counts_by_genes?gene_ids=ENSG00000155657&pretty=true"
+```
+
+
+This JSON-formatted output is broken up by project. For an example, see the following text:
+
+```json
+$ curl "https://api.gdc.cancer.gov/analysis/top_cases_counts_by_genes?gene_ids=ENSG00000155657&pretty=true"
+{
+ "took": 6,
+ "timed_out": false,
+ "_shards": {
+ "total": 12,
+ "successful": 12,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": {
+ "value": 5967,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
+ },
+ "aggregations": {
+ "projects": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "TCGA-BRCA",
+ "doc_count": 425,
+ "genes": {
+ "doc_count": 4031450,
+ "my_genes": {
+ "doc_count": 425,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 425
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-LUSC",
+ "doc_count": 423,
+ "genes": {
+ "doc_count": 4123089,
+ "my_genes": {
+ "doc_count": 423,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 423
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CPTAC-3",
+ "doc_count": 421,
+ "genes": {
+ "doc_count": 251552,
+ "my_genes": {
+ "doc_count": 421,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 421
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-SKCM",
+ "doc_count": 391,
+ "genes": {
+ "doc_count": 3040929,
+ "my_genes": {
+ "doc_count": 391,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 391
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-LUAD",
+ "doc_count": 345,
+ "genes": {
+ "doc_count": 3188761,
+ "my_genes": {
+ "doc_count": 345,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 345
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-OV",
+ "doc_count": 341,
+ "genes": {
+ "doc_count": 3728561,
+ "my_genes": {
+ "doc_count": 341,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 341
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-STAD",
+ "doc_count": 300,
+ "genes": {
+ "doc_count": 2145783,
+ "my_genes": {
+ "doc_count": 300,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 300
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-UCEC",
+ "doc_count": 297,
+ "genes": {
+ "doc_count": 1637055,
+ "my_genes": {
+ "doc_count": 297,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 297
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-HNSC",
+ "doc_count": 293,
+ "genes": {
+ "doc_count": 2325617,
+ "my_genes": {
+ "doc_count": 293,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 293
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-COAD",
+ "doc_count": 288,
+ "genes": {
+ "doc_count": 1695280,
+ "my_genes": {
+ "doc_count": 288,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 288
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-BLCA",
+ "doc_count": 280,
+ "genes": {
+ "doc_count": 2466835,
+ "my_genes": {
+ "doc_count": 280,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 280
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "MMRF-COMMPASS",
+ "doc_count": 181,
+ "genes": {
+ "doc_count": 45977,
+ "my_genes": {
+ "doc_count": 181,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 181
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-LIHC",
+ "doc_count": 167,
+ "genes": {
+ "doc_count": 1216775,
+ "my_genes": {
+ "doc_count": 167,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 167
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-CESC",
+ "doc_count": 161,
+ "genes": {
+ "doc_count": 1103281,
+ "my_genes": {
+ "doc_count": 161,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 161
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-KIRC",
+ "doc_count": 161,
+ "genes": {
+ "doc_count": 842546,
+ "my_genes": {
+ "doc_count": 161,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 161
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CPTAC-2",
+ "doc_count": 131,
+ "genes": {
+ "doc_count": 72575,
+ "my_genes": {
+ "doc_count": 131,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 131
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-GBM",
+ "doc_count": 131,
+ "genes": {
+ "doc_count": 756809,
+ "my_genes": {
+ "doc_count": 131,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 131
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-ESCA",
+ "doc_count": 129,
+ "genes": {
+ "doc_count": 1210888,
+ "my_genes": {
+ "doc_count": 129,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 129
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-PRAD",
+ "doc_count": 101,
+ "genes": {
+ "doc_count": 379949,
+ "my_genes": {
+ "doc_count": 101,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 101
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "HCMI-CMDC",
+ "doc_count": 99,
+ "genes": {
+ "doc_count": 54829,
+ "my_genes": {
+ "doc_count": 99,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 99
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-READ",
+ "doc_count": 98,
+ "genes": {
+ "doc_count": 726313,
+ "my_genes": {
+ "doc_count": 98,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 98
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-LGG",
+ "doc_count": 95,
+ "genes": {
+ "doc_count": 424689,
+ "my_genes": {
+ "doc_count": 95,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 95
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-KIRP",
+ "doc_count": 93,
+ "genes": {
+ "doc_count": 521936,
+ "my_genes": {
+ "doc_count": 93,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 93
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-SARC",
+ "doc_count": 93,
+ "genes": {
+ "doc_count": 903111,
+ "my_genes": {
+ "doc_count": 93,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 93
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-TGCT",
+ "doc_count": 51,
+ "genes": {
+ "doc_count": 524456,
+ "my_genes": {
+ "doc_count": 51,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 51
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TARGET-ALL-P2",
+ "doc_count": 50,
+ "genes": {
+ "doc_count": 1882,
+ "my_genes": {
+ "doc_count": 50,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 50
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-KICH",
+ "doc_count": 43,
+ "genes": {
+ "doc_count": 353674,
+ "my_genes": {
+ "doc_count": 43,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 43
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-PAAD",
+ "doc_count": 43,
+ "genes": {
+ "doc_count": 300427,
+ "my_genes": {
+ "doc_count": 43,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 43
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CGCI-HTMCP-CC",
+ "doc_count": 37,
+ "genes": {
+ "doc_count": 3606,
+ "my_genes": {
+ "doc_count": 37,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 37
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CDDP_EAGLE-1",
+ "doc_count": 32,
+ "genes": {
+ "doc_count": 16980,
+ "my_genes": {
+ "doc_count": 32,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 32
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-ACC",
+ "doc_count": 29,
+ "genes": {
+ "doc_count": 283969,
+ "my_genes": {
+ "doc_count": 29,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 29
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CMI-MBC",
+ "doc_count": 28,
+ "genes": {
+ "doc_count": 3581,
+ "my_genes": {
+ "doc_count": 28,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 28
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-THCA",
+ "doc_count": 28,
+ "genes": {
+ "doc_count": 89120,
+ "my_genes": {
+ "doc_count": 28,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 28
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-UCS",
+ "doc_count": 28,
+ "genes": {
+ "doc_count": 283673,
+ "my_genes": {
+ "doc_count": 28,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 28
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-MESO",
+ "doc_count": 21,
+ "genes": {
+ "doc_count": 137002,
+ "my_genes": {
+ "doc_count": 21,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 21
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-PCPG",
+ "doc_count": 19,
+ "genes": {
+ "doc_count": 99444,
+ "my_genes": {
+ "doc_count": 19,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 19
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TARGET-NBL",
+ "doc_count": 15,
+ "genes": {
+ "doc_count": 829,
+ "my_genes": {
+ "doc_count": 15,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 15
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-UVM",
+ "doc_count": 12,
+ "genes": {
+ "doc_count": 68201,
+ "my_genes": {
+ "doc_count": 12,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 12
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "EXCEPTIONAL_RESPONDERS-ER",
+ "doc_count": 11,
+ "genes": {
+ "doc_count": 10617,
+ "my_genes": {
+ "doc_count": 11,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 11
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-THYM",
+ "doc_count": 11,
+ "genes": {
+ "doc_count": 59647,
+ "my_genes": {
+ "doc_count": 11,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 11
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "BEATAML1.0-COHORT",
+ "doc_count": 10,
+ "genes": {
+ "doc_count": 279,
+ "my_genes": {
+ "doc_count": 10,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 10
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TARGET-OS",
+ "doc_count": 10,
+ "genes": {
+ "doc_count": 414,
+ "my_genes": {
+ "doc_count": 10,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 10
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-LAML",
+ "doc_count": 10,
+ "genes": {
+ "doc_count": 10175,
+ "my_genes": {
+ "doc_count": 10,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 10
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-DLBC",
+ "doc_count": 9,
+ "genes": {
+ "doc_count": 63497,
+ "my_genes": {
+ "doc_count": 9,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 9
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TCGA-CHOL",
+ "doc_count": 8,
+ "genes": {
+ "doc_count": 52960,
+ "my_genes": {
+ "doc_count": 8,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 8
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CMI-MPC",
+ "doc_count": 7,
+ "genes": {
+ "doc_count": 365,
+ "my_genes": {
+ "doc_count": 7,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 7
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "CMI-ASC",
+ "doc_count": 6,
+ "genes": {
+ "doc_count": 5745,
+ "my_genes": {
+ "doc_count": 6,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 6
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TARGET-WT",
+ "doc_count": 3,
+ "genes": {
+ "doc_count": 51,
+ "my_genes": {
+ "doc_count": 3,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 3
+ }
+ ]
+ }
+ }
+ }
+ },
+ {
+ "key": "TARGET-ALL-P3",
+ "doc_count": 2,
+ "genes": {
+ "doc_count": 66,
+ "my_genes": {
+ "doc_count": 2,
+ "gene_id": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "ENSG00000155657",
+ "doc_count": 2
+ }
+ ]
+ }
+ }
+ }
+ }
+ ]
+ }
+ }
+
+```
+
+This portion of the output shows TCGA-GBM including 45 cases that have `ssms` in the gene `ENSG00000155657`.
+
+__Example 2:__ The following demonstrates a use of the `/analysis/top_mutated_genes_by_project` endpoint. This will output the genes that are mutated in the most cases in "TCGA-DLBC" and will count the mutations that have a `HIGH` or `MODERATE` impact on gene function. Note that the `_score` field in the response does not represent the number of mutations in a given gene; it is a calculated value used to determine which genes have the greatest number of unique mutations.
+
+```json
+{
+ "op":"AND",
+ "content":[
+ {
+ "op":"in",
+ "content":{
+ "field":"case.project.project_id",
+ "value":[
+ "TCGA-DLBC"
+ ]
+ }
+ },
+ {
+ "op":"in",
+ "content":{
+ "field":"case.ssm.consequence.transcript.annotation.vep_impact",
+ "value":[
+ "HIGH",
+ "MODERATE"
+ ]
+ }
+ }
+ ]
+}
+```
+```Shell
+curl "https://api.gdc.cancer.gov/analysis/top_mutated_genes_by_project?fields=gene_id,symbol&filters=%7B%20%20%0A%20%20%20%22op%22%3A%22AND%22%2C%0A%20%20%20%22content%22%3A%5B%20%20%0A%20%20%20%20%20%20%7B%20%20%0A%20%20%20%20%20%20%20%20%20%22op%22%3A%22in%22%2C%0A%20%20%20%20%20%20%20%20%20%22content%22%3A%7B%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%22field%22%3A%22case.project.project_id%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22value%22%3A%5B%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%22TCGA-DLBC%22%0A%20%20%20%20%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%2C%0A%20%20%20%20%20%20%7B%20%20%0A%20%20%20%20%20%20%20%20%20%22op%22%3A%22in%22%2C%0A%20%20%20%20%20%20%20%20%20%22content%22%3A%7B%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%22field%22%3A%22case.ssm.consequence.transcript.annotation.vep_impact%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22value%22%3A%5B%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%22HIGH%22%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%22MODERATE%22%0A%20%20%20%20%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%5D%0A%7D%0A&pretty=true"
+```
+```Response
+{
+ "data": {
+ "hits": [
+ {
+ "symbol": "KMT2D",
+ "gene_id": "ENSG00000167548",
+ "_score": 13.0
+ },
+ {
+ "symbol": "BTG2",
+ "gene_id": "ENSG00000159388",
+ "_score": 13.0
+ },
+ {
+ "symbol": "B2M",
+ "gene_id": "ENSG00000166710",
+ "_score": 11.0
+ },
+ {
+ "symbol": "PIM1",
+ "gene_id": "ENSG00000137193",
+ "_score": 10.0
+ },
+ {
+ "symbol": "IGHG1",
+ "gene_id": "ENSG00000211896",
+ "_score": 10.0
+ },
+ {
+ "symbol": "CARD11",
+ "gene_id": "ENSG00000198286",
+ "_score": 10.0
+ },
+ {
+ "symbol": "H1-4",
+ "gene_id": "ENSG00000168298",
+ "_score": 9.0
+ },
+ {
+ "symbol": "PCLO",
+ "gene_id": "ENSG00000186472",
+ "_score": 9.0
+ },
+ {
+ "symbol": "IGHG2",
+ "gene_id": "ENSG00000211893",
+ "_score": 9.0
+ },
+ {
+ "symbol": "FAT4",
+ "gene_id": "ENSG00000196159",
+ "_score": 8.0
+ }
+ ],
+ "pagination": {
+ "count": 10,
+ "total": 3500,
+ "size": 10,
+ "from": 0,
+ "sort": "None",
+ "page": 1,
+ "pages": 350
+ }
+ },
+ "warnings": {}
+}
+```
+
+__Example 3:__ The `/analysis/top_mutated_cases_by_gene` endpoint will generate information about the cases that are most affected by mutations in a given set of genes. Below, the file count for each data category is given for the cases most affected by mutations in the 50 genes listed in the request filter. The size of the output is limited to two cases with the `size=2` parameter, but a higher value can be set by the user.
+
+```Shell
+curl "https://api.gdc.cancer.gov/analysis/top_mutated_cases_by_gene?fields=diagnoses.age_at_diagnosis,diagnoses.primary_diagnosis,demographic.gender,demographic.race,demographic.ethnicity,case_id,summary.data_categories.file_count,summary.data_categories.data_category&filters=%7B%22op%22%3A%22and%22%2C%22content%22%3A%5B%7B%22op%22%3A%22%3D%22%2C%22content%22%3A%7B%22field%22%3A%22cases.project.project_id%22%2C%22value%22%3A%22TCGA-DLBC%22%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22genes.gene_id%22%2C%22value%22%3A%5B%22ENSG00000166710%22%2C%22ENSG00000005339%22%2C%22ENSG00000083857%22%2C%22ENSG00000168769%22%2C%22ENSG00000100906%22%2C%22ENSG00000184677%22%2C%22ENSG00000101680%22%2C%22ENSG00000101266%22%2C%22ENSG00000028277%22%2C%22ENSG00000140968%22%2C%22ENSG00000181827%22%2C%22ENSG00000116815%22%2C%22ENSG00000275221%22%2C%22ENSG00000139083%22%2C%22ENSG00000112851%22%2C%22ENSG00000112697%22%2C%22ENSG00000164134%22%2C%22ENSG00000009413%22%2C%22ENSG00000071626%22%2C%22ENSG00000135407%22%2C%22ENSG00000101825%22%2C%22ENSG00000104814%22%2C%22ENSG00000166415%22%2C%22ENSG00000142867%22%2C%22ENSG00000254585%22%2C%22ENSG00000139718%22%2C%22ENSG00000077721%22%2C%22ENSG00000130294%22%2C%22ENSG00000117245%22%2C%22ENSG00000117318%22%2C%22ENSG00000270550%22%2C%22ENSG00000163637%22%2C%22ENSG00000166575%22%2C%22ENSG00000065526%22%2C%22ENSG00000156453%22%2C%22ENSG00000128191%22%2C%22ENSG00000055609%22%2C%22ENSG00000204469%22%2C%22ENSG00000187605%22%2C%22ENSG00000185875%22%2C%22ENSG00000110888%22%2C%22ENSG00000007341%22%2C%22ENSG00000173198%22%2C%22ENSG00000115568%22%2C%22ENSG00000163714%22%2C%22ENSG00000125772%22%2C%22ENSG00000080815%22%2C%22ENSG00000189079%22%2C%22ENSG00000120837%22%2C%22ENSG00000143951%22%5D%7D%7D%2C%7B%22op%22%3A%22in%22%2C%22content%22%3A%7B%22field%22%3A%22ssms.consequence.transcript.annotation.vep_impact%22%2C%22value%22%3A%5B%22HIGH%22%5D%7D%7D%5D%7D&pretty=true&size=2"
```
```Response
{
- "hits": {
- "hits": [],
- "total": 14551,
- "max_score": 0.0
+ "data": {
+ "hits": [
+ {
+ "summary": {
+ "data_categories": [
+ {
+ "file_count": 6,
+ "data_category": "Sequencing Reads"
+ },
+ {
+ "file_count": 14,
+ "data_category": "Biospecimen"
+ },
+ {
+ "file_count": 8,
+ "data_category": "Copy Number Variation"
+ },
+ {
+ "file_count": 16,
+ "data_category": "Simple Nucleotide Variation"
+ },
+ {
+ "file_count": 4,
+ "data_category": "Transcriptome Profiling"
+ },
+ {
+ "file_count": 3,
+ "data_category": "DNA Methylation"
+ },
+ {
+ "file_count": 8,
+ "data_category": "Clinical"
+ },
+ {
+ "file_count": 4,
+ "data_category": "Structural Variation"
+ },
+ {
+ "file_count": 1,
+ "data_category": "Proteome Profiling"
+ }
+ ]
+ },
+ "case_id": "eda9496e-be80-4a13-bf06-89f0cc9e937f",
+ "diagnoses": [
+ {
+ "age_at_diagnosis": 18691,
+ "primary_diagnosis": "Malignant lymphoma, large B-cell, diffuse, NOS"
+ }
+ ],
+ "demographic": {
+ "ethnicity": "hispanic or latino",
+ "gender": "male",
+ "race": "white"
+ },
+ "_score": 7.0
+ },
+ {
+ "summary": {
+ "data_categories": [
+ {
+ "file_count": 4,
+ "data_category": "Sequencing Reads"
+ },
+ {
+ "file_count": 13,
+ "data_category": "Biospecimen"
+ },
+ {
+ "file_count": 8,
+ "data_category": "Copy Number Variation"
+ },
+ {
+ "file_count": 16,
+ "data_category": "Simple Nucleotide Variation"
+ },
+ {
+ "file_count": 2,
+ "data_category": "Transcriptome Profiling"
+ },
+ {
+ "file_count": 3,
+ "data_category": "DNA Methylation"
+ },
+ {
+ "file_count": 8,
+ "data_category": "Clinical"
+ },
+ {
+ "file_count": 4,
+ "data_category": "Structural Variation"
+ }
+ ]
+ },
+ "case_id": "7a589441-11ef-4158-87e7-3951d86bc2aa",
+ "diagnoses": [
+ {
+ "age_at_diagnosis": 20812,
+ "primary_diagnosis": "Malignant lymphoma, large B-cell, diffuse, NOS"
+ }
+ ],
+ "demographic": {
+ "ethnicity": "not hispanic or latino",
+ "gender": "female",
+ "race": "white"
+ },
+ "_score": 4.0
+ }
+ ],
+ "pagination": {
+ "count": 2,
+ "total": 32,
+ "size": 2,
+ "from": 0,
+ "sort": "None",
+ "page": 1,
+ "pages": 16
+ }
},
+ "warnings": {}
+}
+```
+
+__Example 4:__ The `/analysis/mutated_cases_count_by_project` endpoint produces counts for the number of cases that have associated `ssm` data in each project. The number of affected cases can be found under `"case_with_ssm": {"doc_count": $case_count}`.
+
+```Shell
+curl "https://api.gdc.cancer.gov/analysis/mutated_cases_count_by_project?size=0&pretty=true"
+```
+```Response
+{
+ "took": 9,
+ "timed_out": false,
"_shards": {
- "successful": 9,
- "failed": 0,
- "total": 9
+ "total": 12,
+ "successful": 12,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": {
+ "value": 44451,
+ "relation": "eq"
+ },
+ "max_score": null,
+ "hits": []
},
- "took": 4,
"aggregations": {
"projects": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
"buckets": [
{
+ "key": "FM-AD",
+ "doc_count": 18004,
+ "case_summary": {
+ "doc_count": 54012,
+ "case_with_ssm": {
+ "doc_count": 18004
+ }
+ }
+ },
+ {
+ "key": "TARGET-AML",
+ "doc_count": 2492,
+ "case_summary": {
+ "doc_count": 10780,
+ "case_with_ssm": {
+ "doc_count": 22
+ }
+ }
+ },
+ {
+ "key": "TARGET-ALL-P2",
+ "doc_count": 1587,
+ "case_summary": {
+ "doc_count": 5562,
+ "case_with_ssm": {
+ "doc_count": 717
+ }
+ }
+ },
+ {
+ "key": "MP2PRT-ALL",
+ "doc_count": 1510,
+ "case_summary": {
+ "doc_count": 10472,
+ "case_with_ssm": {
+ "doc_count": 1508
+ }
+ }
+ },
+ {
+ "key": "CPTAC-3",
+ "doc_count": 1235,
+ "case_summary": {
+ "doc_count": 8508,
+ "case_with_ssm": {
+ "doc_count": 1218
+ }
+ }
+ },
+ {
+ "key": "TARGET-NBL",
+ "doc_count": 1132,
+ "case_summary": {
+ "doc_count": 3007,
+ "case_with_ssm": {
+ "doc_count": 220
+ }
+ }
+ },
+ {
+ "key": "TCGA-BRCA",
+ "doc_count": 1098,
+ "case_summary": {
+ "doc_count": 9735,
+ "case_with_ssm": {
+ "doc_count": 1098
+ }
+ }
+ },
+ {
+ "key": "MMRF-COMMPASS",
+ "doc_count": 995,
+ "case_summary": {
+ "doc_count": 3528,
+ "case_with_ssm": {
+ "doc_count": 959
+ }
+ }
+ },
+ {
+ "key": "BEATAML1.0-COHORT",
+ "doc_count": 826,
+ "case_summary": {
+ "doc_count": 2891,
+ "case_with_ssm": {
+ "doc_count": 759
+ }
+ }
+ },
+ {
+ "key": "TARGET-WT",
+ "doc_count": 652,
+ "case_summary": {
+ "doc_count": 3045,
+ "case_with_ssm": {
+ "doc_count": 631
+ }
+ }
+ },
+ {
+ "key": "TCGA-GBM",
+ "doc_count": 617,
+ "case_summary": {
+ "doc_count": 3867,
+ "case_with_ssm": {
+ "doc_count": 600
+ }
+ }
+ },
+ {
+ "key": "TCGA-OV",
+ "doc_count": 608,
+ "case_summary": {
+ "doc_count": 5028,
+ "case_with_ssm": {
+ "doc_count": 599
+ }
+ }
+ },
+ {
+ "key": "TCGA-LUAD",
+ "doc_count": 585,
+ "case_summary": {
+ "doc_count": 4825,
+ "case_with_ssm": {
+ "doc_count": 571
+ }
+ }
+ },
+ {
+ "key": "TCGA-UCEC",
+ "doc_count": 560,
+ "case_summary": {
+ "doc_count": 4559,
+ "case_with_ssm": {
+ "doc_count": 559
+ }
+ }
+ },
+ {
+ "key": "TCGA-KIRC",
+ "doc_count": 537,
+ "case_summary": {
+ "doc_count": 4768,
+ "case_with_ssm": {
+ "doc_count": 534
+ }
+ }
+ },
+ {
+ "key": "TCGA-HNSC",
+ "doc_count": 528,
+ "case_summary": {
+ "doc_count": 4577,
+ "case_with_ssm": {
+ "doc_count": 528
+ }
+ }
+ },
+ {
+ "key": "TCGA-LGG",
+ "doc_count": 516,
+ "case_summary": {
+ "doc_count": 4570,
+ "case_with_ssm": {
+ "doc_count": 516
+ }
+ }
+ },
+ {
+ "key": "TCGA-THCA",
+ "doc_count": 507,
+ "case_summary": {
+ "doc_count": 4442,
+ "case_with_ssm": {
+ "doc_count": 507
+ }
+ }
+ },
+ {
+ "key": "TCGA-LUSC",
+ "doc_count": 504,
"case_summary": {
+ "doc_count": 4425,
"case_with_ssm": {
- "doc_count": 216
- },
- "doc_count": 637
- },
- "key": "TARGET-NBL",
- "doc_count": 1127
+ "doc_count": 504
+ }
+ }
},
{
+ "key": "TCGA-PRAD",
+ "doc_count": 500,
"case_summary": {
+ "doc_count": 4365,
"case_with_ssm": {
- "doc_count": 1044
- },
- "doc_count": 7625
- },
- "key": "TCGA-BRCA",
- "doc_count": 1098
+ "doc_count": 500
+ }
+ }
},
{
+ "key": "NCICCR-DLBCL",
+ "doc_count": 489,
"case_summary": {
+ "doc_count": 1451,
"case_with_ssm": {
- "doc_count": 8
- },
- "doc_count": 579
- },
- "key": "TARGET-AML",
- "doc_count": 988
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "TCGA-SKCM",
+ "doc_count": 470,
"case_summary": {
+ "doc_count": 4125,
"case_with_ssm": {
- "doc_count": 34
- },
- "doc_count": 290
- },
- "key": "TARGET-WT",
- "doc_count": 652
+ "doc_count": 470
+ }
+ }
},
{
+ "key": "TCGA-COAD",
+ "doc_count": 461,
"case_summary": {
+ "doc_count": 3888,
"case_with_ssm": {
- "doc_count": 396
- },
- "doc_count": 3197
- },
- "key": "TCGA-GBM",
- "doc_count": 617
+ "doc_count": 461
+ }
+ }
},
{
+ "key": "TCGA-STAD",
+ "doc_count": 443,
"case_summary": {
+ "doc_count": 3884,
"case_with_ssm": {
"doc_count": 443
- },
- "doc_count": 3880
- },
- "key": "TCGA-OV",
- "doc_count": 608
+ }
+ }
},
{
+ "key": "REBC-THYR",
+ "doc_count": 440,
"case_summary": {
+ "doc_count": 2456,
"case_with_ssm": {
- "doc_count": 569
- },
- "doc_count": 3874
- },
- "key": "TCGA-LUAD",
- "doc_count": 585
+ "doc_count": 380
+ }
+ }
},
{
+ "key": "TCGA-BLCA",
+ "doc_count": 412,
"case_summary": {
+ "doc_count": 3645,
"case_with_ssm": {
- "doc_count": 542
- },
- "doc_count": 3874
- },
- "key": "TCGA-UCEC",
- "doc_count": 560
+ "doc_count": 412
+ }
+ }
},
{
+ "key": "TARGET-OS",
+ "doc_count": 383,
"case_summary": {
+ "doc_count": 1276,
"case_with_ssm": {
- "doc_count": 339
- },
- "doc_count": 3547
- },
- "key": "TCGA-KIRC",
- "doc_count": 537
+ "doc_count": 97
+ }
+ }
},
{
+ "key": "TCGA-LIHC",
+ "doc_count": 377,
"case_summary": {
+ "doc_count": 3204,
"case_with_ssm": {
- "doc_count": 510
- },
- "doc_count": 3671
- },
- "key": "TCGA-HNSC",
- "doc_count": 528
+ "doc_count": 377
+ }
+ }
},
{
+ "key": "CPTAC-2",
+ "doc_count": 342,
"case_summary": {
+ "doc_count": 1349,
"case_with_ssm": {
- "doc_count": 513
- },
- "doc_count": 3606
- },
- "key": "TCGA-LGG",
- "doc_count": 516
+ "doc_count": 328
+ }
+ }
},
{
+ "key": "TRIO-CRU",
+ "doc_count": 339,
"case_summary": {
+ "doc_count": 339,
"case_with_ssm": {
- "doc_count": 496
- },
- "doc_count": 3536
- },
- "key": "TCGA-THCA",
- "doc_count": 507
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "CGCI-BLGSP",
+ "doc_count": 324,
"case_summary": {
+ "doc_count": 2076,
"case_with_ssm": {
- "doc_count": 497
- },
- "doc_count": 3520
- },
- "key": "TCGA-LUSC",
- "doc_count": 504
+ "doc_count": 262
+ }
+ }
},
{
+ "key": "TCGA-CESC",
+ "doc_count": 307,
"case_summary": {
+ "doc_count": 2623,
"case_with_ssm": {
- "doc_count": 498
- },
- "doc_count": 3490
- },
- "key": "TCGA-PRAD",
- "doc_count": 500
+ "doc_count": 306
+ }
+ }
},
{
+ "key": "TCGA-KIRP",
+ "doc_count": 291,
"case_summary": {
+ "doc_count": 2568,
"case_with_ssm": {
- "doc_count": 470
- },
- "doc_count": 3289
- },
- "key": "TCGA-SKCM",
- "doc_count": 470
+ "doc_count": 291
+ }
+ }
},
{
+ "key": "HCMI-CMDC",
+ "doc_count": 278,
"case_summary": {
+ "doc_count": 2420,
"case_with_ssm": {
- "doc_count": 433
- },
- "doc_count": 3188
- },
- "key": "TCGA-COAD",
- "doc_count": 461
+ "doc_count": 277
+ }
+ }
+ },
+ {
+ "key": "TCGA-TGCT",
+ "doc_count": 263,
+ "case_summary": {
+ "doc_count": 2124,
+ "case_with_ssm": {
+ "doc_count": 262
+ }
+ }
+ },
+ {
+ "key": "TCGA-SARC",
+ "doc_count": 261,
+ "case_summary": {
+ "doc_count": 2309,
+ "case_with_ssm": {
+ "doc_count": 261
+ }
+ }
+ },
+ {
+ "key": "CGCI-HTMCP-CC",
+ "doc_count": 212,
+ "case_summary": {
+ "doc_count": 1452,
+ "case_with_ssm": {
+ "doc_count": 206
+ }
+ }
+ },
+ {
+ "key": "CMI-MBC",
+ "doc_count": 200,
+ "case_summary": {
+ "doc_count": 653,
+ "case_with_ssm": {
+ "doc_count": 174
+ }
+ }
+ },
+ {
+ "key": "TCGA-LAML",
+ "doc_count": 200,
+ "case_summary": {
+ "doc_count": 1533,
+ "case_with_ssm": {
+ "doc_count": 200
+ }
+ }
+ },
+ {
+ "key": "TARGET-ALL-P3",
+ "doc_count": 191,
+ "case_summary": {
+ "doc_count": 782,
+ "case_with_ssm": {
+ "doc_count": 86
+ }
+ }
+ },
+ {
+ "key": "TCGA-ESCA",
+ "doc_count": 185,
+ "case_summary": {
+ "doc_count": 1623,
+ "case_with_ssm": {
+ "doc_count": 185
+ }
+ }
+ },
+ {
+ "key": "TCGA-PAAD",
+ "doc_count": 185,
+ "case_summary": {
+ "doc_count": 1720,
+ "case_with_ssm": {
+ "doc_count": 185
+ }
+ }
+ },
+ {
+ "key": "TCGA-PCPG",
+ "doc_count": 179,
+ "case_summary": {
+ "doc_count": 1512,
+ "case_with_ssm": {
+ "doc_count": 179
+ }
+ }
+ },
+ {
+ "key": "OHSU-CNL",
+ "doc_count": 176,
+ "case_summary": {
+ "doc_count": 494,
+ "case_with_ssm": {
+ "doc_count": 158
+ }
+ }
+ },
+ {
+ "key": "TCGA-READ",
+ "doc_count": 172,
+ "case_summary": {
+ "doc_count": 1414,
+ "case_with_ssm": {
+ "doc_count": 171
+ }
+ }
+ },
+ {
+ "key": "TCGA-THYM",
+ "doc_count": 124,
+ "case_summary": {
+ "doc_count": 1078,
+ "case_with_ssm": {
+ "doc_count": 124
+ }
+ }
+ },
+ {
+ "key": "TCGA-KICH",
+ "doc_count": 113,
+ "case_summary": {
+ "doc_count": 705,
+ "case_with_ssm": {
+ "doc_count": 66
+ }
+ }
+ },
+ {
+ "key": "WCDT-MCRPC",
+ "doc_count": 101,
+ "case_summary": {
+ "doc_count": 299,
+ "case_with_ssm": {
+ "doc_count": 0
+ }
+ }
+ },
+ {
+ "key": "TCGA-ACC",
+ "doc_count": 92,
+ "case_summary": {
+ "doc_count": 809,
+ "case_with_ssm": {
+ "doc_count": 92
+ }
+ }
+ },
+ {
+ "key": "APOLLO-LUAD",
+ "doc_count": 87,
+ "case_summary": {
+ "doc_count": 510,
+ "case_with_ssm": {
+ "doc_count": 83
+ }
+ }
+ },
+ {
+ "key": "TCGA-MESO",
+ "doc_count": 87,
+ "case_summary": {
+ "doc_count": 813,
+ "case_with_ssm": {
+ "doc_count": 87
+ }
+ }
+ },
+ {
+ "key": "EXCEPTIONAL_RESPONDERS-ER",
+ "doc_count": 84,
+ "case_summary": {
+ "doc_count": 412,
+ "case_with_ssm": {
+ "doc_count": 20
+ }
+ }
+ },
+ {
+ "key": "TCGA-UVM",
+ "doc_count": 80,
+ "case_summary": {
+ "doc_count": 700,
+ "case_with_ssm": {
+ "doc_count": 80
+ }
+ }
+ },
+ {
+ "key": "CGCI-HTMCP-DLBCL",
+ "doc_count": 70,
+ "case_summary": {
+ "doc_count": 465,
+ "case_with_ssm": {
+ "doc_count": 50
+ }
+ }
+ },
+ {
+ "key": "ORGANOID-PANCREATIC",
+ "doc_count": 70,
+ "case_summary": {
+ "doc_count": 225,
+ "case_with_ssm": {
+ "doc_count": 57
+ }
+ }
+ },
+ {
+ "key": "TARGET-RT",
+ "doc_count": 69,
+ "case_summary": {
+ "doc_count": 404,
+ "case_with_ssm": {
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "CMI-MPC",
+ "doc_count": 63,
"case_summary": {
+ "doc_count": 199,
"case_with_ssm": {
- "doc_count": 441
- },
- "doc_count": 3095
- },
- "key": "TCGA-STAD",
- "doc_count": 443
+ "doc_count": 60
+ }
+ }
},
{
+ "key": "MATCH-I",
+ "doc_count": 60,
"case_summary": {
+ "doc_count": 345,
"case_with_ssm": {
- "doc_count": 412
- },
- "doc_count": 2884
- },
- "key": "TCGA-BLCA",
- "doc_count": 412
+ "doc_count": 57
+ }
+ }
},
{
+ "key": "TCGA-DLBC",
+ "doc_count": 58,
"case_summary": {
+ "doc_count": 441,
"case_with_ssm": {
- "doc_count": 0
- },
- "doc_count": 0
- },
- "key": "TARGET-OS",
- "doc_count": 381
+ "doc_count": 50
+ }
+ }
},
{
+ "key": "TCGA-UCS",
+ "doc_count": 57,
"case_summary": {
+ "doc_count": 504,
"case_with_ssm": {
- "doc_count": 375
- },
- "doc_count": 2635
- },
- "key": "TCGA-LIHC",
- "doc_count": 377
+ "doc_count": 57
+ }
+ }
},
{
+ "key": "BEATAML1.0-CRENOLANIB",
+ "doc_count": 56,
"case_summary": {
+ "doc_count": 107,
"case_with_ssm": {
- "doc_count": 305
- },
- "doc_count": 2142
- },
- "key": "TCGA-CESC",
- "doc_count": 307
+ "doc_count": 51
+ }
+ }
},
{
+ "key": "MP2PRT-WT",
+ "doc_count": 52,
"case_summary": {
+ "doc_count": 361,
"case_with_ssm": {
- "doc_count": 288
- },
- "doc_count": 2033
- },
- "key": "TCGA-KIRP",
- "doc_count": 291
+ "doc_count": 51
+ }
+ }
},
{
+ "key": "TCGA-CHOL",
+ "doc_count": 51,
"case_summary": {
+ "doc_count": 378,
"case_with_ssm": {
- "doc_count": 255
- },
- "doc_count": 1821
- },
- "key": "TCGA-SARC",
- "doc_count": 261
+ "doc_count": 51
+ }
+ }
},
{
+ "key": "CDDP_EAGLE-1",
+ "doc_count": 50,
"case_summary": {
+ "doc_count": 384,
"case_with_ssm": {
- "doc_count": 149
- },
- "doc_count": 1192
- },
- "key": "TCGA-LAML",
- "doc_count": 200
+ "doc_count": 50
+ }
+ }
},
{
+ "key": "CTSP-DLBCL1",
+ "doc_count": 45,
"case_summary": {
+ "doc_count": 201,
"case_with_ssm": {
- "doc_count": 184
- },
- "doc_count": 1293
- },
- "key": "TCGA-ESCA",
- "doc_count": 185
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "MATCH-W",
+ "doc_count": 45,
"case_summary": {
+ "doc_count": 265,
"case_with_ssm": {
- "doc_count": 183
- },
- "doc_count": 1285
- },
- "key": "TCGA-PAAD",
- "doc_count": 185
+ "doc_count": 44
+ }
+ }
},
{
+ "key": "MATCH-Z1A",
+ "doc_count": 45,
"case_summary": {
+ "doc_count": 262,
"case_with_ssm": {
- "doc_count": 179
- },
- "doc_count": 1253
- },
- "key": "TCGA-PCPG",
- "doc_count": 179
+ "doc_count": 43
+ }
+ }
},
{
+ "key": "CGCI-HTMCP-LC",
+ "doc_count": 39,
"case_summary": {
+ "doc_count": 292,
"case_with_ssm": {
- "doc_count": 158
- },
- "doc_count": 1169
- },
- "key": "TCGA-READ",
- "doc_count": 172
+ "doc_count": 34
+ }
+ }
},
{
+ "key": "CMI-ASC",
+ "doc_count": 36,
"case_summary": {
+ "doc_count": 124,
"case_with_ssm": {
- "doc_count": 150
- },
- "doc_count": 1018
- },
- "key": "TCGA-TGCT",
- "doc_count": 150
+ "doc_count": 36
+ }
+ }
},
{
+ "key": "MATCH-Z1D",
+ "doc_count": 36,
"case_summary": {
+ "doc_count": 212,
"case_with_ssm": {
- "doc_count": 123
- },
- "doc_count": 867
- },
- "key": "TCGA-THYM",
- "doc_count": 124
+ "doc_count": 34
+ }
+ }
},
{
+ "key": "MATCH-Q",
+ "doc_count": 35,
"case_summary": {
+ "doc_count": 203,
"case_with_ssm": {
- "doc_count": 66
- },
- "doc_count": 556
- },
- "key": "TCGA-KICH",
- "doc_count": 113
+ "doc_count": 34
+ }
+ }
},
{
+ "key": "MATCH-B",
+ "doc_count": 33,
"case_summary": {
+ "doc_count": 187,
"case_with_ssm": {
- "doc_count": 92
- },
- "doc_count": 620
- },
- "key": "TCGA-ACC",
- "doc_count": 92
+ "doc_count": 32
+ }
+ }
},
{
+ "key": "MATCH-Y",
+ "doc_count": 31,
"case_summary": {
+ "doc_count": 181,
"case_with_ssm": {
- "doc_count": 83
- },
- "doc_count": 605
- },
- "key": "TCGA-MESO",
- "doc_count": 87
+ "doc_count": 30
+ }
+ }
},
{
+ "key": "TARGET-ALL-P1",
+ "doc_count": 24,
"case_summary": {
+ "doc_count": 86,
"case_with_ssm": {
- "doc_count": 80
- },
- "doc_count": 560
- },
- "key": "TCGA-UVM",
- "doc_count": 80
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "MATCH-U",
+ "doc_count": 23,
"case_summary": {
+ "doc_count": 137,
"case_with_ssm": {
- "doc_count": 0
- },
- "doc_count": 163
- },
- "key": "TARGET-RT",
- "doc_count": 75
+ "doc_count": 22
+ }
+ }
},
{
+ "key": "MATCH-H",
+ "doc_count": 21,
"case_summary": {
+ "doc_count": 122,
"case_with_ssm": {
- "doc_count": 48
- },
- "doc_count": 346
- },
- "key": "TCGA-DLBC",
- "doc_count": 58
+ "doc_count": 21
+ }
+ }
},
{
+ "key": "MATCH-N",
+ "doc_count": 21,
"case_summary": {
+ "doc_count": 120,
"case_with_ssm": {
- "doc_count": 57
- },
- "doc_count": 399
- },
- "key": "TCGA-UCS",
- "doc_count": 57
+ "doc_count": 21
+ }
+ }
},
{
+ "key": "TARGET-CCSK",
+ "doc_count": 13,
"case_summary": {
+ "doc_count": 100,
"case_with_ssm": {
- "doc_count": 51
- },
- "doc_count": 306
- },
- "key": "TCGA-CHOL",
- "doc_count": 51
+ "doc_count": 0
+ }
+ }
},
{
+ "key": "VAREPOP-APOLLO",
+ "doc_count": 7,
"case_summary": {
+ "doc_count": 14,
"case_with_ssm": {
- "doc_count": 0
- },
- "doc_count": 13
- },
- "key": "TARGET-CCSK",
- "doc_count": 13
+ "doc_count": 7
+ }
+ }
}
- ],
- "sum_other_doc_count": 0,
- "doc_count_error_upper_bound": 0
+ ]
}
- },
- "timed_out": false
+ }
}
```
+
### Survival Analysis Endpoint
-[Survival plots](/Data_Portal/Projects/#Survival-Analysis) are generated for different subsets of data, based on variants or projects, in the GDC Data Portal. The `/analysis/survival` endpoint can be used to programmatically retrieve the raw data used to generate these plots and apply different filters. Note that the `fields` and `format` parameters cannot be modified.
+[Survival plots](/Data_Portal/Users_Guide/Exploration/#survival-analysis) are generated for different subsets of data, based on variants or projects, in the GDC Data Portal. The `/analysis/survival` endpoint can be used to programmatically retrieve the raw data used to generate these plots and apply different filters. Note that the `fields` and `format` parameters cannot be modified.
__Example 1:__ A user wants to download data to generate a survival plot for cases from the project TCGA-DLBC.
@@ -946,298 +2704,392 @@ curl "https://api.gdc.cancer.gov/analysis/survival?filters=%5B%7B%22op%22%3A%22%
```
```Response
{
- "overallStats": {},
"results": [
{
+ "meta": {
+ "id": 139834474037000
+ },
"donors": [
{
+ "time": 1.0,
+ "censored": true,
"survivalEstimate": 1,
"id": "dc87a809-95de-4eb7-a1c2-2650475f2d7e",
- "censored": true,
- "time": 1
+ "submitter_id": "TCGA-RQ-A6JB",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 17.0,
+ "censored": true,
"survivalEstimate": 1,
"id": "4dd86ebd-ef16-4b2b-9ea0-5d1d7afef257",
- "censored": true,
- "time": 17
+ "submitter_id": "TCGA-RQ-AAAT",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 58,
+ "censored": false,
"survivalEstimate": 1,
"id": "0bf573ac-cd1e-42d8-90cf-b30d7b08679c",
- "censored": false,
- "time": 58
+ "submitter_id": "TCGA-FA-A6HN",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 126.0,
+ "censored": true,
"survivalEstimate": 0.9777777777777777,
"id": "f978cb0f-d319-4c01-b4c5-23ae1403a106",
- "censored": true,
- "time": 126
+ "submitter_id": "TCGA-FF-8047",
+ "project_id": "TCGA-DLBC"
},
{
- "survivalEstimate": 0.9777777777777777,
- "id": "a43e5f0e-a21f-48d8-97e0-084d413680b7",
+ "time": 132.0,
"censored": true,
- "time": 132
- },
- {
"survivalEstimate": 0.9777777777777777,
"id": "1843c82e-7a35-474f-9f79-c0a9af9aa09c",
+ "submitter_id": "TCGA-FA-A4BB",
+ "project_id": "TCGA-DLBC"
+ },
+ {
+ "time": 132.0,
"censored": true,
- "time": 132
+ "survivalEstimate": 0.9777777777777777,
+ "id": "a43e5f0e-a21f-48d8-97e0-084d413680b7",
+ "submitter_id": "TCGA-FA-8693",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 248,
+ "censored": false,
"survivalEstimate": 0.9777777777777777,
"id": "0030a28c-81aa-44b0-8be0-b35e1dcbf98c",
- "censored": false,
- "time": 248
+ "submitter_id": "TCGA-FA-A7Q1",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 298.0,
+ "censored": true,
"survivalEstimate": 0.9539295392953929,
"id": "f553f1a9-ecf2-4783-a609-6adca7c4c597",
- "censored": true,
- "time": 298
+ "submitter_id": "TCGA-FF-A7CQ",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 313,
+ "censored": false,
"survivalEstimate": 0.9539295392953929,
"id": "f784bc3a-751b-4025-aab2-0af2f6f24266",
- "censored": false,
- "time": 313
+ "submitter_id": "TCGA-FF-A7CR",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 385.0,
+ "censored": true,
"survivalEstimate": 0.929469807518588,
"id": "29e3d122-15a1-4235-a356-b1a9f94ceb39",
- "censored": true,
- "time": 385
+ "submitter_id": "TCGA-FF-A7CX",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 391,
+ "censored": false,
"survivalEstimate": 0.929469807518588,
"id": "0e251c03-bf86-4ed8-b45d-3cbc97160502",
- "censored": false,
- "time": 391
+ "submitter_id": "TCGA-GS-A9U4",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 427.0,
+ "censored": true,
"survivalEstimate": 0.9043490019099776,
"id": "e6365b38-bc44-400c-b4aa-18ce8ff5bfce",
- "censored": true,
- "time": 427
+ "submitter_id": "TCGA-FA-A82F",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 553.0,
+ "censored": true,
"survivalEstimate": 0.9043490019099776,
"id": "b56bdbdb-43af-4a03-a072-54dd22d7550c",
- "censored": true,
- "time": 553
+ "submitter_id": "TCGA-FA-A86F",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 595,
+ "censored": false,
"survivalEstimate": 0.9043490019099776,
"id": "31bbad4e-3789-42ec-9faa-1cb86970f723",
- "censored": false,
- "time": 595
+ "submitter_id": "TCGA-G8-6907",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 679.0,
+ "censored": true,
"survivalEstimate": 0.8777505018538018,
"id": "0e9fcccc-0630-408d-a121-2c6413824cb7",
- "censored": true,
- "time": 679
+ "submitter_id": "TCGA-FF-8062",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 708,
+ "censored": false,
"survivalEstimate": 0.8777505018538018,
"id": "a5b188f0-a6d3-4d4a-b04f-36d47ec05338",
- "censored": false,
- "time": 708
+ "submitter_id": "TCGA-FA-A4XK",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 719.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "ed746cb9-0f2f-48ce-923a-3a9f9f00b331",
- "censored": true,
- "time": 719
+ "submitter_id": "TCGA-FA-A7DS",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 730.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "c85f340e-584b-4f3b-b6a5-540491fc8ad2",
- "censored": true,
- "time": 730
+ "submitter_id": "TCGA-GS-A9TV",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 749.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "69f23725-adca-48ac-9b33-80a7aae24cfe",
- "censored": true,
- "time": 749
+ "submitter_id": "TCGA-FA-A6HO",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 751.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "67325322-483f-443f-9ffa-2a20d108a2fb",
- "censored": true,
- "time": 751
+ "submitter_id": "TCGA-FF-8046",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 765.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "eda9496e-be80-4a13-bf06-89f0cc9e937f",
- "censored": true,
- "time": 765
+ "submitter_id": "TCGA-GS-A9TZ",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 788.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "25ff86af-beb4-480c-b706-f3fe0306f7cf",
- "censored": true,
- "time": 788
+ "submitter_id": "TCGA-RQ-A68N",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 791.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "1d0db5d7-39ca-466d-96b3-0d278c5ea768",
- "censored": true,
- "time": 791
+ "submitter_id": "TCGA-FF-A7CW",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 832.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "c8cde9ea-89e9-4ee8-8a46-417a48f6d3ab",
- "censored": true,
- "time": 832
+ "submitter_id": "TCGA-FF-8061",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 946.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "f0a326d2-1f3e-4a5d-bca8-32aaccc52338",
- "censored": true,
- "time": 946
+ "submitter_id": "TCGA-FF-8042",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 965.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "a8e2df1e-4042-42af-9231-3a00e83489f0",
- "censored": true,
- "time": 965
+ "submitter_id": "TCGA-FF-8043",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 972.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "e56e4d9c-052e-4ec6-a81b-dbd53e9c8ffe",
- "censored": true,
- "time": 972
+ "submitter_id": "TCGA-FM-8000",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 982.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "45b0cf9f-a879-417f-8f39-7770552252c0",
- "censored": true,
- "time": 982
+ "submitter_id": "TCGA-GS-A9TQ",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1081.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "1f971af1-6772-4fe6-8d35-bbe527a037fe",
- "censored": true,
- "time": 1081
+ "submitter_id": "TCGA-FF-8041",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1163.0,
+ "censored": true,
"survivalEstimate": 0.8503207986708705,
"id": "33365d22-cb83-4d8e-a2d1-06b675f75f6e",
- "censored": true,
- "time": 1163
+ "submitter_id": "TCGA-GS-A9TT",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1252,
+ "censored": false,
"survivalEstimate": 0.8503207986708705,
"id": "6a21c948-cd85-4150-8c01-83017d7dc1ed",
- "censored": false,
- "time": 1252
+ "submitter_id": "TCGA-G8-6324",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1299.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "f855dad1-6ffc-493e-ba6c-970874bc9210",
- "censored": true,
- "time": 1299
+ "submitter_id": "TCGA-GR-A4D5",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1334.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "c1c06604-5ae2-4a53-b9c0-eb210d38e3f0",
- "censored": true,
- "time": 1334
+ "submitter_id": "TCGA-GR-A4D6",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1373.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "58e66976-4507-4552-ac53-83a49a142dde",
- "censored": true,
- "time": 1373
+ "submitter_id": "TCGA-GS-A9TX",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1581.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "ea54dbad-1b23-41cc-9378-d4002a8fca51",
- "censored": true,
- "time": 1581
+ "submitter_id": "TCGA-G8-6325",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1581.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "d7df78b5-24f1-4ff4-bd9b-f0e6bec8289a",
- "censored": true,
- "time": 1581
+ "submitter_id": "TCGA-GR-A4D4",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1617.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "29aff186-c321-4ff9-b81b-105e27e620ff",
- "censored": true,
- "time": 1617
+ "submitter_id": "TCGA-GS-A9TW",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 1739.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "5eff68ff-f6c3-40c9-9fc8-00e684a7b712",
- "censored": true,
- "time": 1739
+ "submitter_id": "TCGA-GR-A4D9",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 2131.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "f8cf647b-1447-4ac3-8c43-bef07765cabf",
- "censored": true,
- "time": 2131
+ "submitter_id": "TCGA-G8-6326",
+ "project_id": "TCGA-DLBC"
},
{
- "survivalEstimate": 0.8003019281608192,
- "id": "c3d662ee-48d0-454a-bb0c-77d3338d3747",
+ "time": 2616.0,
"censored": true,
- "time": 2983
- },
- {
"survivalEstimate": 0.8003019281608192,
"id": "6e9437f0-a4ed-475c-ab0e-bf1431c70a90",
+ "submitter_id": "TCGA-GS-A9TY",
+ "project_id": "TCGA-DLBC"
+ },
+ {
+ "time": 2983.0,
"censored": true,
- "time": 3333
+ "survivalEstimate": 0.8003019281608192,
+ "id": "c3d662ee-48d0-454a-bb0c-77d3338d3747",
+ "submitter_id": "TCGA-GR-7353",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 3394.0,
+ "censored": true,
"survivalEstimate": 0.8003019281608192,
"id": "fdecb74f-ac4e-46b1-b23a-5f7fde96ef9f",
- "censored": true,
- "time": 3394
+ "submitter_id": "TCGA-GS-A9U3",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 3553,
+ "censored": false,
"survivalEstimate": 0.8003019281608192,
"id": "a468e725-ad4b-411d-ac5c-2eacc68ec580",
- "censored": false,
- "time": 3553
+ "submitter_id": "TCGA-G8-6909",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 3897.0,
+ "censored": true,
"survivalEstimate": 0.6402415425286554,
"id": "1ea575f1-f731-408b-a629-f5f4abab569e",
- "censored": true,
- "time": 3897
+ "submitter_id": "TCGA-GS-A9TU",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 4578.0,
+ "censored": true,
"survivalEstimate": 0.6402415425286554,
"id": "7a589441-11ef-4158-87e7-3951d86bc2aa",
- "censored": true,
- "time": 4578
+ "submitter_id": "TCGA-GR-7351",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 5980.0,
+ "censored": true,
"survivalEstimate": 0.6402415425286554,
"id": "3622cf29-600f-4410-84d4-a9afeb41c475",
- "censored": true,
- "time": 5980
+ "submitter_id": "TCGA-G8-6914",
+ "project_id": "TCGA-DLBC"
},
{
+ "time": 6425,
+ "censored": false,
"survivalEstimate": 0.6402415425286554,
"id": "3f5a897d-1eaa-4d4c-8324-27ac07c90927",
- "censored": false,
- "time": 6425
+ "submitter_id": "TCGA-G8-6906",
+ "project_id": "TCGA-DLBC"
}
- ],
- "meta": {
- "id": 140429063094496
- }
+ ]
}
- ]
+ ],
+ "overallStats": {}
}
```
diff --git a/docs/API/Users_Guide/Downloading_Files.md b/docs/API/Users_Guide/Downloading_Files.md
index 975902ed2..997296ed3 100644
--- a/docs/API/Users_Guide/Downloading_Files.md
+++ b/docs/API/Users_Guide/Downloading_Files.md
@@ -2,9 +2,7 @@
The GDC API implements file download functionality using `data` and `manifest` endpoints. The `data` endpoint allows users to download files stored in the GDC by specifying file UUID(s). The `manifest` endpoint generates a download manifest file that can be used with the GDC Data Transfer Tool to transfer large volumes of data.
-**Note:** Downloading controlled access data requires the use of an authentication token. See [Getting Started: Authentication](Getting_Started.md#authentication) for details.
-
-**Note:** Requests to download data from the GDC Legacy Archive may be directed to `legacy/data` or `data`. See [Getting Started: Legacy Archive](Getting_Started.md#gdc-legacy-archive) for details.
+>**Note:** Downloading controlled access data requires the use of an authentication token. See [Getting Started: Authentication](Getting_Started.md#authentication) for details.
## Data endpoint
@@ -24,25 +22,25 @@ curl --remote-name --remote-header-name 'https://api.gdc.cancer.gov/data/5b2974a
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 6111k 100 6111k 0 0 414k 0 0:00:14 0:00:14 --:--:-- 412k
-curl: Saved to filename '14-3-3_beta-R-V_GBL1112940.tif'
+
```
### Related Files
If the `related_files=true` parameter is specified, the following related files, if available, will be included in the download package by the GDC API:
* BAM index files (BAI files)
-* Metadata files (such as SRA XML or MAGE-TAB files)
+* VCF index files (TBI files)
-For example, this request will download a legacy copy number segmentation file and its associated MAGE-TAB metadata file:
+For example, this request will download a BAM file and its associated BAI file:
```shell
-curl --remote-name --remote-header-name 'https://api.gdc.cancer.gov/data/7efc039a-fde3-4bc1-9433-2fc6b5e3ffa5?related_files=true'
+curl --remote-name --remote-header-name -H "x-auth-token: $token" "https://api.gdc.cancer.gov/data/f587ef82-acbe-44f9-ad5a-6207e148f61f?related_files=true"
```
```Output
% Total % Received % Xferd Average Speed Time Time Time Current
- Dload Upload Total Spent Left Speed
-100 65353 0 65353 0 0 65353 0 --:--:-- --:--:-- --:--:-- 102k
-curl: Saved to filename 'gdc_download_20180830_131817.826097.tar.gz'
+ Dload Upload Total Spent Left Speed
+100 63.4M 0 63.4M 0 0 7541k 0 --:--:-- 0:00:08 --:--:-- 9.9M
+
```
@@ -57,10 +55,10 @@ curl --remote-name --remote-header-name 'https://api.gdc.cancer.gov/data/e322802
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 287k 0 287k 0 0 30131 0 --:--:-- 0:00:09 --:--:-- 42759
-curl: Saved to filename 'gdc_download_064d1aa8cc8cbab33e93979bebbf7d6af2d6a802.tar.gz'
+
```
-**Note:** This method supports downloading a limited number of files at one time. To download a large number of files, please use [POST](#downloading-multiple-files-using-post).
+>**Note:** This method supports downloading a limited number of files at one time. To download a large number of files, please use [POST](#downloading-multiple-files-using-post).
#### Downloading an Uncompressed Group of Files
@@ -83,19 +81,18 @@ The payload is a string in the following format:
where UUID# corresponds to the UUIDs of the files to be downloaded.
-In this example we use `curl` to download a set of files from the GDC Legacy Archive. The payload is stored in a plain text file named `Payload`; `curl` includes the `Content-Type: application/x-www-form-urlencoded` header by default.
+In this example we use `curl` to download a set of files from the GDC Data Portal. The payload is stored in a plain text file named `Payload`; `curl` includes the `Content-Type: application/x-www-form-urlencoded` header by default.
```Payload
-ids=556e5e3f-0ab9-4b6c-aa62-c42f6a6cf20c&ids=e0de63e2-02f3-4309-9b24-69f4c24e85fc&ids=f1a06178-2ec2-4b06-83f3-3aedac332cfe&ids=11a8aca0-c8e6-4ff8-8ab6-fe18a1b8ba82&ids=69a69c84-00de-45ff-b397-fd2b6713ed4f&ids=9ec48233-395d-401e-b205-951c971f8dd4&ids=93129547-378c-4b69-b858-532abfff678e&ids=8d4277e9-a472-4590-886d-24dc2538ea65&ids=6733b412-56da-4f1c-a12b-ff804cb656d7&ids=a72eec98-c5e0-4866-8953-765780acb6c1&ids=e77b2294-1bdd-4fba-928a-d81d2622312f&ids=965e01fc-318e-4c02-a801-d6fad60bfae4&ids=21ad5409-fe0b-4728-97e4-15520b9fc287&ids=1a777521-277c-4aeb-baf1-66871a7c2d2a&ids=c13a3449-9e0d-45a9-bcc0-518f55e45c8a&ids=5f2d329b-d59d-4112-b490-5114b830e34d&ids=bb966617-6c1f-4bb0-a1ed-ceb37ecade67&ids=05d11519-2b33-4742-aa87-3934632f2f2b&ids=39bfafe2-9628-434e-bd72-148051a47477&ids=481bea69-3cd5-45f3-8a52-2d4cc8fc8df7&ids=f95e407b-de69-416c-920c-6be8c9414862&ids=75940293-8fa6-47f9-ad5d-155b61933fdc&ids=e8e84ccf-f8a8-4551-9257-ef731d02116f&ids=e4991159-f088-4a2a-88b7-38d6ac47c6bc
+ids=59eb3fc5-9172-4828-8dec-0d9988073103&ids=869b7d7c-ff35-482a-aa8d-1a8675c161d3&ids=b8ffff40-aa0e-4534-b05f-9311f16c2f6b&ids=51e14969-30a7-42d9-8168-4a5ea422ca4a&ids=adcfc856-990b-40fc-8f1e-67dfc2343fb7&ids=7f1e9aee-eb4e-4c79-8626-b603c9be124d&ids=62a8feb5-c660-4261-bcd6-67fbb79bb422
```
```Shell
curl --remote-name --remote-header-name --request POST 'https://api.gdc.cancer.gov/data' --data @Payload
```
```Output
% Total % Received % Xferd Average Speed Time Time Time Current
- Dload Upload Total Spent Left Speed
-100 2563k 0 2562k 100 983 854k 327 0:00:03 0:00:03 --:--:-- 776k
-curl: Saved to filename 'gdc_download_20180830_132402.379282.tar.gz'
+ Dload Upload Total Spent Left Speed
+100 6804k 0 6804k 100 286 245k 10 0:00:28 0:00:27 0:00:01 357k
```
#### POST request with JSON payload
@@ -116,36 +113,16 @@ The payload is a string in the following format:
where UUID# corresponds to the UUIDs of the files to be downloaded.
-In this example we use `curl` to download a set of files from the GDC Legacy Archive; the payload is stored in a plain text file named `Payload`.
+In this example we use `curl` to download a set of files from the GDC Data Portal; the payload is stored in a plain text file named `Payload`.
```Payload
{
"ids":[
- "556e5e3f-0ab9-4b6c-aa62-c42f6a6cf20c",
- "e0de63e2-02f3-4309-9b24-69f4c24e85fc",
- "f1a06178-2ec2-4b06-83f3-3aedac332cfe",
- "11a8aca0-c8e6-4ff8-8ab6-fe18a1b8ba82",
- "69a69c84-00de-45ff-b397-fd2b6713ed4f",
- "9ec48233-395d-401e-b205-951c971f8dd4",
- "93129547-378c-4b69-b858-532abfff678e",
- "8d4277e9-a472-4590-886d-24dc2538ea65",
- "6733b412-56da-4f1c-a12b-ff804cb656d7",
- "a72eec98-c5e0-4866-8953-765780acb6c1",
- "e77b2294-1bdd-4fba-928a-d81d2622312f",
- "965e01fc-318e-4c02-a801-d6fad60bfae4",
- "21ad5409-fe0b-4728-97e4-15520b9fc287",
- "1a777521-277c-4aeb-baf1-66871a7c2d2a",
- "c13a3449-9e0d-45a9-bcc0-518f55e45c8a",
- "5f2d329b-d59d-4112-b490-5114b830e34d",
- "bb966617-6c1f-4bb0-a1ed-ceb37ecade67",
- "05d11519-2b33-4742-aa87-3934632f2f2b",
- "39bfafe2-9628-434e-bd72-148051a47477",
- "481bea69-3cd5-45f3-8a52-2d4cc8fc8df7",
- "f95e407b-de69-416c-920c-6be8c9414862",
- "75940293-8fa6-47f9-ad5d-155b61933fdc",
- "e8e84ccf-f8a8-4551-9257-ef731d02116f",
- "e4991159-f088-4a2a-88b7-38d6ac47c6bc"
+ "0451fc55-33ef-4151-a68c-cac59be716dc",
+ "0cc3d450-2c60-4cb0-a073-d92dc979fa5e",
+ "0de9bc40-3ef8-4fe7-b7d6-80a9339b0bf8",
+ "0f8d8202-a1ca-4ea1-98b2-c20a6b08479a"
]
}
```
@@ -153,10 +130,9 @@ In this example we use `curl` to download a set of files from the GDC Legacy Arc
curl --remote-name --remote-header-name --request POST --header 'Content-Type: application/json' --data @request.txt 'https://api.gdc.cancer.gov/data'
```
```Output
- % Total % Received % Xferd Average Speed Time Time Time Current
- Dload Upload Total Spent Left Speed
-100 2562k 0 2561k 100 1145 788k 352 0:00:03 0:00:03 --:--:-- 788k
-curl: Saved to filename 'gdc_download_20160701_011007.tar.gz'
+% Total % Received % Xferd Average Speed Time Time Time Current
+ Dload Upload Total Spent Left Speed
+100 5878k 0 5878k 100 205 290k 10 0:00:20 0:00:20 --:--:-- 198k
```
### Downloading Controlled-access Files
@@ -166,31 +142,31 @@ To download controlled-access files, a valid authentication token must be passed
```shell
token=$(**NOTE:** When using the command line tool curl be sure to use the bash shell only for compatibility reasons. All examples using curl are using the bash shell.
+
+The authentication token should be kept in a secure location, as it allows access to all data accessible by the associated user account.
## API Endpoints
@@ -52,12 +55,6 @@ The HTTP URL of an endpoint corresponding to a specific major version of the GDC
For example, the address of the latest version of the `status` endpoint is `https://api.gdc.cancer.gov/status`, whereas the address of the `status` endpoint corresponding to version 0 of GDC API is `https://api.gdc.cancer.gov/v0/status`.
-### GDC Legacy Archive
-
-To interact with data in the GDC Legacy Archive, add `legacy` to the endpoint URL:
-
- https://api.gdc.cancer.gov//legacy/
-
## Entity UUIDs
All objects (*entities*) in the GDC are assigned a unique identifier in the form of a [version 4 universally unique identifier (UUID)](https://en.wikipedia.org/wiki/Universally_unique_identifier). The UUID uniquely identifies the entity in the GDC, and is stored in the entity's `id` property.
@@ -68,51 +65,33 @@ See [GDC Data Model](../../Data/Data_Model/GDC_Data_Model.md) for details.
## Sample Request
-The following is an example of a request to the `files` endpoint, which retrieves information about a BAM file stored in the GDC.
+The following is an example of a request to the `files` endpoint, which retrieves information about a MAF file stored in the GDC.
``` shell
-curl https://api.gdc.cancer.gov/files/d853e541-f16a-4345-9f00-88e03c2dc0bc?pretty=true
+curl https://api.gdc.cancer.gov/files/cb92f61d-041c-4424-a3e9-891b7545f351?pretty=true
```
``` python
import requests
import json
file_endpt = 'https://api.gdc.cancer.gov/files/'
-file_uuid = 'd853e541-f16a-4345-9f00-88e03c2dc0bc'
+file_uuid = 'cb92f61d-041c-4424-a3e9-891b7545f351'
response = requests.get(file_endpt + file_uuid)
-print json.dumps(response.json(), indent=2)
-``` Response
-{
- "data": {
- "data_type": "Aligned Reads",
- "updated_datetime": "2016-05-26T17:06:40.003624-05:00",
- "created_datetime": "2016-05-26T17:06:40.003624-05:00",
- "file_name": "0017ba4c33a07ba807b29140b0662cb1_gdc_realn.bam",
- "md5sum": "a08304b120c5df76b6532da0e9a35ced",
- "data_format": "BAM",
- "acl": [
- "phs000178"
- ],
- "access": "controlled",
- "platform": "Illumina",
- "state": "submitted",
- "file_id": "d853e541-f16a-4345-9f00-88e03c2dc0bc",
- "data_category": "Raw Sequencing Data",
- "file_size": 23650901931,
- "submitter_id": "c30188d7-be1a-4b43-9a17-e19ccd71792e",
- "type": "aligned_reads",
- "file_state": "processed",
- "experimental_strategy": "WXS"
- },
- "warnings": {}
-}
-```
+# OUTPUT METHOD 1: Write to a file.
+file = open("sample_request.json", "w")
+file.write(response.text)
+file.close()
+
+# OUTPUT METHOD 2: View on screen.
+print(json.dumps(response.json(), indent=2))
+```
+[Download Script](scripts/Sample_Request.py)
## Authentication
Authentication is required for downloading controlled-access data, and for all data submission functionality. The GDC API uses tokens for authentication.
-Users can obtain authentication tokens from the [GDC Data Portal](https://portal.gdc.cancer.gov) and the [GDC Data Submission Portal](https://portal.gdc.cancer.gov/submission). See the [GDC Data Portal User's Guide](../../Data_Portal/Users_Guide/Authentication.md#gdc-authentication-tokens) and the [GDC Data Submission Portal User's Guide](../../Data_Submission_Portal/Users_Guide/Authentication.md#gdc-authentication-tokens) for instructions.
+Users can obtain authentication tokens from the [GDC Data Portal](https://portal.gdc.cancer.gov) and the [GDC Data Submission Portal](https://portal.gdc.cancer.gov/submission). See the [GDC Data Submission Portal User's Guide](../../Data_Submission_Portal/Users_Guide/Data_Submission_Process.md#authentication) for instructions.
### Using Authentication Tokens
@@ -120,18 +99,52 @@ All API requests that require authentication must include a token as an `X-Auth-
In the following example, an authentication token is saved as an environment variable and passed to `curl` to download a controlled-access file:
-``` shell
-token=$()
-curl -O -J -H "X-Auth-Token: $token" 'https://api.gdc.cancer.gov/data/a1c1b23b-cc41-4e85-b1b7-62a42873c5af'
+curl -O -J -H "X-Auth-Token: $token" 'https://api.gdc.cancer.gov/data/fd89bfa5-b3a7-4079-bf90-709580c006e5'
```
```Output
- % Total % Received % Xferd Average Speed Time Time Time Current
- Dload Upload Total Spent Left Speed
-100 31.4M 100 31.4M 0 0 290k 0 0:01:50 0:01:50 --:--:-- 172k
-curl: Saved to filename 'ACOLD_p_TCGA_Batch17_SNP_N_GenomeWideSNP_6_A03_466078.tangent.copynumber.data.txt'
+% Total % Received % Xferd Average Speed Time Time Time Current
+ Dload Upload Total Spent Left Speed
+100 4161M 100 4161M 0 0 281k 0 4:12:45 4:12:45 --:--:-- 1894k
+
```
+```Python
+import requests
+import json
+import re
+
+'''
+ This script will not work until $TOKEN_FILE_PATH
+ is replaced with an actual path.
+'''
+
+with open("$TOKEN_FILE_PATH","r") as token:
+ token_string = str(token.read().strip())
+
+headers = {
+ 'X-Auth-Token': token_string
+ }
+
+data_endpt = 'https://api.gdc.cancer.gov/data/'
+data_uuid = 'fd89bfa5-b3a7-4079-bf90-709580c006e5'
+headers = {
+ 'X-Auth-Token': token_string
+ }
+response = requests.get(data_endpt + data_uuid, headers=headers)
+
+# The file name can be found in the header within the Content-Disposition key.
+response_head_cd = response.headers["Content-Disposition"]
+
+file_name = re.findall("filename=(.+)", response_head_cd)[0]
+
+with open(file_name, "wb") as output_file:
+ output_file.write(response.content)
+```
+[Download Python Script](scripts/Authentication_Tokens.py)
+
For more information about authentication tokens, including token expiration and rotation, see [Data Security](../../Data/Data_Security/Data_Security.md#authentication-tokens).
-**NOTE:** The authentication token should be kept in a secure location, as it allows access to all data accessible by the associated user account.
+>**NOTE:** The authentication token should be kept in a secure location, as it allows access to all data accessible by the associated user account.
diff --git a/docs/API/Users_Guide/GraphQL_Examples.md b/docs/API/Users_Guide/GraphQL_Examples.md
new file mode 100644
index 000000000..fe9d7888d
--- /dev/null
+++ b/docs/API/Users_Guide/GraphQL_Examples.md
@@ -0,0 +1,174 @@
+# Introduction to GDC GraphQL
+[GraphQL](https://graphql.org/) is a query language for APIs. The [GDC REST API](https://docs.gdc.cancer.gov/API/Users_Guide/Getting_Started/) has structured and specifically defined query parameters as well as endpoints that have set requests and responses. The GDC GraphQL provides advanced GDC users greater flexibility to specify the data they would like to be returned. This allows queries to be cleaner and easier to understand, especially when combining multiple queries into one request.
+
+To produce queries in a visual interface, the GDC recommends using [GraphiQL](https://github.com/graphql/graphiql). See below for the correct endpoint URLs.
+
+## Using GDC GraphQL vs GDC REST API
+
+If the query requires only a subset of the data to be returned, GDC GraphQL may speed up requests as GraphQL queries return only the specified data. This may require less work on the GDC server-side to fulfill those requests. Conversely, if an entire data-set is required for each request, the GDC REST API may be a better fit. No matter which method is used, the data returned by the GDC REST API and the GraphQL query will be identical as they query the same source.
+
+## GDC GraphQL Overview
+GraphQL is not a storage model or a database query language. The graph refers to graph structures defined in the schema, where nodes define objects and edges define relationships between objects. The API traverses and returns application data based on the schema definitions, independent of how the data is stored.
+
+## GDC GraphQL Endpoints
+
+The GDC GraphQL has only two endpoints:
+* __GDC Search and Retrieval Endpoint:__ https://api.gdc.cancer.gov/v0/graphql
+* __GDC Submission Endpoint:__ https://api.gdc.cancer.gov/v0/submission/graphql
+
+This page covers the search and retrieval endpoint; see the [GDC Submission API](Submission.md) for additional details on the submission endpoint.
+
+## GDC GraphQL Schema
+All GDC GraphQL queries are validated and executed against the [GDC GraphQL schema]( https://github.com/NCI-GDC/portal-ui/blob/92f0dfa17838746093c3c011141d08391016da91/data/schema.graphql). Because the GraphQL parameters are discoverable, the GDC GraphQL schema can be queried for details about itself.
+
+The `__schema` keyword can be queried to list all types defined in the schema and retrieve details about each:
+
+```GraphQL
+{
+ __schema {
+ types {
+ name
+ kind
+ fields {
+ name
+ }
+ }
+ }
+}
+```
+The `__type` keyword can also be queried to retrieve details about any type such as "Explore" or "Case":
+```GraphQL
+
+{
+ __type(name: "Explore") {
+ name
+ kind
+ description
+ fields {
+ name
+ }
+ }
+}
+```
+
+```GraphQL
+{
+ __type(name: "Case") {
+ name
+ kind
+ description
+ fields {
+ name
+ }
+ }
+}
+```
+
+## Basic GraphQL queries in GDC
+The two types of allowed operations in GDC GraphQL API are queries and mutations. Comparing GraphQL to REST, queries operate like `GET` requests, while mutations operate like `POST`/`PATCH`/`DELETE`.
+
+__Note:__ This guide does not cover GDC GraphQL mutation operations.
+
+GraphQL queries return only the data that is specified. Queries are built by specifying fields within fields (also called nested *subfields*) until only scalars are returned. Scalars are primitive values such as: `Int`, `Float`, `String`, `Boolean`, or `ID`.
+
+## Anatomy of a typical GDC GraphQL Query
+
+ [![GraphQL Query](images/graphql-query.png)](images/graphql-query.png "Click to see the full image.")
+
+- __Operation type:__ Describes the type of operation being performed, such as query, mutation, or subscription
+- __Operation name:__ Similar to a function name, gives queries meaningful names
+- __Field:__ Denotes the specific fields on objects that will be included with the response data
+- __Arguments:__ A set of key-value pairs associated with a specific field. The parameters can be literal values or variables. __NOTE:__ Arguments can appear on any field, even fields nested deep in an operation.
+- __Variable definitions:__ As GraphQL is strongly typed, it validates the variable being passed dynamically. __NOTE:__ Variables are passed separately from the query document as JSON, such as:
+
+```json
+ { "filters_1": {"op":"in","content":{"field":"projects.program.name","value":["TARGET"]}}}
+```
+
+## GDC GraphQL Examples
+### Nodes And Edges Example
+A very powerful feature of GDC GraphQL API is that the graph structures defined in the [GDC GraphQL schema]( https://github.com/NCI-GDC/portal-ui/blob/92f0dfa17838746093c3c011141d08391016da91/data/schema.graphql ) can be queried and traversed. In these queries, nodes define objects and edges define relationships between objects.
+
+```GraphQL
+
+query PROJECTS_EDGES($filters_1: FiltersArgument) {
+ projects {
+ hits(filters: $filters_1) {
+ total
+ edges {
+ node {
+ primary_site
+ disease_type
+ project_id
+ dbgap_accession_number
+ }
+ }
+ }
+ }
+}
+
+ variable:
+ { "filters_1": {"op": "in", "content": {"field": "projects.primary_site", "value": ["Kidney"]}}}
+```
+
+### Query Case File Counts
+
+```GraphQL
+query CaseFileCounts($filters: FiltersArgument) {
+ viewer {
+ repository {
+ cases {
+ hits(first: 1, filters: $filters) {
+ edges {
+ node {
+ case_id
+ files {
+ hits(first: 0) {
+ total
+ }
+ }
+ summary {
+ experimental_strategies {
+ experimental_strategy
+ file_count
+ }
+ data_categories {
+ data_category
+ file_count
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+variable:
+{"filters":{"op":"in","content":{"field":"cases.case_id","value":["dcd5860c-7e3a-44f3-a732-fe92fe3fe300"]}}}
+```
+
+### Query Simple Somatic Mutations Based on Gene IDs
+
+```GraphQL
+
+query PROJECTS_EDGES($filters_2: FiltersArgument) {
+ explore {
+ ssms {
+ hits(filters: $filters_2) {
+ total
+ edges {
+ node {
+ ssm_id
+ gene_aa_change
+ }
+ }
+ }
+ }
+ }
+}
+
+variable:
+{"filters_2": {"op":"in","content":{"field":"consequence.transcript.gene.gene_id","value":["ENSG00000155657"]}}}
+```
diff --git a/docs/API/Users_Guide/Python_Examples.md b/docs/API/Users_Guide/Python_Examples.md
index 25ab3b6d3..09db68a8f 100644
--- a/docs/API/Users_Guide/Python_Examples.md
+++ b/docs/API/Users_Guide/Python_Examples.md
@@ -1,6 +1,6 @@
# Using Python to Query the GDC API
-Python can be a versatile tool for retrieving information from the GDC API and performing downstream processing. This page details some examples that demonstrate the basic API queries using Python. The examples in this guide will use the [requests](http://docs.python-requests.org/en/master/) Python library and should be compatible with Python3.
+Python can be a versatile tool for retrieving information from the GDC API and performing downstream processing. This page details some examples that demonstrate the basic API queries using Python. The examples in this guide will use the [requests](https://pypi.org/project/requests/) Python library and should be compatible with Python3.
## Querying Metadata
@@ -438,5 +438,13 @@ The following script should produce an unformatted JSON string with information
import requests
status_endpt = "https://api.gdc.cancer.gov/status"
response = requests.get(status_endpt)
+
+# OUTPUT METHOD 1: Write to a file.
+file = open("api_status.json", "w")
+file.write(response.text)
+file.close()
+
+# OUTPUT METHOD 2: View on screen.
print(response.content)
```
+[Download Script](scripts/Basic_Troubleshooting.py)
diff --git a/docs/API/Users_Guide/Search_and_Retrieval.md b/docs/API/Users_Guide/Search_and_Retrieval.md
index f156ae92e..2c8120489 100644
--- a/docs/API/Users_Guide/Search_and_Retrieval.md
+++ b/docs/API/Users_Guide/Search_and_Retrieval.md
@@ -4,7 +4,7 @@
The GDC API provides endpoints that search and retrieve information stored in the GDC according to the [GDC Data Model](../../Data/Data_Model/GDC_Data_Model.md). The general format of requests to search & retrieval endpoints is described below.
-**Note:** Queries described in this section work for datasets that have been released to the GDC Data Portal. Unreleased data that is in the process of being submitted to GDC cannot be queried using these methods. See [Submission](Submission.md) to learn how to query unreleased data using GraphQL.
+>**Note:** Queries described in this section work for datasets that have been released to the GDC Data Portal. Unreleased data that is in the process of being submitted to GDC cannot be queried using these methods. See [Submission](Submission.md) to learn how to query unreleased data using GraphQL.
### Components of a Request
@@ -19,8 +19,6 @@ A typical search and retrieval API request specifies the following parameters:
Requests can be executed using HTTP GET or HTTP POST. GET requests are limited by maximum URL length, so the POST method is recommended for large queries.
-**Note:** Requests for information stored in the GDC Legacy Archive must be directed to `legacy/` endpoints. See [Getting Started](Getting_Started.md#gdc-legacy-archive) for details.
-
### POST Example
The following is an example of an HTTP POST request to the `files` endpoint of the GDC API. It looks for Gene Expression Quantification files associated with specific TCGA cases (represented by TCGA barcodes) and retrieves the associated biospecimen metadata in TSV format.
@@ -87,56 +85,63 @@ The following search and retrieval endpoints are available in the GDC API:
The choice of endpoint determines what is listed in the search results. The `files` endpoint will generate a list of files, whereas the `cases` endpoint will generate a list of cases. Each of the above endpoints, other than `_mapping`, can query and return any of the related fields in the [GDC Data Model](../../Data/Data_Model/GDC_Data_Model.md). So the `cases` endpoint can be queried for file fields (e.g. to look for cases that have certain types of experimental data), and the `files` endpoint can be queried for clinical metadata associated with a case (e.g. to look for files from cases diagnosed with a specific cancer type).
-### Project Endpoint
+### `Project` Endpoint
The `projects` endpoint provides access to project records, the highest level of data organization in the GDC.
#### Example
This example is a query for projects contained in the GDC. It uses the [from](#from), [size](#size), [sort](#sort), and [pretty](#pretty) parameters, and returns the first two projects sorted by project id.
```shell
-curl 'https://api.gdc.cancer.gov/projects?from=0&size=2&sort=project.project_id:asc&pretty=true'
+curl 'https://api.gdc.cancer.gov/projects?from=0&size=2&sort=project_id:asc&pretty=true'
```
``` Output
{
"data": {
"hits": [
{
- "dbgap_accession_number": null,
- "disease_type": [
- "Brain Lower Grade Glioma"
- ],
- "released": true,
- "state": "legacy",
+ "id": "APOLLO-LUAD",
"primary_site": [
- "Brain"
+ "Bronchus and lung"
],
- "project_id": "TCGA-LGG",
- "id": "TCGA-LGG",
- "name": "Brain Lower Grade Glioma"
- },
- {
- "dbgap_accession_number": null,
+ "dbgap_accession_number": "phs003011",
+ "project_id": "APOLLO-LUAD",
"disease_type": [
- "Thyroid Carcinoma"
+ "Adenomas and Adenocarcinomas"
],
- "released": true,
- "state": "legacy",
+ "name": "APOLLO1: Proteogenomic characterization of lung adenocarcinoma",
+ "releasable": false,
+ "state": "open",
+ "released": true
+ },
+ {
+ "id": "BEATAML1.0-COHORT",
"primary_site": [
- "Thyroid"
+ "Hematopoietic and reticuloendothelial systems"
],
- "project_id": "TCGA-THCA",
- "id": "TCGA-THCA",
- "name": "Thyroid Carcinoma"
+ "dbgap_accession_number": "phs001657",
+ "project_id": "BEATAML1.0-COHORT",
+ "disease_type": [
+ "Myelodysplastic Syndromes",
+ "Leukemias, NOS",
+ "Unknown",
+ "Myeloid Leukemias",
+ "Plasma Cell Tumors",
+ "Chronic Myeloproliferative Disorders"
+ ],
+ "name": "Functional Genomic Landscape of Acute Myeloid Leukemia",
+ "releasable": true,
+ "state": "open",
+ "released": true
}
],
"pagination": {
"count": 2,
- "sort": "project.project_id:asc",
+ "total": 78,
+ "size": 2,
"from": 0,
+ "sort": "None",
"page": 1,
- "total": 39,
- "pages": 20,
- "size": 2
+ "pages": 39
}
},
"warnings": {}
@@ -153,67 +158,109 @@ curl 'https://api.gdc.cancer.gov/projects/TARGET-NBL?expand=summary,summary.expe
```Response
{
"data": {
- "dbgap_accession_number": "phs000467",
- "disease_type": [
- "Neuroblastoma"
- ],
"summary": {
+ "file_count": 5705,
"data_categories": [
{
- "case_count": 151,
- "file_count": 471,
- "data_category": "Transcriptome Profiling"
+ "file_count": 943,
+ "case_count": 278,
+ "data_category": "Sequencing Reads"
+ },
+ {
+ "file_count": 3080,
+ "case_count": 220,
+ "data_category": "Simple Nucleotide Variation"
},
{
- "case_count": 1127,
"file_count": 3,
- "data_category": "Biospecimen"
+ "case_count": 1119,
+ "data_category": "Clinical"
},
{
- "case_count": 216,
- "file_count": 1732,
- "data_category": "Simple Nucleotide Variation"
+ "file_count": 705,
+ "case_count": 225,
+ "data_category": "DNA Methylation"
},
{
- "case_count": 7,
- "file_count": 1,
- "data_category": "Clinical"
+ "file_count": 2,
+ "case_count": 1132,
+ "data_category": "Biospecimen"
+ },
+ {
+ "file_count": 324,
+ "case_count": 155,
+ "data_category": "Transcriptome Profiling"
},
{
- "case_count": 270,
- "file_count": 599,
- "data_category": "Raw Sequencing Data"
+ "file_count": 648,
+ "case_count": 155,
+ "data_category": "Structural Variation"
}
],
- "case_count": 1127,
- "file_count": 2806,
"experimental_strategies": [
{
- "case_count": 221,
- "file_count": 2174,
+ "file_count": 1458,
+ "case_count": 155,
+ "experimental_strategy": "RNA-Seq"
+ },
+ {
+ "file_count": 15,
+ "case_count": 8,
+ "experimental_strategy": "WGS"
+ },
+ {
+ "file_count": 3522,
+ "case_count": 222,
"experimental_strategy": "WXS"
},
{
- "case_count": 151,
- "file_count": 628,
- "experimental_strategy": "RNA-Seq"
+ "file_count": 705,
+ "case_count": 225,
+ "experimental_strategy": "Methylation Array"
}
],
- "file_size": 8157614402888
+ "case_count": 1132,
+ "file_size": 16968781125824
},
- "released": true,
- "state": "legacy",
"primary_site": [
- "Nervous System"
+ "Stomach",
+ "Bones, joints and articular cartilage of limbs",
+ "Heart, mediastinum, and pleura",
+ "Peripheral nerves and autonomic nervous system",
+ "Uterus, NOS",
+ "Bones, joints and articular cartilage of other and unspecified sites",
+ "Other endocrine glands and related structures",
+ "Renal pelvis",
+ "Retroperitoneum and peritoneum",
+ "Liver and intrahepatic bile ducts",
+ "Meninges",
+ "Connective, subcutaneous and other soft tissues",
+ "Adrenal gland",
+ "Unknown",
+ "Spinal cord, cranial nerves, and other parts of central nervous system",
+ "Skin",
+ "Other and ill-defined sites",
+ "Kidney",
+ "Lymph nodes",
+ "Hematopoietic and reticuloendothelial systems"
],
+ "dbgap_accession_number": "phs000467",
"project_id": "TARGET-NBL",
- "name": "Neuroblastoma"
+ "disease_type": [
+ "Neuroepitheliomatous Neoplasms",
+ "Not Applicable"
+ ],
+ "name": "Neuroblastoma",
+ "releasable": true,
+ "state": "open",
+ "released": true
},
"warnings": {}
}
+
```
-### Files Endpoint
+### `Files` Endpoint
The GDC Files Endpoint `https://api.gdc.cancer.gov/files` enables search and retrieval of information relating to files stored in the GDC, including file properties such as `file_name`, `md5sum`, `data_format`, and others.
@@ -229,99 +276,97 @@ curl 'https://api.gdc.cancer.gov/files?from=0&size=2&sort=file_size:asc&pretty=t
"data": {
"hits": [
{
- "data_release": "13.0",
- "data_type": "Raw Simple Somatic Mutation",
- "updated_datetime": "2018-07-20T22:27:55.342974+00:00",
- "file_name": "333193d5-ca9a-4262-81f5-e9f3b44358fe.vcf.gz",
- "submitter_id": "AD19_SimpleSomaticMutation",
- "file_id": "333193d5-ca9a-4262-81f5-e9f3b44358fe",
- "file_size": 866,
- "id": "333193d5-ca9a-4262-81f5-e9f3b44358fe",
- "created_datetime": "2017-09-10T19:16:02.549312-05:00",
- "md5sum": "e33e95edb778fe67643162ef0ae3297e",
- "data_format": "VCF",
- "acl": [
- "phs001179"
- ],
+ "id": "0ab5e358-b1ff-4433-8959-c37c5890d9aa",
+ "data_format": "BEDPE",
"access": "controlled",
+ "file_name": "090e2828-079c-48e6-97cb-735c763da8d3.wgs.BRASS.rerun_structural_variation.bedpe.gz",
+ "submitter_id": "247c3c9a-58b9-4b70-bda8-cb197acb5609",
+ "data_category": "Somatic Structural Variation",
+ "acl": [
+ "phs001287"
+ ],
+ "type": "structural_variation",
+ "file_size": 20,
+ "created_datetime": "2022-04-08T20:27:04.633842-05:00",
+ "updated_datetime": "2022-07-07T11:02:27.204310-05:00",
+ "file_id": "0ab5e358-b1ff-4433-8959-c37c5890d9aa",
+ "data_type": "Structural Rearrangement",
"state": "released",
+ "experimental_strategy": "WGS",
"version": "1",
- "data_category": "Simple Nucleotide Variation",
- "type": "simple_somatic_mutation",
- "experimental_strategy": "Targeted Sequencing"
+ "data_release": "34.0 - 37.0"
},
{
- "data_release": "13.0",
- "data_type": "Raw Simple Somatic Mutation",
- "updated_datetime": "2018-07-20T22:27:55.342974+00:00",
- "file_name": "d9114e23-0f62-4979-aefc-0dd4d5eb891b.vcf.gz",
- "submitter_id": "AD116_SimpleSomaticMutation",
- "file_id": "d9114e23-0f62-4979-aefc-0dd4d5eb891b",
- "file_size": 866,
- "id": "d9114e23-0f62-4979-aefc-0dd4d5eb891b",
- "created_datetime": "2017-09-10T21:53:02.376246-05:00",
- "md5sum": "95bbfd0586d3c284e9f88edf3bf26065",
- "data_format": "VCF",
- "acl": [
- "phs001179"
- ],
+ "id": "a8bc2405-b57d-48bb-b241-18b3e28caa56",
+ "data_format": "BEDPE",
"access": "controlled",
+ "file_name": "eae76f14-8aa7-427f-a90c-4e0ed095e0c2.wgs.BRASS.rerun_structural_variation.bedpe.gz",
+ "submitter_id": "618cd251-ddcb-4a7e-9a6d-efb132b0bd7a",
+ "data_category": "Somatic Structural Variation",
+ "acl": [
+ "phs001287"
+ ],
+ "type": "structural_variation",
+ "file_size": 20,
+ "created_datetime": "2022-04-08T20:43:16.505747-05:00",
+ "updated_datetime": "2022-07-07T11:00:43.345766-05:00",
+ "file_id": "a8bc2405-b57d-48bb-b241-18b3e28caa56",
+ "data_type": "Structural Rearrangement",
"state": "released",
+ "experimental_strategy": "WGS",
"version": "1",
- "data_category": "Simple Nucleotide Variation",
- "type": "simple_somatic_mutation",
- "experimental_strategy": "Targeted Sequencing"
+ "data_release": "34.0 - 37.0"
}
],
"pagination": {
"count": 2,
- "sort": "file_size:asc",
+ "total": 931947,
+ "size": 2,
"from": 0,
+ "sort": "None",
"page": 1,
- "total": 356381,
- "pages": 178191,
- "size": 2
+ "pages": 465974
}
},
"warnings": {}
}
+
```
#### Retrieval of file metadata using individual UUIDs:
-The `\files` endpoint supports a simple query format that retrieves the metadata of a single file using its UUID. Note that the `\files` endpoint is inactive when querying for earlier file versions. In that case, the `\history` or `/files/versions` endpoints should be used instead.
+The `/files` endpoint supports a simple query format that retrieves the metadata of a single file using its UUID. Note that the `/files` endpoint is inactive when querying for earlier file versions. In that case, the `/history` or `/files/versions` endpoints should be used instead.
```Shell
-curl 'https://api.gdc.cancer.gov/files/874e71e0-83dd-4d3e-8014-10141b49f12c?pretty=true'
+curl 'https://api.gdc.cancer.gov/files/20f45e04-3c10-4f11-b57b-719880eab69e?pretty=true'
```
``` Output
{
"data": {
- "data_release": "13.0",
- "data_type": "Raw Simple Somatic Mutation",
- "updated_datetime": "2018-07-20T22:27:55.342974+00:00",
- "created_datetime": "2016-06-03T17:03:06.608739-05:00",
- "file_name": "874e71e0-83dd-4d3e-8014-10141b49f12c.vcf.gz",
- "md5sum": "acf2929b1b825bcd1377023e8b8767ec",
"data_format": "VCF",
+ "access": "controlled",
+ "file_name": "TCGA_BRCA.8d9cb5ae-e568-41fc-8b53-14467c2623dc.wxs.MuTect2.somatic_annotation.vcf.gz",
+ "submitter_id": "675f31dd-70e5-4a72-8139-423b14b31564",
+ "data_category": "Simple Nucleotide Variation",
"acl": [
"phs000178"
],
- "access": "controlled",
- "state": "live",
- "version": "1",
- "file_id": "874e71e0-83dd-4d3e-8014-10141b49f12c",
- "data_category": "Simple Nucleotide Variation",
- "file_size": 122293,
- "submitter_id": "TCGA-V4-A9EZ-01A-11D-A39W-08_TCGA-V4-A9EZ-10A-01D-A39Z-08_mutect",
- "type": "simple_somatic_mutation",
- "experimental_strategy": "WXS"
+ "type": "annotated_somatic_mutation",
+ "file_size": 6894331,
+ "created_datetime": "2022-02-07T08:48:39.178606-06:00",
+ "updated_datetime": "2022-02-09T12:11:12.781445-06:00",
+ "file_id": "20f45e04-3c10-4f11-b57b-719880eab69e",
+ "data_type": "Annotated Somatic Mutation",
+ "state": "released",
+ "experimental_strategy": "WXS",
+ "version": "2",
+ "data_release": "32.0 - 37.0"
},
"warnings": {}
}
```
-__Note:__ The `file_size` field associated with each file is reported in bytes.
+>__Note:__ The `file_size` field associated with each file is reported in bytes.
#### Example of retrieving file version information:
@@ -335,40 +380,50 @@ curl 'https://api.gdc.cancer.gov/files/versions/1dd28069-5777-4ff9-bd2b-d1ba68e8
``` Output1
[
{
- "latest_size": 332092,
- "latest_id": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06",
- "latest_version": "1",
+ "id": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06",
"filename": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06.vcf.gz",
- "state": "validated",
"version": "1",
- "latest_filename": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06.vcf.gz",
- "latest_release": [
- "13.0"
- ],
- "latest_state": "validated",
- "release": "13.0",
- "latest_md5": "c2f9b196e154906a70c7ec46492a859d",
+ "md5": "c2f9b196e154906a70c7ec46492a859d",
"size": 332092,
- "id": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06",
- "md5": "c2f9b196e154906a70c7ec46492a859d"
+ "state": "validated",
+ "release": "12.0",
+ "latest_id": "76b3f4d8-c6b7-4662-ac42-1d27d4684281",
+ "latest_filename": "def1cc5b-55f0-4372-a3ff-df3ea93cf3e7.wxs.somaticsniper.raw_somatic_mutation.vcf.gz",
+ "latest_version": "2",
+ "latest_size": 357706,
+ "latest_state": "validated",
+ "latest_release": [
+ "32.0",
+ "33.0",
+ "33.1",
+ "34.0",
+ "35.0",
+ "36.0",
+ "37.0"
+ ]
},
{
- "latest_size": 6653119038,
- "latest_id": "2a03abac-f1a2-49a9-a57c-7543739dd862",
- "latest_version": "1",
+ "id": "2a03abac-f1a2-49a9-a57c-7543739dd862",
"filename": "a5d86cde-32ca-4ed6-b1a5-5a47575f2ac6_gdc_realn_rehead.bam",
- "state": "validated",
"version": "1",
- "latest_filename": "a5d86cde-32ca-4ed6-b1a5-5a47575f2ac6_gdc_realn_rehead.bam",
- "latest_release": [
- "13.0"
- ],
- "latest_state": "validated",
- "release": "13.0",
- "latest_md5": "48686fcd84ac713d44261ca9e26b89fb",
+ "md5": "48686fcd84ac713d44261ca9e26b89fb",
"size": 6653119038,
- "id": "2a03abac-f1a2-49a9-a57c-7543739dd862",
- "md5": "48686fcd84ac713d44261ca9e26b89fb"
+ "state": "validated",
+ "release": "12.0",
+ "latest_id": "de0ce84d-c286-405c-a556-39dac14c7c74",
+ "latest_filename": "d45c33cc-88e2-4de5-a578-f7e31a6c0738.rna_seq.genomic.gdc_realn.bam",
+ "latest_version": "2",
+ "latest_size": 6223445806,
+ "latest_state": "validated",
+ "latest_release": [
+ "32.0",
+ "33.0",
+ "33.1",
+ "34.0",
+ "35.0",
+ "36.0",
+ "37.0"
+ ]
}
]
```
@@ -377,82 +432,126 @@ curl --request POST --header "Content-Type: text/tsv" https://api.gdc.cancer.go
```
``` Output2
-[{
- "latest_size": 44857,
- "state": "validated",
- "latest_version": "1",
- "filename": "nationwidechildrens.org_clinical.TCGA-13-1500.xml",
- "latest_id": "0b20e27c-9a09-4f15-923f-d5b4f185dc22",
- "version": "1",
- "latest_filename": "nationwidechildrens.org_clinical.TCGA-13-1500.xml",
- "latest_release": [
- "12.0"
- ],
- "latest_state": "validated",
- "release": "12.0",
- "latest_md5": "597aa4df24c4d544b6c25cbd8b25a33e",
- "md5": "597aa4df24c4d544b6c25cbd8b25a33e",
- "id": "0b20e27c-9a09-4f15-923f-d5b4f185dc22",
- "size": 44857
-},{
- "latest_size": 27620,
- "state": "validated",
- "latest_version": "1",
- "filename": "BUCKS_p_TCGA_272_273_N_GenomeWideSNP_6_G05_1320676.grch38.seg.v2.txt",
- "latest_id": "3edc7084-013c-4493-8507-c00b0e9962d8",
- "version": "1",
- "latest_filename": "BUCKS_p_TCGA_272_273_N_GenomeWideSNP_6_G05_1320676.grch38.seg.v2.txt",
- "latest_release": [
- "12.0"
- ],
- "latest_state": "validated",
- "release": "12.0",
- "latest_md5": "35a18d990a05eedfaf96e753bee0b96d",
- "md5": "35a18d990a05eedfaf96e753bee0b96d",
- "id": "3edc7084-013c-4493-8507-c00b0e9962d8",
- "size": 27620
-},{
- "latest_size": 2346,
- "state": "validated",
- "latest_version": "1",
- "filename": "a22f5e32-b16e-458f-a412-7e438056ece6.vep.vcf.gz",
- "latest_id": "a22f5e32-b16e-458f-a412-7e438056ece6",
- "version": "1",
- "latest_filename": "a22f5e32-b16e-458f-a412-7e438056ece6.vep.vcf.gz",
- "latest_release": [
- "12.0"
- ],
- "latest_state": "validated",
- "release": "12.0",
- "latest_md5": "68b2433b31679bbbc6681919a1b81762",
- "md5": "68b2433b31679bbbc6681919a1b81762",
- "id": "a22f5e32-b16e-458f-a412-7e438056ece6",
- "size": 2346
-},{
- "latest_size": 35411,
- "state": "validated",
- "latest_version": "1",
- "filename": "CYANS_p_TCGAb_422_423_424_NSP_GenomeWideSNP_6_G12_1513758.nocnv_grch38.seg.v2.txt",
- "latest_id": "ac7d2078-bd6b-446e-b30a-d889da5624b6",
- "version": "1",
- "latest_filename": "CYANS_p_TCGAb_422_423_424_NSP_GenomeWideSNP_6_G12_1513758.nocnv_grch38.seg.v2.txt",
- "latest_release": [
- "12.0"
- ],
- "latest_state": "validated",
- "release": "12.0",
- "latest_md5": "6338826b620773062232830fad51ae64",
- "md5": "6338826b620773062232830fad51ae64",
- "id": "ac7d2078-bd6b-446e-b30a-d889da5624b6",
- "size": 35411
-}]
-```
-
-### Cases Endpoint
+[
+ {
+ "id": "0b20e27c-9a09-4f15-923f-d5b4f185dc22",
+ "filename": "nationwidechildrens.org_clinical.TCGA-13-1500.xml",
+ "version": "1",
+ "md5": "597aa4df24c4d544b6c25cbd8b25a33e",
+ "size": 44857,
+ "state": "validated",
+ "release": "12.0",
+ "latest_id": "0b20e27c-9a09-4f15-923f-d5b4f185dc22",
+ "latest_filename": "nationwidechildrens.org_clinical.TCGA-13-1500.xml",
+ "latest_version": "1",
+ "latest_md5": "597aa4df24c4d544b6c25cbd8b25a33e",
+ "latest_size": 44857,
+ "latest_state": "validated",
+ "latest_release": [
+ "12.0",
+ "13.0",
+ "14.0",
+ "15.0",
+ "16.0",
+ "17.0",
+ "18.0",
+ "19.0",
+ "20.0",
+ "21.0",
+ "22.0",
+ "23.0",
+ "24.0",
+ "25.0",
+ "26.0",
+ "27.0",
+ "28.0",
+ "29.0",
+ "30.0",
+ "31.0",
+ "32.0",
+ "33.0",
+ "33.1",
+ "34.0",
+ "35.0",
+ "36.0",
+ "37.0"
+ ]
+ },
+ {
+ "id": "3edc7084-013c-4493-8507-c00b0e9962d8",
+ "filename": "BUCKS_p_TCGA_272_273_N_GenomeWideSNP_6_G05_1320676.grch38.seg.v2.txt",
+ "version": "1",
+ "md5": "35a18d990a05eedfaf96e753bee0b96d",
+ "size": 27620,
+ "state": "validated",
+ "release": "12.0",
+ "latest_id": "3edc7084-013c-4493-8507-c00b0e9962d8",
+ "latest_filename": "BUCKS_p_TCGA_272_273_N_GenomeWideSNP_6_G05_1320676.grch38.seg.v2.txt",
+ "latest_version": "1",
+ "latest_md5": "35a18d990a05eedfaf96e753bee0b96d",
+ "latest_size": 27620,
+ "latest_state": "validated",
+ "latest_release": [
+ "12.0",
+ "13.0",
+ "14.0",
+ "15.0",
+ "16.0",
+ "17.0",
+ "18.0",
+ "19.0",
+ "20.0",
+ "21.0",
+ "22.0",
+ "23.0",
+ "24.0",
+ "25.0",
+ "26.0",
+ "27.0",
+ "28.0",
+ "29.0",
+ "30.0",
+ "31.0",
+ "32.0",
+ "33.0",
+ "33.1",
+ "34.0",
+ "35.0",
+ "36.0",
+ "37.0"
+ ]
+ },
+ {
+ "id": "a22f5e32-b16e-458f-a412-7e438056ece6",
+ "filename": "a22f5e32-b16e-458f-a412-7e438056ece6.vep.vcf.gz",
+ "version": "1",
+ "md5": "68b2433b31679bbbc6681919a1b81762",
+ "size": 2346,
+ "state": "validated",
+ "release": "12.0",
+ "latest_id": "55491171-6170-45cb-af9d-d99345b289e5",
+ "latest_filename": "4b89bb97-41f6-43c4-a481-287556f7bb4a.targeted_sequencing.annotated_somatic_mutation.vcf.gz",
+ "latest_version": "2",
+ "latest_size": 2618,
+ "latest_state": "validated",
+ "latest_release": [
+ "32.0",
+ "33.0",
+ "33.1",
+ "34.0",
+ "35.0",
+ "36.0",
+ "37.0"
+ ]
+ }
+]
+```
+
+### `Cases` Endpoint
The GDC Cases Endpoint `https://api.gdc.cancer.gov/cases` enables search and retrieval of information related to a specific case.
-__Note:__ The `cases` endpoint is designed to retrieve the metadata associated with one or more cases, including all nested biospecimen entities. Filters can be applied to retrieve information for entire cases, but not for lower-level biospecimen entities. For example, a sample within a case cannot be used to query for aliquots that are associated only with that sample. All aliquots associated with the case would be retrieved.
+The `cases` endpoint is designed to retrieve the metadata associated with one or more cases, including all nested biospecimen entities. Filters can be applied to retrieve information for entire cases, but not for lower-level biospecimen entities. For example, a sample within a case cannot be used to query for aliquots that are associated only with that sample. All aliquots associated with the case would be retrieved.
#### Example
@@ -468,100 +567,109 @@ curl 'https://api.gdc.cancer.gov/cases?filters=%7B%22op%22%3A%22and%22%2C%22cont
```
``` Output
{
- {
- "data": {
- "hits": [
- {
- "updated_datetime": "2017-03-04T16:39:19.244769-06:00",
- "submitter_analyte_ids": [
- "TCGA-BH-A0EA-01A-11R",
- "TCGA-BH-A0EA-10A-01W",
- "TCGA-BH-A0EA-01A-11W",
- "TCGA-BH-A0EA-01A-11D",
- "TCGA-BH-A0EA-10A-01D"
- ],
- "analyte_ids": [
- "fe678556-acf4-4bde-a95e-860bb0150a95",
- "66ed0f86-5ca5-4dec-ba76-7ee4dcf31831",
- "f19f408a-815f-43d9-8032-e9482b796371",
- "69ddc092-88a0-4839-a2bb-9f1c9e760409",
- "30cb470f-66d4-4085-8c30-83a42e8453d4"
- ],
- "submitter_id": "TCGA-BH-A0EA",
- "case_id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
- "id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
- "disease_type": "Breast Invasive Carcinoma",
- "sample_ids": [
- "9a6c71a6-82cd-42b1-a93f-f569370848d6",
- "7f791228-dd77-4ab0-8227-d784a4c7fea1"
- ],
- "portion_ids": [
- "cb6086d1-3416-4310-b109-e8fa6e8b72d4",
- "8629bf5a-cdaf-4f6a-90bb-27dd4a7565c5",
- "ae4f5816-f97a-4605-9b05-9ab820467dee"
- ],
- "submitter_portion_ids": [
- "TCGA-BH-A0EA-01A-21-A13C-20",
- "TCGA-BH-A0EA-01A-11",
- "TCGA-BH-A0EA-10A-01"
- ],
- "created_datetime": null,
- "slide_ids": [
- "90154ea1-6b76-4445-870e-d531d6fa1239",
- "a0826f0d-986a-491b-8c6f-b34f8929f3ee"
- ],
- "state": "live",
- "aliquot_ids": [
- "eef9dce1-6ba6-432b-bbe2-53c7dbe64fe7",
- "cde982b7-3b0a-49eb-8710-a599cb0e44c1",
- "b1a3739d-d554-4202-b96f-f25a444e2042",
- "97c64d6a-7dce-4d0f-9cb3-b3e4eb4719c5",
- "561b8777-801a-49ed-a306-e7dafeb044b6",
- "42d050e4-e8ee-4442-b9c0-0ee14706b138",
- "ca71ca96-cbb7-4eab-9487-251dda34e107",
- "cfbd5476-e83a-401d-9f9a-639c73a0e35b",
- "edad5bd3-efe0-4c5f-b05c-2c0c2951c45a",
- "262715e1-835c-4f16-8ee7-6900e26f7cf5",
- "2beb34c4-d493-4a73-b21e-de77d43251ff",
- "bcb7fc6d-60a0-48b7-aa81-14c0dda72d76"
- ],
- "primary_site": "Breast",
- "submitter_aliquot_ids": [
- "TCGA-BH-A0EA-10A-01D-A113-01",
- "TCGA-BH-A0EA-01A-11R-A115-07",
- "TCGA-BH-A0EA-01A-11D-A10Y-09",
- "TCGA-BH-A0EA-01A-11D-A314-09",
- "TCGA-BH-A0EA-01A-11R-A114-13",
- "TCGA-BH-A0EA-01A-11D-A111-01",
- "TCGA-BH-A0EA-01A-11D-A112-05",
- "TCGA-BH-A0EA-01A-11D-A10X-02",
- "TCGA-BH-A0EA-10A-01D-A110-09",
- "TCGA-BH-A0EA-10A-01W-A12U-09",
- "TCGA-BH-A0EA-10A-01D-A10Z-02",
- "TCGA-BH-A0EA-01A-11W-A12T-09"
- ],
- "submitter_sample_ids": [
- "TCGA-BH-A0EA-10A",
- "TCGA-BH-A0EA-01A"
- ],
- "submitter_slide_ids": [
- "TCGA-BH-A0EA-01A-01-MSA",
- "TCGA-BH-A0EA-01A-01-TSA"
- ]
- }
- ],
- "pagination": {
- "count": 1,
- "sort": "",
- "from": 0,
- "page": 1,
- "total": 1,
- "pages": 1,
- "size": 10
- }
- },
- "warnings": {}
- }
+ "data": {
+ "hits": [
+ {
+ "id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
+ "slide_ids": [
+ "a0826f0d-986a-491b-8c6f-b34f8929f3ee",
+ "90154ea1-6b76-4445-870e-d531d6fa1239",
+ "1dd1cab5-5a81-428a-8153-91e8c4cf9905"
+ ],
+ "submitter_slide_ids": [
+ "TCGA-BH-A0EA-01Z-00-DX1",
+ "TCGA-BH-A0EA-01A-01-MSA",
+ "TCGA-BH-A0EA-01A-01-TSA"
+ ],
+ "disease_type": "Ductal and Lobular Neoplasms",
+ "analyte_ids": [
+ "f19f408a-815f-43d9-8032-e9482b796371",
+ "fe678556-acf4-4bde-a95e-860bb0150a95",
+ "69ddc092-88a0-4839-a2bb-9f1c9e760409",
+ "66ed0f86-5ca5-4dec-ba76-7ee4dcf31831",
+ "30cb470f-66d4-4085-8c30-83a42e8453d4"
+ ],
+ "submitter_id": "TCGA-BH-A0EA",
+ "submitter_analyte_ids": [
+ "TCGA-BH-A0EA-10A-01D",
+ "TCGA-BH-A0EA-01A-11D",
+ "TCGA-BH-A0EA-01A-11R",
+ "TCGA-BH-A0EA-10A-01W",
+ "TCGA-BH-A0EA-01A-11W"
+ ],
+ "aliquot_ids": [
+ "cde982b7-3b0a-49eb-8710-a599cb0e44c1",
+ "b1a3739d-d554-4202-b96f-f25a444e2042",
+ "eef9dce1-6ba6-432b-bbe2-53c7dbe64fe7",
+ "97c64d6a-7dce-4d0f-9cb3-b3e4eb4719c5",
+ "262715e1-835c-4f16-8ee7-6900e26f7cf5",
+ "cfbd5476-e83a-401d-9f9a-639c73a0e35b",
+ "bcb7fc6d-60a0-48b7-aa81-14c0dda72d76",
+ "561b8777-801a-49ed-a306-e7dafeb044b6",
+ "edad5bd3-efe0-4c5f-b05c-2c0c2951c45a",
+ "42d050e4-e8ee-4442-b9c0-0ee14706b138",
+ "2beb34c4-d493-4a73-b21e-de77d43251ff",
+ "ca71ca96-cbb7-4eab-9487-251dda34e107"
+ ],
+ "submitter_aliquot_ids": [
+ "TCGA-BH-A0EA-10A-01W-A12U-09",
+ "TCGA-BH-A0EA-01A-11D-A111-01",
+ "TCGA-BH-A0EA-01A-11D-A314-09",
+ "TCGA-BH-A0EA-01A-11D-A10X-02",
+ "TCGA-BH-A0EA-10A-01D-A10Z-02",
+ "TCGA-BH-A0EA-10A-01D-A110-09",
+ "TCGA-BH-A0EA-01A-11D-A10Y-09",
+ "TCGA-BH-A0EA-10A-01D-A113-01",
+ "TCGA-BH-A0EA-01A-11D-A112-05",
+ "TCGA-BH-A0EA-01A-11R-A115-07",
+ "TCGA-BH-A0EA-01A-11W-A12T-09",
+ "TCGA-BH-A0EA-01A-11R-A114-13"
+ ],
+ "created_datetime": null,
+ "diagnosis_ids": [
+ "84654ad5-2a2c-5c3b-8340-ecac6a5550fe"
+ ],
+ "sample_ids": [
+ "55864d86-dab8-47bb-a3e3-8cfb198b06c1",
+ "9a6c71a6-82cd-42b1-a93f-f569370848d6",
+ "7f791228-dd77-4ab0-8227-d784a4c7fea1"
+ ],
+ "submitter_sample_ids": [
+ "TCGA-BH-A0EA-01A",
+ "TCGA-BH-A0EA-01Z",
+ "TCGA-BH-A0EA-10A"
+ ],
+ "primary_site": "Breast",
+ "submitter_diagnosis_ids": [
+ "TCGA-BH-A0EA_diagnosis"
+ ],
+ "updated_datetime": "2019-08-06T14:15:54.128069-05:00",
+ "case_id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
+ "state": "released",
+ "portion_ids": [
+ "cb6086d1-3416-4310-b109-e8fa6e8b72d4",
+ "8629bf5a-cdaf-4f6a-90bb-27dd4a7565c5",
+ "ae4f5816-f97a-4605-9b05-9ab820467dee"
+ ],
+ "submitter_portion_ids": [
+ "TCGA-BH-A0EA-10A-01",
+ "TCGA-BH-A0EA-01A-21-A13C-20",
+ "TCGA-BH-A0EA-01A-11"
+ ]
+ }
+ ],
+ "pagination": {
+ "count": 1,
+ "total": 1,
+ "size": 10,
+ "from": 0,
+ "sort": "",
+ "page": 1,
+ "pages": 1
+ }
+ },
+ "warnings": {}
+}
```
#### Retrieval of case metadata using individual UUIDs:
@@ -572,133 +680,133 @@ The `cases` endpoint supports a simple query format that retrieves the metadata
curl 'https://api.gdc.cancer.gov/cases/1f601832-eee3-48fb-acf5-80c4a454f26e?pretty=true&expand=diagnoses'
```
```Response
-{
+{
"data": {
- "diagnoses": [
- {
- "classification_of_tumor": "not reported",
- "last_known_disease_status": "not reported",
- "updated_datetime": "2016-05-16T10:59:16.740358-05:00",
- "primary_diagnosis": "c50.9",
- "submitter_id": "TCGA-BH-A0EA_diagnosis",
- "tumor_stage": "stage iia",
- "age_at_diagnosis": 26548.0,
- "vital_status": "dead",
- "morphology": "8500/3",
- "days_to_death": 991.0,
- "days_to_last_known_disease_status": null,
- "days_to_last_follow_up": null,
- "state": null,
- "days_to_recurrence": null,
- "diagnosis_id": "84654ad5-2a2c-5c3b-8340-ecac6a5550fe",
- "tumor_grade": "not reported",
- "tissue_or_organ_of_origin": "c50.9",
- "days_to_birth": -26548.0,
- "progression_or_recurrence": "not reported",
- "prior_malignancy": "not reported",
- "site_of_resection_or_biopsy": "c50.9",
- "created_datetime": null
- }
- ],
- "sample_ids": [
- "7f791228-dd77-4ab0-8227-d784a4c7fea1",
- "9a6c71a6-82cd-42b1-a93f-f569370848d6"
- ],
- "portion_ids": [
- "cb6086d1-3416-4310-b109-e8fa6e8b72d4",
- "8629bf5a-cdaf-4f6a-90bb-27dd4a7565c5",
- "ae4f5816-f97a-4605-9b05-9ab820467dee"
+ "slide_ids": [
+ "90154ea1-6b76-4445-870e-d531d6fa1239",
+ "1dd1cab5-5a81-428a-8153-91e8c4cf9905",
+ "a0826f0d-986a-491b-8c6f-b34f8929f3ee"
],
- "submitter_portion_ids": [
- "TCGA-BH-A0EA-01A-11",
- "TCGA-BH-A0EA-01A-21-A13C-20",
- "TCGA-BH-A0EA-10A-01"
+ "submitter_slide_ids": [
+ "TCGA-BH-A0EA-01A-01-MSA",
+ "TCGA-BH-A0EA-01A-01-TSA",
+ "TCGA-BH-A0EA-01Z-00-DX1"
],
- "created_datetime": null,
- "submitter_aliquot_ids": [
- "TCGA-BH-A0EA-01A-11R-A114-13",
- "TCGA-BH-A0EA-01A-11D-A111-01",
- "TCGA-BH-A0EA-01A-11W-A12T-09",
- "TCGA-BH-A0EA-01A-11R-A114-13",
- "TCGA-BH-A0EA-01A-11R-A115-07",
- "TCGA-BH-A0EA-01A-11D-A111-01",
- "TCGA-BH-A0EA-01A-11D-A314-09",
- "TCGA-BH-A0EA-01A-11D-A112-05",
- "TCGA-BH-A0EA-01A-11D-A10Y-09",
- "TCGA-BH-A0EA-01A-11D-A10X-02",
- "TCGA-BH-A0EA-01A-11W-A12T-09",
- "TCGA-BH-A0EA-01A-11D-A10X-02",
- "TCGA-BH-A0EA-01A-11D-A10Y-09",
- "TCGA-BH-A0EA-01A-11D-A314-09",
- "TCGA-BH-A0EA-01A-11R-A115-07",
- "TCGA-BH-A0EA-01A-11D-A112-05",
- "TCGA-BH-A0EA-10A-01D-A110-09",
- "TCGA-BH-A0EA-10A-01D-A113-01",
- "TCGA-BH-A0EA-10A-01W-A12U-09",
- "TCGA-BH-A0EA-10A-01D-A10Z-02",
- "TCGA-BH-A0EA-10A-01D-A113-01",
- "TCGA-BH-A0EA-10A-01D-A110-09",
- "TCGA-BH-A0EA-10A-01W-A12U-09",
- "TCGA-BH-A0EA-10A-01D-A10Z-02"
+ "disease_type": "Ductal and Lobular Neoplasms",
+ "analyte_ids": [
+ "fe678556-acf4-4bde-a95e-860bb0150a95",
+ "66ed0f86-5ca5-4dec-ba76-7ee4dcf31831",
+ "30cb470f-66d4-4085-8c30-83a42e8453d4",
+ "69ddc092-88a0-4839-a2bb-9f1c9e760409",
+ "f19f408a-815f-43d9-8032-e9482b796371"
],
- "updated_datetime": "2016-05-02T14:37:43.619198-05:00",
+ "submitter_id": "TCGA-BH-A0EA",
"submitter_analyte_ids": [
- "TCGA-BH-A0EA-01A-11R",
"TCGA-BH-A0EA-01A-11D",
- "TCGA-BH-A0EA-01A-11W",
+ "TCGA-BH-A0EA-01A-11R",
"TCGA-BH-A0EA-10A-01W",
+ "TCGA-BH-A0EA-01A-11W",
"TCGA-BH-A0EA-10A-01D"
],
- "analyte_ids": [
- "30cb470f-66d4-4085-8c30-83a42e8453d4",
- "66ed0f86-5ca5-4dec-ba76-7ee4dcf31831",
- "f19f408a-815f-43d9-8032-e9482b796371",
- "69ddc092-88a0-4839-a2bb-9f1c9e760409",
- "fe678556-acf4-4bde-a95e-860bb0150a95"
- ],
- "submitter_id": "TCGA-BH-A0EA",
- "case_id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
- "state": null,
"aliquot_ids": [
- "bcb7fc6d-60a0-48b7-aa81-14c0dda72d76",
- "97c64d6a-7dce-4d0f-9cb3-b3e4eb4719c5",
- "edad5bd3-efe0-4c5f-b05c-2c0c2951c45a",
- "bcb7fc6d-60a0-48b7-aa81-14c0dda72d76",
- "ca71ca96-cbb7-4eab-9487-251dda34e107",
- "97c64d6a-7dce-4d0f-9cb3-b3e4eb4719c5",
"eef9dce1-6ba6-432b-bbe2-53c7dbe64fe7",
- "42d050e4-e8ee-4442-b9c0-0ee14706b138",
- "561b8777-801a-49ed-a306-e7dafeb044b6",
+ "2beb34c4-d493-4a73-b21e-de77d43251ff",
+ "b1a3739d-d554-4202-b96f-f25a444e2042",
"262715e1-835c-4f16-8ee7-6900e26f7cf5",
+ "cfbd5476-e83a-401d-9f9a-639c73a0e35b",
"edad5bd3-efe0-4c5f-b05c-2c0c2951c45a",
- "262715e1-835c-4f16-8ee7-6900e26f7cf5",
+ "bcb7fc6d-60a0-48b7-aa81-14c0dda72d76",
+ "42d050e4-e8ee-4442-b9c0-0ee14706b138",
+ "97c64d6a-7dce-4d0f-9cb3-b3e4eb4719c5",
"561b8777-801a-49ed-a306-e7dafeb044b6",
- "eef9dce1-6ba6-432b-bbe2-53c7dbe64fe7",
"ca71ca96-cbb7-4eab-9487-251dda34e107",
- "42d050e4-e8ee-4442-b9c0-0ee14706b138",
- "cfbd5476-e83a-401d-9f9a-639c73a0e35b",
- "2beb34c4-d493-4a73-b21e-de77d43251ff",
- "b1a3739d-d554-4202-b96f-f25a444e2042",
- "cde982b7-3b0a-49eb-8710-a599cb0e44c1",
- "2beb34c4-d493-4a73-b21e-de77d43251ff",
- "cfbd5476-e83a-401d-9f9a-639c73a0e35b",
- "b1a3739d-d554-4202-b96f-f25a444e2042",
"cde982b7-3b0a-49eb-8710-a599cb0e44c1"
],
- "slide_ids": [
- "90154ea1-6b76-4445-870e-d531d6fa1239",
- "a0826f0d-986a-491b-8c6f-b34f8929f3ee"
+ "submitter_aliquot_ids": [
+ "TCGA-BH-A0EA-01A-11R-A115-07",
+ "TCGA-BH-A0EA-01A-11D-A112-05",
+ "TCGA-BH-A0EA-10A-01W-A12U-09",
+ "TCGA-BH-A0EA-01A-11D-A10X-02",
+ "TCGA-BH-A0EA-10A-01D-A113-01",
+ "TCGA-BH-A0EA-10A-01D-A110-09",
+ "TCGA-BH-A0EA-01A-11D-A314-09",
+ "TCGA-BH-A0EA-01A-11D-A10Y-09",
+ "TCGA-BH-A0EA-01A-11D-A111-01",
+ "TCGA-BH-A0EA-10A-01D-A10Z-02",
+ "TCGA-BH-A0EA-01A-11R-A114-13",
+ "TCGA-BH-A0EA-01A-11W-A12T-09"
+ ],
+ "diagnoses": [
+ {
+ "synchronous_malignancy": "Not Reported",
+ "ajcc_pathologic_stage": "Stage IIA",
+ "days_to_diagnosis": 0,
+ "created_datetime": null,
+ "last_known_disease_status": "not reported",
+ "tissue_or_organ_of_origin": "Breast, NOS",
+ "days_to_last_follow_up": null,
+ "age_at_diagnosis": 26548,
+ "primary_diagnosis": "Infiltrating duct carcinoma, NOS",
+ "updated_datetime": "2019-08-08T16:25:42.215495-05:00",
+ "prior_malignancy": "yes",
+ "year_of_diagnosis": 2008,
+ "state": "released",
+ "prior_treatment": "No",
+ "days_to_last_known_disease_status": null,
+ "ajcc_staging_system_edition": "6th",
+ "ajcc_pathologic_t": "T1c",
+ "days_to_recurrence": null,
+ "morphology": "8500/3",
+ "ajcc_pathologic_n": "N1a",
+ "ajcc_pathologic_m": "M0",
+ "submitter_id": "TCGA-BH-A0EA_diagnosis",
+ "classification_of_tumor": "not reported",
+ "diagnosis_id": "84654ad5-2a2c-5c3b-8340-ecac6a5550fe",
+ "icd_10_code": "C50.9",
+ "site_of_resection_or_biopsy": "Breast, NOS",
+ "tumor_grade": "Not Reported",
+ "progression_or_recurrence": "not reported"
+ }
+ ],
+ "created_datetime": null,
+ "diagnosis_ids": [
+ "84654ad5-2a2c-5c3b-8340-ecac6a5550fe"
+ ],
+ "sample_ids": [
+ "55864d86-dab8-47bb-a3e3-8cfb198b06c1",
+ "7f791228-dd77-4ab0-8227-d784a4c7fea1",
+ "9a6c71a6-82cd-42b1-a93f-f569370848d6"
],
"submitter_sample_ids": [
"TCGA-BH-A0EA-01A",
+ "TCGA-BH-A0EA-01Z",
"TCGA-BH-A0EA-10A"
+ ],
+ "primary_site": "Breast",
+ "submitter_diagnosis_ids": [
+ "TCGA-BH-A0EA_diagnosis"
+ ],
+ "updated_datetime": "2019-08-06T14:15:54.128069-05:00",
+ "case_id": "1f601832-eee3-48fb-acf5-80c4a454f26e",
+ "state": "released",
+ "portion_ids": [
+ "1ef8b20e-43e5-49d7-ac9a-03ce14f58daa",
+ "cb6086d1-3416-4310-b109-e8fa6e8b72d4",
+ "8629bf5a-cdaf-4f6a-90bb-27dd4a7565c5",
+ "ae4f5816-f97a-4605-9b05-9ab820467dee"
+ ],
+ "submitter_portion_ids": [
+ "TCGA-BH-A0EA-01A-21-A13C-20",
+ "TCGA-BH-A0EA-10A-01",
+ "TCGA-BH-A0EA-01A-21",
+ "TCGA-BH-A0EA-01A-11"
]
},
"warnings": {}
}
```
-### Annotations Endpoint
+### `Annotations` Endpoint
The GDC Annotation Endpoint `https://api.gdc.cancer.gov/annotations` enables search and retrieval of annotations stored in the GDC.
@@ -801,7 +909,7 @@ curl 'https://api.gdc.cancer.gov/annotations?filters=%7B%22op%22%3A%22in%22%2C%2
"warnings": {}
}
```
-### History Endpoint
+### `History` Endpoint
The GDC History Endpoint `https://api.gdc.cancer.gov/history` enables search and retrieval of version and release information about a file. This endpoint will return the entire provenance of all versions of a file. A file may be versioned if a file is updated by the GDC (e.g. using a new alignment algorithm or fixing a file that contained an error). `Version` refers to the instance of a particular file. `Release` refers to which data release a file was part of. A file may be a part of many different data releases with no change in version number or content.
@@ -814,11 +922,12 @@ This example is a query for versioning information associated with the follow wi
curl 'https://api.gdc.cancer.gov/history/1dd28069-5777-4ff9-bd2b-d1ba68e88b06'
```
``` Output
-[{"release_date": "2018-07-23", "version": "1", "uuid": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06", "file_change": "released", "data_release": "13.0"}]
+[{"uuid": "1dd28069-5777-4ff9-bd2b-d1ba68e88b06", "version": "1", "file_change": "superseded", "release_date": "2018-08-23", "data_release": "12.0"}, {"uuid": "76b3f4d8-c6b7-4662-ac42-1d27d4684281", "version": "2", "file_change": "released", "release_date": "2022-03-29", "data_release": "32.0"}]
+
```
-### \_mapping Endpoint
+### `_mapping` Endpoint
Each search and retrieval endpoint is equipped with a ```_mapping``` endpoint that provides information about available fields. For example, `files/_mapping` endpoint provides information about fields and field groups available at the `files` endpoint: `https://api.gdc.cancer.gov/files/_mapping`.
@@ -851,23 +960,24 @@ Each part of the response is described below:
curl 'https://api.gdc.cancer.gov/projects/_mapping'
```
```output
+This output was run through a JSON formatter for easier readability.
{
- ...
+ ...
- "_mapping": {
- "projects.disease_type": {
- "doc_type": "projects",
- "field": "disease_type",
- "type": "id"
- },
- "projects.name": {
- "doc_type": "projects",
- "field": "name",
- "type": "id"
- }
- }
+ "_mapping": {
+ "projects.disease_type": {
+ "doc_type": "projects",
+ "field": "disease_type",
+ "type": "id"
+ },
+ "projects.name": {
+ "doc_type": "projects",
+ "field": "name",
+ "type": "id"
+ }
+ }
- ...
+ ...
}
```
@@ -967,16 +1077,19 @@ This example requests `male` cases using HTTP GET.
The JSON object to be passed to the GDC API looks like:
- {"op": "=",
- "content": {
- "field": "cases.demographic.gender",
- "value": ["male"]
- }
- }
+ {
+ "op": "=",
+ "content": {
+ "field": "cases.demographic.gender",
+ "value": [
+ "male"
+ ]
+ }
+ }
-URL-encoding the above JSON object using [Percent-(URL)-encoding tool](https://www.beautifyconverter.com/json-escape-unescape.php) results in the following string:
+URL-encoding the above JSON object using a [Percent-(URL)-encoding tool](https://www.freeformatter.com/url-encoder.html) results in the following string:
- %7b%22op%22%3a+%22%3d%22%2c%0d%0a++++++%22content%22%3a+%7b%0d%0a++++++++++%22field%22%3a+%22cases.clinical.gender%22%2c%0d%0a++++++++++%22value%22%3a+%5b%22male%22%5d%0d%0a++++++%7d%0d%0a%7d
+ %7B%0D%0A++++%22op%22%3A+%22%3D%22%2C%0D%0A++++%22content%22%3A+%7B%0D%0A++++++++%22field%22%3A+%22cases.demographic.gender%22%2C%0D%0A++++++++%22value%22%3A+%5B%0D%0A++++++++++++%22male%22%0D%0A++++++++%5D%0D%0A++++%7D%0D%0A%7D
The above string can now be passed to the GDC API using the `filters` parameter:
@@ -996,697 +1109,785 @@ filt = {"op":"=",
params = {'filters':json.dumps(filt), 'sort':'demographic.gender:asc'}
# requests URL-encodes automatically
response = requests.get(cases_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
``` Output
{
"data": {
"hits": [
{
- "sample_ids": [
- "1d014bf1-95ae-42e3-ae39-97ff4841d8ca",
- "6b685bfc-651b-48d1-8e68-32c8096ea205"
+ "id": "03974dc9-0162-4de8-9897-09f88693681a",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Nevi and Melanomas",
+ "analyte_ids": [
+ "9747b614-624b-410a-8b94-854a16cd143a",
+ "c8974764-4836-4a34-aeb8-52b491f78d0e",
+ "bcea1ed5-b9cb-4a92-ad80-598d8a223fb3"
],
- "portion_ids": [
- "c061217a-266a-496d-8a96-3489191afa87",
- "0d3a6a58-0e00-4889-bc73-5ddb5a387738",
- "e858ee92-0438-48e9-a70d-80ef2c0ad539"
+ "submitter_id": "HCM-BROD-0334-C43",
+ "submitter_analyte_ids": [
+ "HCM-BROD-0334-C43-10A-01D",
+ "HCM-BROD-0334-C43-85M-01D",
+ "HCM-BROD-0334-C43-85M-01R"
],
- "submitter_portion_ids": [
- "TCGA-66-2770-01A-21-2193-20",
- "TCGA-66-2770-01A-01",
- "TCGA-66-2770-11A-01"
+ "days_to_consent": null,
+ "aliquot_ids": [
+ "dcd74e48-12f3-4a86-a829-c7e055c215b7",
+ "ea182abf-041d-474a-bc53-f6fdd05cd999",
+ "33f3ba0a-c902-4288-9fa7-5696d959e51d"
],
- "created_datetime": null,
"submitter_aliquot_ids": [
- "TCGA-66-2770-01A-01D-1522-08",
- "TCGA-66-2770-01A-01D-0848-05",
- "TCGA-66-2770-01A-01W-0879-09",
- "TCGA-66-2770-11A-01W-0878-08",
- "TCGA-66-2770-01A-01R-0849-01",
- "TCGA-66-2770-01A-01W-0877-08",
- "TCGA-66-2770-01A-01D-0846-06",
- "TCGA-66-2770-11A-01W-0880-09",
- "TCGA-66-2770-01A-01D-0964-09",
- "TCGA-66-2770-11A-01D-0846-06",
- "TCGA-66-2770-01A-01D-0845-04",
- "TCGA-66-2770-01A-01W-0881-10",
- "TCGA-66-2770-11A-01D-0963-08",
- "TCGA-66-2770-11A-01D-0844-01",
- "TCGA-66-2770-01A-01R-0851-07",
- "TCGA-66-2770-11A-01W-0882-10",
- "TCGA-66-2770-11A-01D-1522-08",
- "TCGA-66-2770-01A-01T-1557-13",
- "TCGA-66-2770-01A-01D-0847-02",
- "TCGA-66-2770-01A-01D-0844-01",
- "TCGA-66-2770-11A-01D-0847-02",
- "TCGA-66-2770-11A-01D-0964-09",
- "TCGA-66-2770-01A-01D-0963-08",
- "TCGA-66-2770-01A-01R-0850-03",
- "TCGA-66-2770-11A-01D-0845-04",
- "TCGA-66-2770-01A-01T-0852-07"
- ],
- "updated_datetime": "2016-05-02T15:57:03.730994-05:00",
- "submitter_analyte_ids": [
- "TCGA-66-2770-01A-01D",
- "TCGA-66-2770-11A-01W",
- "TCGA-66-2770-01A-01T",
- "TCGA-66-2770-01A-01W",
- "TCGA-66-2770-01A-01R",
- "TCGA-66-2770-11A-01D"
+ "HCM-BROD-0334-C43-85M-01R-A79O-41",
+ "HCM-BROD-0334-C43-10A-01D-A79L-36",
+ "HCM-BROD-0334-C43-85M-01D-A79L-36"
],
- "analyte_ids": [
- "385807d3-78de-4558-8d93-702d93fc835a",
- "247acc7a-b4f5-47e9-86da-5ea9b04ad444",
- "151b8cb9-6b0a-4db9-9b0e-62aa501b35d9",
- "e549aebd-4dda-4ea8-8ccf-56c03bc8b2be",
- "631ad4eb-845a-4e70-96ad-4b40157218a8",
- "9a75640e-09d4-42b7-8cb4-75d62b39e98a"
- ],
- "submitter_id": "TCGA-66-2770",
- "case_id": "f1b357e4-d67a-42c9-b0b7-12f69fa3da58",
- "state": null,
- "aliquot_ids": [
- "a2d10f8e-6b27-4df0-bd25-ac24992d0bb4",
- "8c1c733a-abed-468f-b4d0-d1ac34ba6d8b",
- "cad8d384-3b7a-4f70-89c2-5584ae75c5eb",
- "42e774cf-3c4a-4efd-9665-378cb6b4afac",
- "3755168b-f5da-422d-847a-566cb112a8d7",
- "cae4d249-ba67-4316-8761-7e71e3813182",
- "aa6e700c-ce01-4cc9-87de-8bf615a8aa1a",
- "ad5c4069-e616-4ab4-9b03-b196f9189b20",
- "07c26ea4-0584-4cb0-8e5a-d057b8fe6c14",
- "f95c2cb5-d20a-4f1f-8f2a-95a2d37fbdc4",
- "817bf327-e583-4704-b294-c3645dcc4adf",
- "2246cb75-38bd-491f-b6ee-99f4781f2564",
- "a81b9090-626d-492d-9baf-7fa3ef70111c",
- "5cd6f026-894e-45f6-bc59-d6f056e63846",
- "e417903d-ab76-44f0-aae9-3a91fa9a8d3c",
- "1d809a56-31ca-49d8-a57b-e773236b24de",
- "df60a743-ef4b-43ea-bc5a-4d75e8befb8a",
- "871350e2-958f-401c-ae86-6bc880a01942",
- "3dc4207d-5671-4c3d-b75a-d39ef69b564c",
- "69b77cc0-d00a-4ea3-9b39-3e3019d9e292",
- "3d035ee8-9523-4771-8738-c8a5a2f91403",
- "775e46bd-e56f-40fa-9891-aaedc1d49395",
- "d1c60049-922a-42d4-bd7e-8cf4ace47f05",
- "5220a53f-f3fc-476c-aa72-65a038eb2fd8",
- "b7e44e6e-ccf9-4b75-a258-159912ab51ca",
- "42750622-28d7-4d32-9262-b139fe77bc01"
+ "created_datetime": "2020-05-21T08:55:40.814734-05:00",
+ "diagnosis_ids": [
+ "3d666f1b-58c2-451f-8ebf-87b5caa02aaf",
+ "fedc3533-85f7-4fc6-b996-a1f596e021df"
],
- "slide_ids": [
- "a10196d2-7a81-4e1e-a9a7-62d123c30875",
- "72edc1ba-916d-42a2-9f22-6254c6e54c5c",
- "ff15eeb9-550e-4c78-90cc-a6cce8ccc3df",
- "71ccfb52-169d-4176-94d6-fff5b75f853d"
+ "sample_ids": [
+ "cd88baf4-b6eb-4df5-9b42-d55f3aad739c",
+ "eb79f8b4-1cc3-4a32-ad51-cfea8cf150f0"
],
+ "consent_type": null,
"submitter_sample_ids": [
- "TCGA-66-2770-11A",
- "TCGA-66-2770-01A"
- ]
- },
- {
- "sample_ids": [
- "06889714-2a40-4248-98ee-f690b301e36a",
- "9f43a0c6-ea19-4021-b0ed-026f33ce1c33"
+ "HCM-BROD-0334-C43-10A",
+ "HCM-BROD-0334-C43-85M"
+ ],
+ "primary_site": "Skin",
+ "submitter_diagnosis_ids": [
+ "HCM-BROD-0334-C43_diagnosis2",
+ "HCM-BROD-0334-C43_diagnosis"
],
+ "updated_datetime": "2021-03-03T15:15:08.075155-06:00",
+ "case_id": "03974dc9-0162-4de8-9897-09f88693681a",
+ "index_date": "Diagnosis",
+ "state": "released",
"portion_ids": [
- "3a001d28-7cf9-4c61-b155-73938aebaa25",
- "79554cfd-e853-481e-8e37-1e296034094e"
+ "bd0bc175-5b54-47c1-96fc-c6d8afc0c115"
],
"submitter_portion_ids": [
- "TCGA-02-0075-01A-01",
- "TCGA-02-0075-10A-01"
- ],
- "created_datetime": null,
- "submitter_aliquot_ids": [
- "TCGA-02-0075-01A-01W-0204-02",
- "TCGA-02-0075-01A-01R-0194-03",
- "TCGA-02-0075-01A-01D-0198-02",
- "TCGA-02-0075-01A-01R-0202-01",
- "TCGA-02-0075-10A-01W-0207-09",
- "TCGA-02-0075-01A-01R-0676-04",
- "TCGA-02-0075-10A-01D-0198-02",
- "TCGA-02-0075-10A-01D-0197-06",
- "TCGA-02-0075-10A-01D-0193-01",
- "TCGA-02-0075-01A-01W-0207-09",
- "TCGA-02-0075-01A-01W-0206-08",
- "TCGA-02-0075-01A-01D-0193-01",
- "TCGA-02-0075-10A-01W-0205-10",
- "TCGA-02-0075-01A-01R-0201-02",
- "TCGA-02-0075-10A-01W-0204-02",
- "TCGA-02-0075-01A-01D-0199-05",
- "TCGA-02-0075-10A-01W-0206-08",
- "TCGA-02-0075-01A-01D-0196-04",
- "TCGA-02-0075-01A-01T-0195-07",
- "TCGA-02-0075-10A-01D-0196-04",
- "TCGA-02-0075-01A-01D-0197-06",
- "TCGA-02-0075-01A-01D-0888-01",
- "TCGA-02-0075-01A-01R-0195-07",
- "TCGA-02-0075-01A-01W-0205-10"
- ],
- "updated_datetime": "2016-05-02T15:00:01.972331-05:00",
- "submitter_analyte_ids": [
- "TCGA-02-0075-01A-01R",
- "TCGA-02-0075-10A-01D",
- "TCGA-02-0075-01A-01W",
- "TCGA-02-0075-01A-01T",
- "TCGA-02-0075-01A-01D",
- "TCGA-02-0075-10A-01W"
- ],
- "analyte_ids": [
- "fec22de0-a2b9-45df-9854-1ebe76cee84e",
- "b4d11c50-61f1-4d4a-815f-1c0413018d7f",
- "c48673d0-a38d-44e1-8cfd-e91cb23ea2d5",
- "24f1852c-999a-4ea8-917c-fcfd683e2aca",
- "aa431260-a0fc-4924-80ce-61cab8b5e83e",
- "11f21140-d761-44ca-a9b2-b24099df3b15"
- ],
- "submitter_id": "TCGA-02-0075",
- "case_id": "b196f82b-ef3f-4e05-99f7-da5df65e691e",
- "state": null,
- "aliquot_ids": [
- "75531fe0-101e-4220-bd47-98892c90ee70",
- "e5ea38d4-f47c-4c8a-8bab-13631e0a9a7b",
- "d48b7c2c-daac-4496-af8f-1f45ca43f627",
- "bbba08fc-2514-4e15-afb7-41eecc7e876f",
- "0685b37f-a47c-4222-a846-bf9f3c000de3",
- "683986da-3cee-446d-9b7a-83bef25815c9",
- "e6ffdb20-a1be-4664-bcd3-cc7a4de6f40b",
- "5d1f25c0-9e1a-41ad-9735-134f39dbf70e",
- "528b40b9-246f-4ba3-8209-777136638e62",
- "33131479-5d69-4262-a549-ba8864320f3b",
- "5c7822fc-cf4f-4f62-8482-7c0ce1b7ab9a",
- "b95e7659-e3a4-4e96-b98c-f67d26b85322",
- "30c84aca-f9db-4e07-ac34-1a92b1652ca1",
- "d5e3b5cc-06e0-4294-9d3c-8f3b63acae3d",
- "b14b3d09-3a7f-41a6-81df-2757efa67906",
- "513040e2-dc29-4e2c-86fb-57371eede17a",
- "21c3be1b-7c1e-4864-99d1-486cfe5d8f1d",
- "5e28e5dc-6dfa-44a9-8793-9134cb4cdda5",
- "b8c25892-4773-428f-a02c-f930931268e8",
- "266d5260-08e4-4cec-87f3-ca415bd98575",
- "8859a3ae-f85d-4ef2-830b-80f42f98d53e",
- "ac018a8c-a6e2-4291-a4bf-a330ae9c441e",
- "4b022f7f-7549-4d97-9d41-4e5f2e9ec74c",
- "caad3dfa-74a9-4ecc-95c1-86f6fbfd4ab5"
- ],
- "slide_ids": [
- "39f547cd-5dc3-4bf4-99ea-073bb161c23c",
- "5f096267-0cc2-4cc5-a206-7357159633d7"
- ],
- "submitter_sample_ids": [
- "TCGA-02-0075-10A",
- "TCGA-02-0075-01A"
+ "HCM-BROD-0334-C43-10A-01"
]
},
{
- "sample_ids": [
- "ba08195b-31cf-4bb1-a470-23740225c99d",
- "929889c4-e474-4104-b69b-fac7e414a59e"
+ "id": "03bfeb7c-cecf-4691-8263-33cdfe391ea9",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "db8132c3-47a8-49ec-9b78-bc7d18debf67",
+ "f74bf217-dae2-4554-92e4-8707068ea7a7",
+ "01764f17-2a97-442e-a08b-8a21303b4770",
+ "c9884c81-3c8f-4ad9-a962-42c7459a2276",
+ "d4f1a9f8-f748-4f45-aa06-da4d760c4fab"
],
- "portion_ids": [
- "48a36eb4-79fb-45e7-8bb1-0fa1d5fcda2c",
- "1de5e67a-ac3f-4c18-92c4-27ba1868c7ac",
- "e09fc5e7-e8d2-4bf9-b12b-17b22e0387e4"
+ "submitter_id": "HCM-BROD-0124-C25",
+ "submitter_analyte_ids": [
+ "HCM-BROD-0124-C25-85A-01D",
+ "HCM-BROD-0124-C25-01A-01D",
+ "HCM-BROD-0124-C25-10A-01D",
+ "HCM-BROD-0124-C25-85A-01R",
+ "HCM-BROD-0124-C25-01A-01R"
],
- "submitter_portion_ids": [
- "TCGA-EJ-A8FU-10A-01",
- "TCGA-EJ-A8FU-01A-21-A43L-20",
- "TCGA-EJ-A8FU-01A-11"
+ "aliquot_ids": [
+ "fe8e2565-749a-470a-b843-7afbe95ded81",
+ "092656af-b279-46b8-9ccf-b1eabfbd1d6f",
+ "677edb2c-fac3-4878-a28d-cf4e0d7873d7",
+ "8bb9fff4-24f8-426e-9f2a-4cb30a4ac5c2",
+ "f1c6f71d-b125-47bb-91ad-90d7cbff0012"
],
- "created_datetime": null,
"submitter_aliquot_ids": [
- "TCGA-EJ-A8FU-01A-11R-A36B-13",
- "TCGA-EJ-A8FU-01A-11R-A36G-07",
- "TCGA-EJ-A8FU-01A-11D-A363-01",
- "TCGA-EJ-A8FU-10A-01D-A361-01",
- "TCGA-EJ-A8FU-10A-01D-A362-08",
- "TCGA-EJ-A8FU-01A-11W-A447-08",
- "TCGA-EJ-A8FU-01A-11D-A365-05",
- "TCGA-EJ-A8FU-01A-11D-A364-08",
- "TCGA-EJ-A8FU-10A-01W-A446-08"
- ],
- "updated_datetime": "2016-05-02T15:57:04.948573-05:00",
- "submitter_analyte_ids": [
- "TCGA-EJ-A8FU-01A-11W",
- "TCGA-EJ-A8FU-01A-11D",
- "TCGA-EJ-A8FU-01A-11R",
- "TCGA-EJ-A8FU-10A-01W",
- "TCGA-EJ-A8FU-10A-01D"
+ "HCM-BROD-0124-C25-01A-01D-A78W-36",
+ "HCM-BROD-0124-C25-85A-01D-A786-36",
+ "HCM-BROD-0124-C25-01A-01R-A78X-41",
+ "HCM-BROD-0124-C25-10A-01D-A78W-36",
+ "HCM-BROD-0124-C25-85A-01R-A787-41"
],
- "analyte_ids": [
- "2d4e4925-6ac8-498f-882b-4bbf319f6b7b",
- "8d09b982-1256-4674-b383-d6ca4b4bb3c8",
- "c74495d9-63bf-4ac0-b10e-04b3b06103c1",
- "b9884d98-af57-4901-8b9d-4fdbf73d2c5a",
- "2f16ac02-13bf-44fd-bbd7-658c1c384928"
- ],
- "submitter_id": "TCGA-EJ-A8FU",
- "case_id": "23e56e08-e11d-4e83-88a8-1254675b3af8",
- "state": null,
- "aliquot_ids": [
- "e77da017-5dc6-4e32-9568-755e4ee9b533",
- "c9b286d1-d500-4bb3-bb3d-5bf40b1b1265",
- "b7867d52-7987-46d4-a595-0ff5b5375a58",
- "5586ad35-94b7-459e-8982-8e7fb25697a1",
- "162a63f7-594f-4669-a06d-b4899c7fe86a",
- "b8b1ab44-ee6e-4ac5-9efd-d5bd07e67b9c",
- "7adcdf73-3ad3-4da7-ab27-2888f1d4f53a",
- "eb498e52-3eae-402f-8cac-ec930f8d938d",
- "293f781c-c2c7-479b-b1a6-5f951a2c5e5a"
+ "created_datetime": "2019-04-16T11:21:56.471158-05:00",
+ "diagnosis_ids": [
+ "00184ed8-780a-4acf-b5f1-b1fcd6b08dcf"
],
- "slide_ids": [
- "454a95d5-d084-4f36-b1f1-32c6c23ab46e"
+ "sample_ids": [
+ "e6bc6b9d-553f-4c78-bc11-ebcf7b0d4f27",
+ "539593d1-bd9b-4379-8c86-16cdf607cd4e",
+ "b0c0b5b0-cf6d-4281-8f4d-43dc77e88bc6"
],
"submitter_sample_ids": [
- "TCGA-EJ-A8FU-01A",
- "TCGA-EJ-A8FU-10A"
+ "HCM-BROD-0124-C25-85A",
+ "HCM-BROD-0124-C25-01A",
+ "HCM-BROD-0124-C25-10A"
+ ],
+ "primary_site": "Pancreas",
+ "submitter_diagnosis_ids": [
+ "HCM-BROD-0124-C25_diagnosis"
+ ],
+ "updated_datetime": "2021-07-12T12:25:55.528644-05:00",
+ "case_id": "03bfeb7c-cecf-4691-8263-33cdfe391ea9",
+ "index_date": "Diagnosis",
+ "state": "released",
+ "portion_ids": [
+ "303512c0-382b-4442-a5a7-2699ca8b1384",
+ "f07a4dae-2878-452e-9836-6f39c594d38d"
+ ],
+ "submitter_portion_ids": [
+ "HCM-BROD-0124-C25-10A-01",
+ "HCM-BROD-0124-C25-01A-01"
]
},
{
- "sample_ids": [
- "d43f0112-fe59-4842-9fda-1189e5fb7248",
- "213cbbe5-c382-47a1-b936-bf40c2c99091"
+ "id": "05f41641-ee22-4d41-bb87-2bfa47cd983f",
+ "lost_to_followup": null,
+ "slide_ids": [
+ "775be57f-6df8-40c3-9c4e-c06dd900a237",
+ "7bd1dea3-7819-43e5-a9e4-5fe0a189cc87"
],
- "portion_ids": [
- "26441aae-22e5-4e69-b3f5-34ccde356c93",
- "60d7a93c-0634-438e-a72a-ce63630bb890",
- "246a8f01-7ef2-4737-a984-49aa0b41c089"
+ "submitter_slide_ids": [
+ "HCM-BROD-0095-C15-06A-01-S2-HE",
+ "HCM-BROD-0095-C15-06A-01-S1-HE"
],
- "submitter_portion_ids": [
- "TCGA-F2-6879-10A-01",
- "TCGA-F2-6879-01A-21-A39M-20",
- "TCGA-F2-6879-01A-11"
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "3cd4c8b4-8c23-4c20-bffb-97e0f5e5ac0a",
+ "76ed5a0c-8129-4a73-bbd2-08d9b36bee62",
+ "d2acb3a5-7b51-4d86-8c2f-18b3f886a001",
+ "e6f06e9e-bf13-44fb-990c-d64b1096cd7c",
+ "331fc610-2b43-4b0a-a9b2-3a8a665cb000"
],
- "created_datetime": "2016-05-02T16:23:44.347995-05:00",
- "submitter_aliquot_ids": [
- "TCGA-F2-6879-01A-11R-2155-13",
- "TCGA-F2-6879-10A-01D-2153-01",
- "TCGA-F2-6879-10A-01D-2152-26",
- "TCGA-F2-6879-01A-11D-2157-05",
- "TCGA-F2-6879-10A-01D-2154-08",
- "TCGA-F2-6879-01A-11D-A45X-08",
- "TCGA-F2-6879-01A-11D-2154-08",
- "TCGA-F2-6879-01A-11W-2179-08",
- "TCGA-F2-6879-01A-11D-2153-01",
- "TCGA-F2-6879-01A-11R-2156-07",
- "TCGA-F2-6879-01A-11D-2152-26",
- "TCGA-F2-6879-10A-01D-A45X-08",
- "TCGA-F2-6879-10A-01W-2179-08",
- "TCGA-F2-6879-01A-01D-YYYY-23"
- ],
- "updated_datetime": "2016-05-02T16:23:44.347995-05:00",
+ "submitter_id": "HCM-BROD-0095-C15",
"submitter_analyte_ids": [
- "TCGA-F2-6879-10A-01D",
- "TCGA-F2-6879-01A-11R",
- "TCGA-F2-6879-10A-01W",
- "TCGA-F2-6879-01A-11W",
- "TCGA-F2-6879-01A-11D"
+ "HCM-BROD-0095-C15-06A-11R",
+ "HCM-BROD-0095-C15-85A-01R",
+ "HCM-BROD-0095-C15-85A-01D",
+ "HCM-BROD-0095-C15-10B-01D",
+ "HCM-BROD-0095-C15-06A-11D"
],
- "analyte_ids": [
- "e87dde8d-3bf5-42d8-9a77-620d5c4943e0",
- "30ade77d-996b-4031-93ab-6b341d49eb0a",
- "1d94bd70-6621-4a94-8102-d673663e6665",
- "ea65d92e-1597-410d-84d8-abb2a6235b3e",
- "79697034-1cec-4d92-8195-8a35258ab477"
- ],
- "submitter_id": "TCGA-F2-6879",
- "case_id": "8d9bd437-8b4b-4da5-87ba-6b5790f05022",
- "state": null,
+ "days_to_consent": null,
"aliquot_ids": [
- "e7533585-b062-4d74-b511-05dc806a1357",
- "e107952a-cc2b-4410-b0f9-62e7115430a0",
- "61f1c8b1-986a-485a-9d96-4e4285b6425a",
- "c043e276-fece-4cb9-a848-a0b16e6099b6",
- "e5d110e1-63ad-49ce-b9b7-22bbd7ef8a88",
- "7accb08d-acdb-46bc-bf7f-b9f678193115",
- "a52cd04b-41d6-40db-b050-00ef3a143f7e",
- "207fcf5e-c422-4333-9ec2-5dab38d240c7",
- "5ddd3f83-28a8-4b7f-9aec-203a3c2efbe5",
- "ccd4dd70-c0e4-42cf-870e-33d1013b201a",
- "e12314fe-f16a-4d85-95b4-e712ede450f6",
- "695461e3-283c-4b5b-9325-6b2588b67fd8",
- "8481be1e-0993-487d-8d73-b0eb72b304ee",
- "d7200791-4f1c-418f-8744-91b793486d9f"
+ "24b11e4f-5e04-4d28-875e-a242515f6d07",
+ "4af130bc-16ef-42c0-9f01-04dc601f4165",
+ "9bd07008-27df-4e7e-be83-2d0fbeb2db94",
+ "11efd698-fb14-4a52-aaec-0084afc5bbe0",
+ "3f7b05f1-a8cd-48f2-89f1-4a1e4eab92c5"
],
- "slide_ids": [
- "bcbcc947-cab1-4400-aebc-1d9e251a3ce8",
- "cae8d0b9-3605-40af-bf99-7c23df8110a9"
+ "submitter_aliquot_ids": [
+ "HCM-BROD-0095-C15-06A-11R-A79D-41",
+ "HCM-BROD-0095-C15-85A-01R-A79D-41",
+ "HCM-BROD-0095-C15-10B-01D-A79C-36",
+ "HCM-BROD-0095-C15-06A-11D-A79C-36",
+ "HCM-BROD-0095-C15-85A-01D-A79C-36"
+ ],
+ "created_datetime": "2019-04-04T14:07:27.780827-05:00",
+ "diagnosis_ids": [
+ "cd0da3ad-189b-4be0-a8c6-55e0294e7c73"
],
- "submitter_sample_ids": [
- "TCGA-F2-6879-10A",
- "TCGA-F2-6879-01A"
- ]
- },
- {
"sample_ids": [
- "3a66b5bd-7037-463c-9f8d-2ba3de9d5571",
- "84f603d6-9f71-48fb-b2e3-190424407452"
+ "5fcb1b3c-6711-4caa-9c6c-31ed2c0fc238",
+ "71568fd7-b545-4979-a907-ef8bf41e76db",
+ "d9db85d9-5ea2-41cd-96d2-d50462b5d4b6"
],
+ "consent_type": null,
+ "submitter_sample_ids": [
+ "HCM-BROD-0095-C15-85A",
+ "HCM-BROD-0095-C15-06A",
+ "HCM-BROD-0095-C15-10B"
+ ],
+ "primary_site": "Esophagus",
+ "submitter_diagnosis_ids": [
+ "HCM-BROD-0095-C15_diagnosis"
+ ],
+ "updated_datetime": "2021-01-06T22:55:10.531130-06:00",
+ "case_id": "05f41641-ee22-4d41-bb87-2bfa47cd983f",
+ "index_date": "Diagnosis",
+ "state": "released",
"portion_ids": [
- "fe90de9f-8ee3-4d55-834f-a90538958cb7",
- "7a0042fd-07f0-4894-adb0-03cebce8aa02"
+ "0ef29dec-13e5-4faf-997c-a9c9502a353b",
+ "c7660224-cef0-4d7d-b532-a4e9d4a7fb7c"
],
"submitter_portion_ids": [
- "TCGA-VQ-A922-01A-11",
- "TCGA-VQ-A922-10A-01"
+ "HCM-BROD-0095-C15-06A-11",
+ "HCM-BROD-0095-C15-10B-01"
+ ]
+ },
+ {
+ "id": "07a067d0-7dfc-4817-b4c5-9200da20a59f",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "3a6a1f63-f9d3-44f0-8659-3b3cd19a42a0",
+ "6671382a-20f0-41d7-a35d-c09c7275d08a",
+ "d5034c0e-2790-4fb5-82b0-e36c0626b57c"
],
- "created_datetime": "2016-05-02T16:26:23.121974-05:00",
- "submitter_aliquot_ids": [
- "TCGA-VQ-A922-10A-01D-A412-01",
- "TCGA-VQ-A922-01A-11D-A40Z-01",
- "TCGA-VQ-A922-10A-01D-A413-08",
- "TCGA-VQ-A922-01A-01D-YYYY-23",
- "TCGA-VQ-A922-01A-11R-A414-31",
- "TCGA-VQ-A922-01A-11D-A410-08",
- "TCGA-VQ-A922-01A-11R-A415-13",
- "TCGA-VQ-A922-01A-11D-A411-05"
- ],
- "updated_datetime": "2016-05-02T16:26:23.121974-05:00",
+ "submitter_id": "HCM-SANG-0268-C18",
"submitter_analyte_ids": [
- "TCGA-VQ-A922-01A-11R",
- "TCGA-VQ-A922-10A-01D",
- "TCGA-VQ-A922-01A-11D"
+ "HCM-SANG-0268-C18-85A-01D",
+ "HCM-SANG-0268-C18-85A-01R",
+ "HCM-SANG-0268-C18-10A-01D"
],
- "analyte_ids": [
- "15bec495-04c7-412b-ad69-26b1f9274ccf",
- "26a24673-04a1-4837-b888-702b0578aef2",
- "2c0ecd67-b9ff-4e60-8d2f-7744c79a13aa"
- ],
- "submitter_id": "TCGA-VQ-A922",
- "case_id": "8bd783a3-d6c9-4c87-a2a1-09f903b9c7ca",
- "state": null,
+ "days_to_consent": null,
"aliquot_ids": [
- "58a121b4-265c-44ae-b6a9-79d087ee8b34",
- "76fbba49-0123-4524-89aa-a1818c5507cb",
- "0b0805bb-edaa-400f-ae9f-effed3dbb605",
- "3370d626-d572-4d13-9cd3-1823a5df3d34",
- "60934993-a9df-4389-b64d-da6844ef22df",
- "243f24ba-bb0f-44e0-bcb1-69a97b395981",
- "6cae9f2a-1c6c-4645-98b6-20719aec1413",
- "44d020d1-c516-4a15-94e8-bcf0cb9c2683"
+ "e60dec0b-a2db-4b66-9e77-e8f0b8856a53",
+ "2a558468-cca8-4fe9-a745-8c76436ce6cb",
+ "ae8d0e5c-340b-488e-ba9e-d0912869ee8d",
+ "745432c8-ea7f-48e0-bd53-28ead61a7ec0",
+ "e2d32e2a-3161-40ab-88ef-0a4dfd3a72b4",
+ "c2328729-0a45-4740-a9cb-3b8ff31807b3",
+ "6b3085c9-63b8-4b82-8413-f775dbc6d993"
],
- "slide_ids": [
- "0ff02899-57f8-419e-8872-c6ede53f4d3c"
+ "submitter_aliquot_ids": [
+ "HCM-SANG-0268-C18-85A-01D-A80T-32-aliquot",
+ "HCM-SANG-0268-C18-85A-01R-A80W-32-aliquot",
+ "HCM-SANG-0268-C18-10A-01D-A79M-36",
+ "HCM-SANG-0268-C18-10A-01D-A80T-32-aliquot",
+ "HCM-SANG-0268-C18-85A-01R-A79O-41",
+ "HCM-SANG-0268-C18-01A-01D-A80T-32-aliquot",
+ "HCM-SANG-0268-C18-85A-01D-A79M-36"
+ ],
+ "created_datetime": "2020-05-29T12:07:57.849637-05:00",
+ "diagnosis_ids": [
+ "78f3dc9e-9b51-42b2-b809-405e438f7f68"
],
- "submitter_sample_ids": [
- "TCGA-VQ-A922-10A",
- "TCGA-VQ-A922-01A"
- ]
- },
- {
"sample_ids": [
- "5bb5bd60-cf47-413b-88fa-f14977e24035",
- "82fcf670-1646-4a28-9578-f7e5b2f426e5",
- "3b87fed0-cfbd-4ee3-b71d-ab595853e836"
- ],
+ "20b5f03e-26b9-4902-9bdd-667168727e6d",
+ "17766175-a251-454b-9263-76af20b77290",
+ "6a530ae1-79fa-4536-bbf5-86b14a80563e",
+ "9eb44d2a-80c0-43d2-822a-0598a7d8e68c",
+ "a3817d45-dbaa-4295-8d17-712c0c2438e4",
+ "475ac2f5-327f-4d94-bebf-e03058911b59"
+ ],
+ "consent_type": null,
+ "submitter_sample_ids": [
+ "HCM-SANG-0268-C18-10A-01D-A80T-32",
+ "HCM-SANG-0268-C18-85A-01D-A80T-32",
+ "HCM-SANG-0268-C18-01A-01D-A80T-32",
+ "HCM-SANG-0268-C18-10A",
+ "HCM-SANG-0268-C18-85A-01R-A80W-32",
+ "HCM-SANG-0268-C18-85A"
+ ],
+ "primary_site": "Colon",
+ "submitter_diagnosis_ids": [
+ "HCM-SANG-0268-C18_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "07a067d0-7dfc-4817-b4c5-9200da20a59f",
+ "index_date": "Sample Procurement",
+ "state": "released",
"portion_ids": [
- "18bf160e-702a-464a-9920-f115024b5484",
- "10a9c093-009d-4bc0-a344-2afd3f0f9b9f",
- "8ebd06e1-5eda-47ec-8888-61965ecf005e"
+ "4d2aeabd-3a5a-42fc-8bcd-301829a32883"
],
"submitter_portion_ids": [
- "TCGA-HU-8243-11A-01",
- "TCGA-HU-8243-01A-11",
- "TCGA-HU-8243-10A-01"
- ],
- "created_datetime": "2016-05-02T16:17:09.754748-05:00",
- "submitter_aliquot_ids": [
- "TCGA-HU-8243-01A-01D-YYYY-23",
- "TCGA-HU-8243-01A-11D-2340-08",
- "TCGA-HU-8243-01A-11D-2338-01",
- "TCGA-HU-8243-01A-11D-2342-05",
- "TCGA-HU-8243-11A-01D-2338-01",
- "TCGA-HU-8243-11A-01D-2340-08",
- "TCGA-HU-8243-10A-01D-2339-01",
- "TCGA-HU-8243-01A-11R-2343-13",
- "TCGA-HU-8243-10A-01D-2341-08"
- ],
- "updated_datetime": "2016-05-02T16:17:09.754748-05:00",
- "submitter_analyte_ids": [
- "TCGA-HU-8243-11A-01D",
- "TCGA-HU-8243-10A-01D",
- "TCGA-HU-8243-01A-11R",
- "TCGA-HU-8243-01A-11D"
- ],
+ "HCM-SANG-0268-C18-10A-01"
+ ]
+ },
+ {
+ "id": "0a6a14db-ca5c-4bf9-9125-611d672bc67b",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
"analyte_ids": [
- "89c9094d-5cf6-4c7d-ad24-41b7ad9427cc",
- "2c413e60-0122-426b-afb3-ae94810e2513",
- "57d41760-0fed-49d2-8606-48231cb244ea",
- "37ed51fd-b540-408e-8bd6-4447ae4aa84a"
- ],
- "submitter_id": "TCGA-HU-8243",
- "case_id": "77a8eab6-f6a1-4739-9031-75ead40d68cb",
- "state": null,
+ "4c154bcd-3cd5-4e3a-bb90-948cf4c92965",
+ "e8c8e41f-0a24-4d16-bcdd-3d7422150c4d",
+ "7ce86045-0051-465e-8744-f6299a007bce",
+ "f32ed211-100b-47ca-b0b0-774a15d58e60",
+ "a70c036b-ea29-46d5-ba7f-8152947146f8",
+ "117f4f00-2978-4a67-8a18-3acbd77fe930"
+ ],
+ "submitter_id": "HCM-SANG-0271-D12",
+ "submitter_analyte_ids": [
+ "HCM-SANG-0271-D12-86A-01R",
+ "HCM-SANG-0271-D12-86A-01D",
+ "HCM-SANG-0271-D12-85A-01R",
+ "HCM-SANG-0271-D12-31B-01D",
+ "HCM-SANG-0271-D12-10A-01D",
+ "HCM-SANG-0271-D12-85B-01D"
+ ],
+ "days_to_consent": null,
"aliquot_ids": [
- "ace3edd6-14a9-42cc-84f3-6127237f2913",
- "a711abd1-f1c2-4e42-8b66-79b4514ac1c4",
- "6af7ba34-58f7-4472-8c7e-89fc91ad5ac1",
- "558ff67a-a584-46f8-9089-8f4a08015294",
- "71c0a224-5953-4b59-a49c-b7aa1e959f1e",
- "a460c222-bcac-4959-961f-4dbd73e1ce13",
- "6e5789d7-4988-457a-86eb-e618c7ab06eb",
- "ff31f56b-398c-45ee-b122-f10027774527",
- "9635cfd4-3d26-4fc6-846c-fd74d5b60098"
+ "d59f00c0-696c-4444-8e7f-c0140dbcd8e3",
+ "d1848dd2-dccb-43c6-bd29-bb7f331089ef",
+ "f7125c62-3864-412e-9afa-dc4970e43d05",
+ "3bfc0b7c-58c7-47c9-9664-5c2d087f4485",
+ "3109b83d-36ed-4850-9847-9710f3921413",
+ "8dda870c-374e-49ae-8f0b-44b50ef567cd",
+ "80d9347a-3684-4348-b1de-14bdca1471fd",
+ "0bc14c0a-9fb9-480d-bfb5-3b7d89c83efd",
+ "fe6747dd-12a0-4bb4-b2f3-9dd6f047df02",
+ "98284402-4f76-4aa8-893c-95c72b5fb395"
],
- "slide_ids": [
- "60b7c6b8-594a-40c3-9341-a0902e4e6938",
- "e55e00a0-2048-404a-b83a-f34106468694"
+ "submitter_aliquot_ids": [
+ "HCM-SANG-0271-D12-85A-01R-A80W-32-aliquot",
+ "HCM-SANG-0271-D12-31B-01D-A80U-36",
+ "HCM-SANG-0271-D12-86A-01D-A85C-36",
+ "HCM-SANG-0271-D12-85A-01D-A80T-32-aliquot",
+ "HCM-SANG-0271-D12-01A-01D-A80T-32-aliquot",
+ "HCM-SANG-0271-D12-85B-01D-A80U-36",
+ "HCM-SANG-0271-D12-10A-01D-A80U-36",
+ "HCM-SANG-0271-D12-10A-01D-A80T-32-aliquot",
+ "HCM-SANG-0271-D12-85A-01R-A80V-41",
+ "HCM-SANG-0271-D12-86A-01R-A85D-41"
+ ],
+ "created_datetime": "2020-07-08T11:54:16.081928-05:00",
+ "diagnosis_ids": [
+ "17a41ef7-aafc-4b70-8d8f-484dc5cd27bd"
],
+ "sample_ids": [
+ "ae2b48ea-7255-4d3d-ba00-1def687c3606",
+ "b16a3a6b-b0b1-4749-afbc-aea8f4eb3a5d",
+ "dc021e65-03cd-4210-a9b7-cdc971c22223",
+ "bc55eaf9-56fb-4f98-9fd8-2e80796b2873",
+ "5d4ac7e0-5a91-4e32-bf47-1febc9cc37d7",
+ "371a0350-a942-4a63-ac59-873da0cd1e86",
+ "4d8a7679-067e-4af7-9b11-d7722adc35ba",
+ "2ccc3dc4-042a-477a-8b3f-b8b7ff838762",
+ "fa202911-ce39-476e-9631-e81ffb46a402"
+ ],
+ "consent_type": null,
"submitter_sample_ids": [
- "TCGA-HU-8243-10A",
- "TCGA-HU-8243-01A",
- "TCGA-HU-8243-11A"
+ "HCM-SANG-0271-D12-85A-01R-A80W-32",
+ "HCM-SANG-0271-D12-85B",
+ "HCM-SANG-0271-D12-01A-01D-A80T-32",
+ "HCM-SANG-0271-D12-85A",
+ "HCM-SANG-0271-D12-10A-01D-A80T-32",
+ "HCM-SANG-0271-D12-10A",
+ "HCM-SANG-0271-D12-31B",
+ "HCM-SANG-0271-D12-86A",
+ "HCM-SANG-0271-D12-85A-01D-A80T-32"
+ ],
+ "primary_site": "Colon",
+ "submitter_diagnosis_ids": [
+ "HCM-SANG-0271-D12_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "0a6a14db-ca5c-4bf9-9125-611d672bc67b",
+ "index_date": "Sample Procurement",
+ "state": "released",
+ "portion_ids": [
+ "bcb558ac-e5bf-4b6c-af72-9b78b7b76722",
+ "a9aec562-c531-458d-95f1-768e2610aae7"
+ ],
+ "submitter_portion_ids": [
+ "HCM-SANG-0271-D12-10A-01",
+ "HCM-SANG-0271-D12-31B-01"
]
},
{
- "sample_ids": [
- "2f5cc9c9-31a9-5eb3-952a-b21e7cef50ca",
- "4f3f4fc8-4465-5230-83ec-c0ef6aceb2ea"
+ "id": "0cf7d1fe-e9c7-4e84-9497-df13ca2ed2c9",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "6ef5c20b-50c3-4fda-bd1e-de4876ccab7b",
+ "4a18990e-a0dc-4466-aaed-0053ffa0656a",
+ "0a62151e-6c10-4e23-80ff-93970fa20f7a",
+ "9b91bd41-f1f6-4712-84c9-54afa62d089b",
+ "038fc540-f8e2-44b7-944c-ac6f25cac665"
],
- "updated_datetime": "2016-05-25T19:12:45.610324-05:00",
- "submitter_aliquot_ids": [
- "TARGET-30-PAUXFZ-01A-01D",
- "TARGET-30-PAUXFZ-10A-01D"
+ "submitter_id": "HCM-SANG-0287-C20",
+ "submitter_analyte_ids": [
+ "HCM-SANG-0287-C20-85A-01D",
+ "HCM-SANG-0287-C20-01A-01D",
+ "HCM-SANG-0287-C20-85A-01R",
+ "HCM-SANG-0287-C20-10A-01D",
+ "HCM-SANG-0287-C20-01A-01R"
],
- "submitter_id": "TARGET-30-PAUXFZ",
- "case_id": "a7ccef7c-14c0-5232-b647-58b4a54fb343",
"aliquot_ids": [
- "9e1e30a8-7607-5b7e-b33c-9a6c5828d5fb",
- "c56898f9-c394-516a-bdbb-bf32a5af9d3f"
+ "43bb9903-2b44-494f-9275-62dd88803739",
+ "c0b86e70-1df5-4acb-9667-cfada31b7c5e",
+ "feb5f601-cd08-46bf-a8b3-4553d6ce24ba",
+ "1e9d473d-c7b7-4eca-82c5-4d8b40e482a8",
+ "22bc24d9-719b-4f3c-8587-e394cf409117",
+ "77b7d070-0fd0-4d33-9204-b6f8cd95c415",
+ "89ab0d1b-b692-4248-abc6-7dc698b54e67",
+ "1870408c-6d85-4e00-9c64-178350135992",
+ "48f6c991-9d58-4d1f-96e4-c62dcf0c6bf8"
+ ],
+ "submitter_aliquot_ids": [
+ "HCM-SANG-0287-C20-01A-01D-A78U-36",
+ "HCM-SANG-0287-C20-85A-01R-A80W-32-aliquot",
+ "HCM-SANG-0287-C20-85A-01R-A78V-41",
+ "HCM-SANG-0287-C20-10A-01D-A80T-32-aliquot",
+ "HCM-SANG-0287-C20-85A-01D-A80T-32-aliquot",
+ "HCM-SANG-0287-C20-01A-01D-A80T-32-aliquot",
+ "HCM-SANG-0287-C20-01A-01R-A78V-41",
+ "HCM-SANG-0287-C20-10A-01D-A78U-36",
+ "HCM-SANG-0287-C20-85A-01D-A78U-36"
+ ],
+ "created_datetime": "2019-10-14T10:45:59.013881-05:00",
+ "diagnosis_ids": [
+ "c91c84dc-0e49-412f-9c97-8c69b195cf02"
],
- "submitter_sample_ids": [
- "TARGET-30-PAUXFZ-01A",
- "TARGET-30-PAUXFZ-10A"
- ]
- },
- {
"sample_ids": [
- "c1bcb8d1-e13d-4af4-93f4-02d5f7f616a2",
- "52fcf737-cdcc-43ea-b33c-4018039b42dd"
+ "a716e59b-6876-4c5e-85c9-a53869482d95",
+ "06c2d962-ede5-4f2f-81dd-dc9253a9ddf5",
+ "459eaa9f-e97f-4008-8cf6-f6671985fd30",
+ "31866d02-8351-497c-8c73-1fc5fef584aa",
+ "38243f28-93e3-4a8a-bf92-8019a07b3bec",
+ "62ec2e41-acac-4449-b9fe-fd2c938cc811",
+ "fa630ad7-f709-43ca-bf5c-d3e83dd51779"
],
+ "submitter_sample_ids": [
+ "HCM-SANG-0287-C20-01A",
+ "HCM-SANG-0287-C20-85A-01D-A80T-32",
+ "HCM-SANG-0287-C20-85A-01R-A80W-32",
+ "HCM-SANG-0287-C20-10A",
+ "HCM-SANG-0287-C20-01A-01D-A80T-32",
+ "HCM-SANG-0287-C20-85A",
+ "HCM-SANG-0287-C20-10A-01D-A80T-32"
+ ],
+ "primary_site": "Rectum",
+ "submitter_diagnosis_ids": [
+ "HCM-SANG-0287-C20_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "0cf7d1fe-e9c7-4e84-9497-df13ca2ed2c9",
+ "index_date": "Sample Procurement",
+ "state": "released",
"portion_ids": [
- "e0e97a05-656a-468e-8418-0d08c38e76ab",
- "3e2a0eab-7d89-4f3c-9c0e-8942e53d3c45"
+ "003cfb21-0ffb-44e2-9961-ebdd5c39f361",
+ "a7236e0c-decf-48c3-a977-d069436420b7"
],
"submitter_portion_ids": [
- "TCGA-KK-A8I9-01A-11",
- "TCGA-KK-A8I9-11A-11"
+ "HCM-SANG-0287-C20-01A-01",
+ "HCM-SANG-0287-C20-10A-01"
+ ]
+ },
+ {
+ "id": "0e9a9e97-f0bf-4f4a-84cc-73eccfc627b1",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "78c3017c-6de1-4ccb-bf4d-27922b0b1f38",
+ "f877946f-8a20-40dd-b5e0-5d4a350c2528",
+ "c70fdba0-7324-4ea1-b15c-8de62a459a50",
+ "9c7ccff1-5150-46a4-94dd-a928df9db3e7",
+ "9bc12019-74f2-4784-9322-0398a2c1b3d1"
],
- "created_datetime": null,
- "submitter_aliquot_ids": [
- "TCGA-KK-A8I9-11A-11D-A361-01",
- "TCGA-KK-A8I9-11A-11D-A362-08",
- "TCGA-KK-A8I9-11A-11W-A446-08",
- "TCGA-KK-A8I9-01A-11R-A36G-07",
- "TCGA-KK-A8I9-11A-11D-A40C-01",
- "TCGA-KK-A8I9-01A-11D-A363-01",
- "TCGA-KK-A8I9-01A-11W-A447-08",
- "TCGA-KK-A8I9-01A-11D-A365-05",
- "TCGA-KK-A8I9-01A-11D-A364-08",
- "TCGA-KK-A8I9-01A-11R-A36B-13"
- ],
- "updated_datetime": "2016-05-02T15:57:29.451686-05:00",
+ "submitter_id": "HCM-CSHL-0376-D37",
"submitter_analyte_ids": [
- "TCGA-KK-A8I9-11A-11W",
- "TCGA-KK-A8I9-01A-11R",
- "TCGA-KK-A8I9-11A-11D",
- "TCGA-KK-A8I9-01A-11W",
- "TCGA-KK-A8I9-01A-11D"
+ "HCM-CSHL-0376-D37-31A-11D",
+ "HCM-CSHL-0376-D37-10A-01D",
+ "HCM-CSHL-0376-D37-31A-11R",
+ "HCM-CSHL-0376-D37-85P-01D",
+ "HCM-CSHL-0376-D37-85P-01R"
],
- "analyte_ids": [
- "ddec19cb-5e4c-4151-8b6d-741044abff1e",
- "96c5b539-8eb7-4156-81d0-7b7fecd68900",
- "ced38a45-7610-49d4-8bf9-d53a1fc2d489",
- "476f5deb-1b3f-4a35-8a31-f27763ba8d8a",
- "c284f2af-1e9b-40cc-8936-b61cfd251d62"
- ],
- "submitter_id": "TCGA-KK-A8I9",
- "case_id": "261c3d74-706e-4751-bd15-8f3c1a402ff0",
- "state": null,
"aliquot_ids": [
- "4f76de2d-e07a-402b-9818-7f04d3704a43",
- "96802a73-b1db-47d7-8f5f-4504f3ece5ad",
- "f376fc45-370a-4d96-833b-9a1322e32a42",
- "d3e88dd3-66d7-40d4-978a-4ddab868373a",
- "06f1d087-75c9-4da8-8339-80aff3bfaa12",
- "50b1e243-b45a-42a1-8692-b7ae5d51250f",
- "0f1c00d3-f3dc-4d2b-bd8a-ecc31e4f4089",
- "986a3ed6-ba56-4025-a2bd-9909648e703a",
- "bebc84b6-9179-420b-8207-858b999e8c0c",
- "239d5e7e-5fb5-4df3-ae6b-a5a06ee296ae"
+ "790b5310-35ac-4382-b062-db3078e8b20a",
+ "33019b27-f718-4436-95fd-ff4d8a28c923",
+ "6c69fc6a-6606-4f79-8ad8-4bf6bfaab56d",
+ "aa16d6e6-6a3d-487f-82b1-9ffa258b851e",
+ "5facfb50-5793-43a6-be50-5c1afe446dcc"
],
- "slide_ids": [
- "1e174ca5-9298-41b6-a705-728f111a3e7b",
- "a3e31324-9e06-4799-85b4-4f6236848009"
+ "submitter_aliquot_ids": [
+ "HCM-CSHL-0376-D37-31A-11R-A78V-41",
+ "HCM-CSHL-0376-D37-85P-01D-A78T-36",
+ "HCM-CSHL-0376-D37-31A-11D-A78T-36",
+ "HCM-CSHL-0376-D37-85P-01R-A78V-41",
+ "HCM-CSHL-0376-D37-10A-01D-A78T-36"
+ ],
+ "created_datetime": "2019-10-14T13:24:10.078043-05:00",
+ "diagnosis_ids": [
+ "9ce37f22-bbde-4447-bbf0-28e85f4f2837"
+ ],
+ "sample_ids": [
+ "ac11b2da-38a9-444e-a922-3b78f15be942",
+ "cbfc2432-c011-41ee-81fd-5efd8c0dac79",
+ "76b129b9-954b-4471-949b-e118dc778e2d"
],
"submitter_sample_ids": [
- "TCGA-KK-A8I9-11A",
- "TCGA-KK-A8I9-01A"
+ "HCM-CSHL-0376-D37-10A",
+ "HCM-CSHL-0376-D37-85P",
+ "HCM-CSHL-0376-D37-31A"
+ ],
+ "primary_site": "Colon",
+ "submitter_diagnosis_ids": [
+ "HCM-CSHL-0376-D37_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "0e9a9e97-f0bf-4f4a-84cc-73eccfc627b1",
+ "index_date": "Diagnosis",
+ "state": "released",
+ "portion_ids": [
+ "d0c90c2f-d6d9-4601-b2f8-6ca30b48f405",
+ "f2b394b2-35f1-492b-969e-df863a2714cc"
+ ],
+ "submitter_portion_ids": [
+ "HCM-CSHL-0376-D37-10A-01",
+ "HCM-CSHL-0376-D37-31A-11"
]
},
{
+ "id": "149a8565-e0c5-4474-a693-d44f1b445c0c",
+ "lost_to_followup": "Yes",
+ "slide_ids": [
+ "846c933d-4bca-4995-ac45-1caf38ee481b",
+ "4be18133-d1ff-400f-b57b-dded404249c0",
+ "8f24d92e-cfee-4cce-8eec-e4f302766255",
+ "08650e03-96e9-4213-8892-b3da6928171f"
+ ],
+ "submitter_slide_ids": [
+ "HCM-BROD-0199-C71-01A-01-S2-HE",
+ "HCM-BROD-0199-C71-02A-01-S2-HE",
+ "HCM-BROD-0199-C71-01A-01-S1-HE",
+ "HCM-BROD-0199-C71-02A-01-S1-HE"
+ ],
+ "days_to_lost_to_followup": null,
+ "disease_type": "Gliomas",
+ "analyte_ids": [
+ "3b8a2f07-a6d0-4998-8fe8-c2138372e191",
+ "ae4eb402-7c13-46f0-bdf8-361c7fdc9430",
+ "b720a33e-c858-47f4-9e51-e58748004e96",
+ "fdf06863-7d14-4f79-ad2c-b46d443d235c",
+ "a4f88ff3-7c9e-4edc-808f-aa01378ec68d",
+ "cca66f05-89e0-48a7-8948-53e556fee5be",
+ "e658c087-c37d-47ac-9472-3b6b6eed7188",
+ "6be6aff1-9694-4c7f-8f43-f0f902b95849",
+ "39f9ce44-944c-49b8-b215-05b117d5e62d"
+ ],
+ "submitter_id": "HCM-BROD-0199-C71",
+ "submitter_analyte_ids": [
+ "HCM-BROD-0199-C71-85A-01D",
+ "HCM-BROD-0199-C71-01A-11D",
+ "HCM-BROD-0199-C71-85S-01R",
+ "HCM-BROD-0199-C71-85A-01R",
+ "HCM-BROD-0199-C71-02A-11R",
+ "HCM-BROD-0199-C71-02A-11D",
+ "HCM-BROD-0199-C71-01A-11R",
+ "HCM-BROD-0199-C71-10A-01D",
+ "HCM-BROD-0199-C71-85R-01D"
+ ],
+ "aliquot_ids": [
+ "9f18daaa-cd67-436e-85a1-1e9b3e1e2135",
+ "eff144c4-4e2a-497d-998a-5ae6bccf2576",
+ "7ec23cac-61f4-4f8f-afa7-ed8a3ca1493f",
+ "6ea15978-902f-45ba-b148-cc4247341882",
+ "529f3cbf-f65d-4a60-a562-b3fcfbd7d4c9",
+ "22542d68-476b-47fd-91da-03db887756d6",
+ "7f4e7f6b-33b0-49da-95bd-88643d5e14ff",
+ "fc0121b9-5de8-4af6-90db-b36dd8207ebf",
+ "c9dd10db-bf23-4bb2-a6db-fec1b45549c8"
+ ],
+ "submitter_aliquot_ids": [
+ "HCM-BROD-0199-C71-85R-01D-A80U-36",
+ "HCM-BROD-0199-C71-02A-11R-A80V-41",
+ "HCM-BROD-0199-C71-02A-11D-A80U-36",
+ "HCM-BROD-0199-C71-01A-11D-A786-36",
+ "HCM-BROD-0199-C71-10A-01D-A786-36",
+ "HCM-BROD-0199-C71-01A-11R-A787-41",
+ "HCM-BROD-0199-C71-85A-01D-A786-36",
+ "HCM-BROD-0199-C71-85S-01R-A80V-41",
+ "HCM-BROD-0199-C71-85A-01R-A787-41"
+ ],
+ "created_datetime": "2019-04-04T15:00:32.807421-05:00",
+ "diagnosis_ids": [
+ "df0230e6-d07c-4814-9aee-5be560d1ce58",
+ "2d72ecab-4038-4ef7-b921-79b88ad62722"
+ ],
"sample_ids": [
- "d43f727a-96d6-40b8-86ae-7a3e0aa46853",
- "b8329a6d-a87b-47f4-ad00-9e979e62647b"
+ "b5919dd1-039e-4ab4-b6d5-37b9d483893f",
+ "769c5e1b-89e3-431c-9420-31de5efe5a22",
+ "5e0faf77-f1b3-4b1a-8b6a-8066270eddb4",
+ "ee0e94ad-b5b0-4ad6-912c-c4010f1a1d26",
+ "600a5b42-f871-47a2-8124-68184edf10bd",
+ "a1dbbc0c-173c-4455-a780-8682dd2e258a"
],
+ "submitter_sample_ids": [
+ "HCM-BROD-0199-C71-85R",
+ "HCM-BROD-0199-C71-02A",
+ "HCM-BROD-0199-C71-85S",
+ "HCM-BROD-0199-C71-85A",
+ "HCM-BROD-0199-C71-10A",
+ "HCM-BROD-0199-C71-01A"
+ ],
+ "primary_site": "Brain",
+ "submitter_diagnosis_ids": [
+ "HCM-BROD-0199-C71_diagnosis",
+ "HCM-BROD-0199-C71_diagnosis2"
+ ],
+ "updated_datetime": "2021-01-06T22:55:10.531130-06:00",
+ "case_id": "149a8565-e0c5-4474-a693-d44f1b445c0c",
+ "index_date": "Diagnosis",
+ "state": "released",
"portion_ids": [
- "8960ddcc-0950-4d6e-a557-8727b652c93b",
- "e36bfd07-c911-4a98-8424-e58e5e9aaa68"
+ "9dccf7ac-58a0-4c9e-9fb6-4ead8c37d48b",
+ "5afaa056-b09e-4328-ae5b-feb70faa3595",
+ "77827fa6-18a9-4e1a-b49f-800cec351fa8"
],
"submitter_portion_ids": [
- "TCGA-QR-A70H-10A-01",
- "TCGA-QR-A70H-01A-12"
+ "HCM-BROD-0199-C71-10A-01",
+ "HCM-BROD-0199-C71-02A-11",
+ "HCM-BROD-0199-C71-01A-11"
+ ]
+ },
+ {
+ "id": "19b1e69a-355a-4dd7-9c56-d701f6c2c5a0",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Adenomas and Adenocarcinomas",
+ "analyte_ids": [
+ "181d5e6e-026d-4983-97e8-f4d9e28a0cfe",
+ "293683aa-f2ef-4d14-8ec7-681c870d3b71",
+ "c1f6530c-004c-4b10-bae5-a571542aabd2",
+ "2140e379-dd8b-4440-a626-bb0e01f8fc00",
+ "4d337e44-6468-43b8-bf2f-4301750dab99"
],
- "created_datetime": null,
- "submitter_aliquot_ids": [
- "TCGA-QR-A70H-01A-12R-A35K-07",
- "TCGA-QR-A70H-01A-12R-A35M-13",
- "TCGA-QR-A70H-01A-12D-A35E-05",
- "TCGA-QR-A70H-10A-01D-A35A-01",
- "TCGA-QR-A70H-01A-12D-A35C-01",
- "TCGA-QR-A70H-01A-12W-A43Z-08",
- "TCGA-QR-A70H-10A-01D-A35B-08",
- "TCGA-QR-A70H-10A-01W-A441-08",
- "TCGA-QR-A70H-01A-12D-A35D-08"
- ],
- "updated_datetime": "2016-05-02T15:37:31.996088-05:00",
+ "submitter_id": "HCM-SANG-0299-C15",
"submitter_analyte_ids": [
- "TCGA-QR-A70H-10A-01D",
- "TCGA-QR-A70H-10A-01W",
- "TCGA-QR-A70H-01A-12D",
- "TCGA-QR-A70H-01A-12W",
- "TCGA-QR-A70H-01A-12R"
+ "HCM-SANG-0299-C15-10B-01D",
+ "HCM-SANG-0299-C15-85X-01R",
+ "HCM-SANG-0299-C15-85A-01R",
+ "HCM-SANG-0299-C15-85X-01D",
+ "HCM-SANG-0299-C15-85A-01D"
],
- "analyte_ids": [
- "c4a41555-dd45-4e10-a3be-50d49a1121a3",
- "957e01f6-eb3f-446e-9f45-b50c66337e2d",
- "1acde950-2e0c-4586-852b-b4ac4e1ea4a4",
- "67c033c0-9fe8-4004-967e-c605e1890f4d",
- "b0873010-5d60-4691-b700-e172950f1d7c"
- ],
- "submitter_id": "TCGA-QR-A70H",
- "case_id": "13b41b15-a785-4ab7-b864-ffff6d35dd45",
- "state": null,
"aliquot_ids": [
- "d9120f00-7f10-49d5-ae84-6177e9424c7c",
- "31c6fa50-200a-46c1-a546-61b52592fd8f",
- "ab50f38c-2e7d-4d75-a216-27aeaa4d9305",
- "382d5e31-6c66-4df3-a695-6b8c29cfc681",
- "51d1fb14-c918-4439-b816-ef6cd3253c64",
- "f586d8d5-d0c6-4979-aaa7-10217a88fa4c",
- "2f9a60eb-602e-44bb-bc57-87e20d946f76",
- "fbafc85e-deff-46cd-a40f-479b9dc92a60",
- "cacbc8a6-0eb0-4277-931f-d0075c9b1de9"
+ "37fee31c-8669-4057-ae95-424586fa2a05",
+ "d17fd7af-fd14-4cb7-a6c1-c23339589288",
+ "65b7ffa3-0f1f-4918-a75f-2343720fe40c",
+ "a331201c-3aef-4eec-83d5-d38f4211c1b1",
+ "3ef40a4d-21cd-4be4-8c57-cba30c8e0778",
+ "b8af2a87-5b55-4c73-88f1-90ce5e1d05f5",
+ "a1988273-c9f8-4cd1-b1a6-5daa2c7a3e51",
+ "b535f87b-0a61-49bd-828b-bb5e3c11f2f7",
+ "3bb67aa8-5155-425e-9afc-9aca5120b0f0",
+ "fa20b306-a835-46d8-ab03-f8ea0b585381",
+ "65ec4e30-e782-4b6f-b985-33e9c7e72a0a"
],
- "slide_ids": [
- "2310e34c-0ea5-4876-9f87-bad0b7a44513"
+ "submitter_aliquot_ids": [
+ "HCM-SANG-0299-C15-85B-01D-A80T-32-aliquot",
+ "HCM-SANG-0299-C15-10A-01D-A80T-32-aliquot",
+ "HCM-SANG-0299-C15-85A-01R-A78V-41",
+ "HCM-SANG-0299-C15-85A-01D-A78U-36",
+ "HCM-SANG-0299-C15-85A-01R-A80W-32-aliquot",
+ "HCM-SANG-0299-C15-85X-01D-A78U-36",
+ "HCM-SANG-0299-C15-85X-01R-A78V-41",
+ "HCM-SANG-0299-C15-85A-01D-A80T-32-aliquot",
+ "HCM-SANG-0299-C15-01A-01D-A80T-32-aliquot",
+ "HCM-SANG-0299-C15-85B-01R-A80W-32-aliquot",
+ "HCM-SANG-0299-C15-10B-01D-A78U-36"
+ ],
+ "created_datetime": "2019-10-14T10:46:36.257369-05:00",
+ "diagnosis_ids": [
+ "7c6aa4ce-6661-4491-a827-c0a8045743a6",
+ "c1a6f70f-b871-4b4e-a292-aef67c2d4776"
],
- "submitter_sample_ids": [
- "TCGA-QR-A70H-01A",
- "TCGA-QR-A70H-10A"
- ]
- },
- {
"sample_ids": [
- "19dee039-9c98-4d4a-8baf-eea1b6dda8eb",
- "fdf1e501-f34f-450c-9a5c-611157079a86"
+ "c56f3f94-deda-4d21-8f8e-658108995dfa",
+ "ce46b9f4-5244-432d-aa87-026e0a27d71a",
+ "cf134dfb-5126-4bee-bcab-39d584335a21",
+ "34e0d6ee-97f5-420f-9b34-4784098125f7",
+ "ac8476c1-53bf-43b5-8695-db441ed1a720",
+ "9000b366-a14b-44bc-a782-484e09765b2d",
+ "8541bc01-57e6-4bf7-a42e-da1c8e790633",
+ "277d7ed9-4e6a-429b-8f4e-438ff0d2ba7a",
+ "d12d9184-5317-4730-af2e-a8428456a2a7"
],
+ "submitter_sample_ids": [
+ "HCM-SANG-0299-C15-85X",
+ "HCM-SANG-0299-C15-85B-01D-A80T-32",
+ "HCM-SANG-0299-C15-85A-01R-A80W-32",
+ "HCM-SANG-0299-C15-10B",
+ "HCM-SANG-0299-C15-01A-01D-A80T-32",
+ "HCM-SANG-0299-C15-85A",
+ "HCM-SANG-0299-C15-85A-01D-A80T-32",
+ "HCM-SANG-0299-C15-85B-01R-A80W-32",
+ "HCM-SANG-0299-C15-10A-01D-A80T-32"
+ ],
+ "primary_site": "Esophagus",
+ "submitter_diagnosis_ids": [
+ "HCM-SANG-0299-C15_diagnosis2",
+ "HCM-SANG-0299-C15_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "19b1e69a-355a-4dd7-9c56-d701f6c2c5a0",
+ "index_date": "Sample Procurement",
+ "state": "released",
"portion_ids": [
- "10b6ccb4-3637-4769-8988-417c0306eaef",
- "92f8cd48-451d-4ed6-8e60-b15aa93d2c09",
- "d0d55efa-c91d-45de-92bf-cf6f0d263b21"
+ "b8192c57-9cda-4dca-a590-1e13beadf2a0"
],
"submitter_portion_ids": [
- "TCGA-BJ-A18Z-01A-21",
- "TCGA-BJ-A18Z-01A-11-A21L-20",
- "TCGA-BJ-A18Z-10A-01"
+ "HCM-SANG-0299-C15-10B-01"
+ ]
+ },
+ {
+ "id": "19f1d344-4c14-4733-abbd-c2db6737e210",
+ "lost_to_followup": null,
+ "days_to_lost_to_followup": null,
+ "disease_type": "Ductal and Lobular Neoplasms",
+ "analyte_ids": [
+ "a21d5f05-af57-41bf-ab60-32d2c869611b",
+ "696f4b03-5f0d-4eaa-975d-a9feb64dae07",
+ "c8d4fb23-c55d-4bb6-bd3d-fc1f159d7a33",
+ "5db09eb8-fecc-433f-aa34-a12a7c9333dd",
+ "51598aba-be8e-42a0-bf6c-aca34776fc1f"
],
- "created_datetime": null,
- "submitter_aliquot_ids": [
- "TCGA-BJ-A18Z-01A-21D-A13U-02",
- "TCGA-BJ-A18Z-10A-01D-A13V-01",
- "TCGA-BJ-A18Z-01A-21R-A13Y-07",
- "TCGA-BJ-A18Z-01A-21W-A14T-08",
- "TCGA-BJ-A18Z-01A-21D-A13Z-05",
- "TCGA-BJ-A18Z-01A-21D-A37T-08",
- "TCGA-BJ-A18Z-10A-01D-A13W-08",
- "TCGA-BJ-A18Z-01A-21R-A13X-13",
- "TCGA-BJ-A18Z-01A-21D-A13W-08",
- "TCGA-BJ-A18Z-10A-01D-A13U-02",
- "TCGA-BJ-A18Z-10A-01W-A14T-08",
- "TCGA-BJ-A18Z-01A-21D-A13V-01"
- ],
- "updated_datetime": "2016-05-02T16:18:19.199189-05:00",
+ "submitter_id": "HCM-CSHL-0081-C25",
"submitter_analyte_ids": [
- "TCGA-BJ-A18Z-01A-21W",
- "TCGA-BJ-A18Z-01A-21D",
- "TCGA-BJ-A18Z-01A-21R",
- "TCGA-BJ-A18Z-10A-01D",
- "TCGA-BJ-A18Z-10A-01W"
+ "HCM-CSHL-0081-C25-11A-11D",
+ "HCM-CSHL-0081-C25-85A-01R",
+ "HCM-CSHL-0081-C25-85B-01D",
+ "HCM-CSHL-0081-C25-01A-11R",
+ "HCM-CSHL-0081-C25-01A-11D"
],
- "analyte_ids": [
- "119ebfa1-75b2-4f24-816a-4e9a5061f6b5",
- "f86759fd-ecc5-4f42-b5fe-b9f079d23968",
- "39691042-bd28-40ed-b66b-26414ecf1ba0",
- "76ea5056-d7fa-49fb-94bf-11171ca7c100",
- "71a822c9-b510-4a4c-8c30-18b8083acc2d"
- ],
- "submitter_id": "TCGA-BJ-A18Z",
- "case_id": "0d497faf-2c1c-4173-a5fe-770cca73323c",
- "state": null,
"aliquot_ids": [
- "fa580596-e70f-4ed0-85a2-6fb594ca679a",
- "776cb4b1-8efd-4ea2-b53f-9dff7dd94b10",
- "85a7922f-0327-437c-bdf5-1bb67a1e932f",
- "6d532180-0175-4610-8bfa-cca3a7c3697a",
- "b5977e73-49d8-4e99-9e97-993cc44dad17",
- "918793fa-b35e-4745-ac75-4d1c868089f8",
- "ba9479a1-929f-4e4e-8bf5-e23cb280dfcf",
- "e9776ff5-69b9-4669-ab33-e4bb030461ec",
- "8ba98907-ab03-4c9e-a900-e31aa16ff810",
- "35e18649-183e-4223-b2f6-d812bdd9becd",
- "4aa17671-4420-4989-a6dd-379250f4aeda",
- "815c53c3-8add-4612-b93c-3ed4bfa530aa"
+ "0a65e1bd-6af3-44ba-924c-193eb8e099d6",
+ "61112c39-c838-4f84-89c7-a33bfe5dea88",
+ "25cd08c3-09a9-406d-8ec3-ab4946224cf1",
+ "973d4fa3-4fa7-4ff9-8176-cc51c53b7079",
+ "9aa21acf-1e24-4b7a-a3c1-73354bdd81b6"
],
- "slide_ids": [
- "7c5b5c77-9fbc-4b48-81f5-48b5ede7c436"
+ "submitter_aliquot_ids": [
+ "HCM-CSHL-0081-C25-11A-11D-A78M-36",
+ "HCM-CSHL-0081-C25-01A-11R-A78N-41",
+ "HCM-CSHL-0081-C25-85B-01D-A78M-36",
+ "HCM-CSHL-0081-C25-01A-11D-A78M-36",
+ "HCM-CSHL-0081-C25-85A-01R-A78N-41"
+ ],
+ "created_datetime": "2019-09-19T08:58:31.776805-05:00",
+ "diagnosis_ids": [
+ "c1cca3a7-e0ac-40a1-9db7-6902b48d3c62"
+ ],
+ "sample_ids": [
+ "d9f23187-9a29-426a-9ead-4bb3a2ce6cf9",
+ "3709004e-b04d-4473-aa29-8dd84176d17d",
+ "adcc54e3-074b-4ca6-b179-0a5df8efeb36",
+ "05478d15-885a-4c44-a46a-81bbe6c9ee11"
],
"submitter_sample_ids": [
- "TCGA-BJ-A18Z-01A",
- "TCGA-BJ-A18Z-10A"
+ "HCM-CSHL-0081-C25-85B",
+ "HCM-CSHL-0081-C25-01A",
+ "HCM-CSHL-0081-C25-85A",
+ "HCM-CSHL-0081-C25-11A"
+ ],
+ "primary_site": "Pancreas",
+ "submitter_diagnosis_ids": [
+ "HCM-CSHL-0081-C25_diagnosis"
+ ],
+ "updated_datetime": "2023-02-22T07:39:25.979291-06:00",
+ "case_id": "19f1d344-4c14-4733-abbd-c2db6737e210",
+ "index_date": "Diagnosis",
+ "state": "released",
+ "portion_ids": [
+ "d9588542-d7ef-413e-900c-3f816b583525",
+ "b77573f1-e6a3-43c1-a56d-a207c39e18c4"
+ ],
+ "submitter_portion_ids": [
+ "HCM-CSHL-0081-C25-01A-11",
+ "HCM-CSHL-0081-C25-11A-11"
]
}
],
"pagination": {
"count": 10,
- "sort": "",
+ "total": 40232,
+ "size": 10,
"from": 0,
+ "sort": "",
"page": 1,
- "total": 6340,
- "pages": 634,
- "size": 10
+ "pages": 4024
}
},
"warnings": {}
@@ -1697,7 +1898,7 @@ print json.dumps(response.json(), indent=2)
#### Example: HTTP POST Request
-This example demonstrates how to obtain metadata in TSV format for a set of files using their UUIDs (e.g. UUIDs obtained from a [download manifest file generated by the GDC Data Portal](/Data_Portal/Users_Guide/Cart/#gdc-data-transfer-tool)).
+This example demonstrates how to obtain metadata in TSV format for a set of files using their UUIDs (e.g. UUIDs obtained from a [download manifest file generated by the GDC Data Portal](/Data_Portal/Users_Guide/Repository.md#generating-a-manifest-file-for-the-data-transfer-tool)).
The first step is to construct a JSON query object, including `filters`, `fields`, `format`, and `size` parameters. The object is then submitted as HTTP POST payload to the GDC API using curl, in order to retrieve a TSV file with the requested metadata.
@@ -1770,15 +1971,16 @@ cases_endpt = 'https://api.gdc.cancer.gov/cases'
params = {'fields':'submitter_id',
'format':'TSV'}
response = requests.get(cases_endpt, params = params)
-print response.content
+print(response.content)
```
```response1
-submitter_id
-TCGA-RC-A6M6
-TCGA-B6-A0RV
-TCGA-MB-A5Y8
-TCGA-BQ-5876
-TCGA-Z6-A9VB
+id submitter_id
+0286c31b-a704-4d7d-99e3-0bc4e8975b8b HCM-CSHL-0084-C25
+02f6d684-b6b5-419a-b0e1-b74d0a384a30 HCM-BROD-0408-C71
+03974dc9-0162-4de8-9897-09f88693681a HCM-BROD-0334-C43
+03bfeb7c-cecf-4691-8263-33cdfe391ea9 HCM-BROD-0124-C25
+04cbceab-f945-482b-956b-840756a17a4a HCM-BROD-0421-C71
+
```
```shell2
curl 'https://api.gdc.cancer.gov/cases?fields=submitter_id&size=5&format=XML&pretty=true'
@@ -1791,7 +1993,7 @@ params = {'fields':'submitter_id',
'format':'XML',
'pretty':'true'}
response = requests.get(cases_endpt, params = params)
-print response.content
+print(response.content)
```
```Output2
@@ -1799,29 +2001,34 @@ print response.content
-
- TCGA-MQ-A4LV
+ 0286c31b-a704-4d7d-99e3-0bc4e8975b8b
+ HCM-CSHL-0084-C25
-
- TCGA-N9-A4Q1
+ 02f6d684-b6b5-419a-b0e1-b74d0a384a30
+ HCM-BROD-0408-C71
-
- TCGA-78-7154
+ 03974dc9-0162-4de8-9897-09f88693681a
+ HCM-BROD-0334-C43
-
- TCGA-S7-A7WX
+ 03bfeb7c-cecf-4691-8263-33cdfe391ea9
+ HCM-BROD-0124-C25
-
- TCGA-XF-AAML
+ 04cbceab-f945-482b-956b-840756a17a4a
+ HCM-BROD-0421-C71
5
-
+ 86962
+ 5
0
- 2811
- 14052
+
1
- 5
+ 17393
@@ -1838,7 +2045,7 @@ Returns when the `pretty` parameter is set to `true`, the API response is format
curl 'https://api.gdc.cancer.gov/cases?fields=submitter_id&sort=submitter_id:asc&size=5'
```
```Response1
-{"data": {"hits": [{"id": "f7af65fc-97e3-52ce-aa2c-b707650e747b", "submitter_id": "TARGET-00-NAAEMA"}, {"id": "513d0a2a-3c94-5a36-97a4-24c3656fc66e", "submitter_id": "TARGET-00-NAAEMB"}, {"id": "b5f20676-727b-50b0-9b5a-582cd8572d6d", "submitter_id": "TARGET-00-NAAEMC"}, {"id": "0c0b183f-0d4a-5a9d-9888-0617cebcc462", "submitter_id": "TARGET-20-PABGKN"}, {"id": "0f5ed7a7-226d-57bc-a4ce-8a6b18560c55", "submitter_id": "TARGET-20-PABHET"}], "pagination": {"count": 5, "sort": "submitter_id:asc", "from": 0, "page": 1, "total": 14551, "pages": 2911, "size": 5}}, "warnings": {}}
+{"data": {"hits": [{"id": "be37f1f7-2f98-4f74-bc04-6dd2ae2afcad", "submitter_id": "01BR001"}, {"id": "e6915db0-7c89-484d-8f9f-15cca68b82fc", "submitter_id": "01BR008"}, {"id": "16614d46-172b-479c-992b-e80a8e9a2c59", "submitter_id": "01BR009"}, {"id": "567fc9e3-17a6-42b1-a896-5e9a9507d1d8", "submitter_id": "01BR010"}, {"id": "54e89878-a1bc-4f5a-9d68-4842a469586e", "submitter_id": "01BR015"}], "pagination": {"count": 5, "total": 86962, "size": 5, "from": 0, "sort": "None", "page": 1, "pages": 17393}}, "warnings": {}}
```
```Request2
curl 'https://api.gdc.cancer.gov/cases?fields=submitter_id&sort=submitter_id:asc&size=5&pretty=true'
@@ -1848,34 +2055,34 @@ curl 'https://api.gdc.cancer.gov/cases?fields=submitter_id&sort=submitter_id:as
"data": {
"hits": [
{
- "id": "f7af65fc-97e3-52ce-aa2c-b707650e747b",
- "submitter_id": "TARGET-00-NAAEMA"
+ "id": "be37f1f7-2f98-4f74-bc04-6dd2ae2afcad",
+ "submitter_id": "01BR001"
},
{
- "id": "513d0a2a-3c94-5a36-97a4-24c3656fc66e",
- "submitter_id": "TARGET-00-NAAEMB"
+ "id": "e6915db0-7c89-484d-8f9f-15cca68b82fc",
+ "submitter_id": "01BR008"
},
{
- "id": "b5f20676-727b-50b0-9b5a-582cd8572d6d",
- "submitter_id": "TARGET-00-NAAEMC"
+ "id": "16614d46-172b-479c-992b-e80a8e9a2c59",
+ "submitter_id": "01BR009"
},
{
- "id": "0c0b183f-0d4a-5a9d-9888-0617cebcc462",
- "submitter_id": "TARGET-20-PABGKN"
+ "id": "567fc9e3-17a6-42b1-a896-5e9a9507d1d8",
+ "submitter_id": "01BR010"
},
{
- "id": "0f5ed7a7-226d-57bc-a4ce-8a6b18560c55",
- "submitter_id": "TARGET-20-PABHET"
+ "id": "54e89878-a1bc-4f5a-9d68-4842a469586e",
+ "submitter_id": "01BR015"
}
],
"pagination": {
"count": 5,
- "sort": "submitter_id:asc",
+ "total": 86962,
+ "size": 5,
"from": 0,
+ "sort": "None",
"page": 1,
- "total": 14551,
- "pages": 2911,
- "size": 5
+ "pages": 17393
}
},
"warnings": {}
@@ -1900,121 +2107,131 @@ import json
files_endpt = 'https://api.gdc.cancer.gov/files'
params = {'fields':'cases.submitter_id,file_id,file_name,file_size'}
response = requests.get(files_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
```Response
{
"data": {
"hits": [
{
- "file_name": "NARKY_p_TCGAb69_SNP_N_GenomeWideSNP_6_H03_697832.grch38.seg.txt",
+ "id": "d570eccc-3c1c-4c4f-ae04-96be71fbe016",
"cases": [
{
- "submitter_id": "TCGA-BP-4989"
+ "submitter_id": "TCGA-AN-A0FL"
}
],
- "file_id": "3bd4d5dc-563a-481c-87a6-ec0017d0d58a",
- "file_size": 54200
+ "file_name": "TCGA-AN-A0FL-01Z-00-DX1.20A041C6-A306-4599-A7D1-65032A252AA9.svs",
+ "file_id": "d570eccc-3c1c-4c4f-ae04-96be71fbe016",
+ "file_size": 1055798681
},
{
- "file_name": "652ecf99-1af9-41fc-b0a5-d3e5c07a7b5d.FPKM.txt.gz",
+ "id": "0f8d8202-a1ca-4ea1-98b2-c20a6b08479a",
"cases": [
{
- "submitter_id": "TCGA-60-2709"
+ "submitter_id": "TCGA-AN-A0FL"
}
],
- "file_id": "b3286166-01f9-4149-81b5-a2ea5f27c50e",
- "file_size": 530665
+ "file_name": "nationwidechildrens.org_ssf.TCGA-AN-A0FL.xml",
+ "file_id": "0f8d8202-a1ca-4ea1-98b2-c20a6b08479a",
+ "file_size": 15519
},
{
- "file_name": "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_D05_628212.nocnv_grch38.seg.txt",
+ "id": "b76f87b3-99c5-4297-b2df-8cbea8ecaf61",
"cases": [
{
- "submitter_id": "TCGA-A8-A07Z"
+ "submitter_id": "TCGA-BH-A18F"
}
],
- "file_id": "282cc9d1-c5e9-49ff-b27b-e00c1e5529c6",
- "file_size": 15806
+ "file_name": "7c4e4c2a-a0b1-424f-97d8-359825674429.wxs.aliquot_ensemble_masked.maf.gz",
+ "file_id": "b76f87b3-99c5-4297-b2df-8cbea8ecaf61",
+ "file_size": 21571
},
{
- "file_name": "REEDY_p_TCGAb65_SNP_N_GenomeWideSNP_6_F01_697686.nocnv_grch38.seg.txt",
+ "id": "be6d269d-4305-4643-b98e-af703a067761",
"cases": [
{
- "submitter_id": "TCGA-CJ-4871"
+ "submitter_id": "TCGA-BH-A18F"
}
],
- "file_id": "fe44a644-eefc-42c5-aac7-a216bc1e88e1",
- "file_size": 6179
+ "file_name": "HITCH_p_TCGASNP_b93_N_GenomeWideSNP_6_E11_741424.CEL",
+ "file_id": "be6d269d-4305-4643-b98e-af703a067761",
+ "file_size": 69084893
},
{
- "file_name": "84df7a8fee9fedb5e8e22849ec66d294_gdc_realn.bam",
+ "id": "fed73119-1d5e-4f7e-9713-183d1916422b",
"cases": [
{
- "submitter_id": "TCGA-A2-A0CO"
+ "submitter_id": "TCGA-BH-A18F"
}
],
- "file_id": "acd0ec73-c1fe-463e-912c-84e8416510e5",
- "file_size": 15545555724
+ "file_name": "3b928f83-14a7-4bd6-a9b0-744b414d4495.wxs.varscan2.raw_somatic_mutation.vcf.gz",
+ "file_id": "fed73119-1d5e-4f7e-9713-183d1916422b",
+ "file_size": 35903
},
{
- "file_name": "ed8c4bb6-891a-4cf2-80ba-42c5594760d0.vcf",
+ "id": "6877b045-91f1-4030-82ff-b90507e11e17",
"cases": [
{
- "submitter_id": "TCGA-BQ-7059"
+ "submitter_id": "TCGA-BH-A18F"
}
],
- "file_id": "ed8c4bb6-891a-4cf2-80ba-42c5594760d0",
- "file_size": 264694
+ "file_name": "5057e3cb-25cd-4a67-8d31-6ac8508ba3c7.methylation_array.sesame.level3betas.txt",
+ "file_id": "6877b045-91f1-4030-82ff-b90507e11e17",
+ "file_size": 770500
},
{
- "file_name": "nationwidechildrens.org_clinical.TCGA-IG-A6QS.xml",
+ "id": "07e8cdc7-d228-4752-ad19-800abd507277",
"cases": [
{
- "submitter_id": "TCGA-IG-A6QS"
+ "submitter_id": "TCGA-BH-A0BM"
}
],
- "file_id": "fe8cf009-f033-4536-95c7-836adcba5bf3",
- "file_size": 36996
+ "file_name": "TCGA-BRCA.28dcad29-448e-4bcb-911d-556c6f4a5573.star_fusion.rna_fusion.tsv",
+ "file_id": "07e8cdc7-d228-4752-ad19-800abd507277",
+ "file_size": 234
},
{
- "file_name": "05f6f9f7-6fb7-4c95-b79c-fdfaba16539d.vep.reheader.vcf.gz",
+ "id": "fef57b45-ede1-49b0-b60d-957a55a15e0e",
"cases": [
{
- "submitter_id": "TCGA-DK-A3IV"
+ "submitter_id": "TCGA-BH-A0BM"
}
],
- "file_id": "05f6f9f7-6fb7-4c95-b79c-fdfaba16539d",
- "file_size": 415044
+ "file_name": "nationwidechildrens.org_biospecimen.TCGA-BH-A0BM.xml",
+ "file_id": "fef57b45-ede1-49b0-b60d-957a55a15e0e",
+ "file_size": 127218
},
{
- "file_name": "C484.TCGA-12-5301-01A-01D-1486-08.7_gdc_realn.bam",
+ "id": "81a1b323-88b6-4837-bccf-ac84a79828b6",
"cases": [
{
- "submitter_id": "TCGA-12-5301"
+ "submitter_id": "TCGA-BH-A0BM"
}
],
- "file_id": "3b0293c2-4a26-428c-b097-9489f23a2a2d",
- "file_size": 23661175335
+ "file_name": "TCGA-BRCA.4570b87f-8116-48bf-86d3-b993536c88db.gene_level_copy_number.v36.tsv",
+ "file_id": "81a1b323-88b6-4837-bccf-ac84a79828b6",
+ "file_size": 3446816
},
{
- "file_name": "75a36e71-400d-46a5-93b0-7813cf0595ea.FPKM.txt.gz",
+ "id": "c6bf94a6-9940-4155-86b4-bbb10875dbdb",
"cases": [
{
- "submitter_id": "TCGA-BF-A5EO"
+ "submitter_id": "TCGA-BH-A18F"
}
],
- "file_id": "28f763c7-8064-4151-ae0e-31e70cd9bfe8",
- "file_size": 488422
+ "file_name": "TCGA-BRCA.88cae21a-4890-4fdd-a678-c4864620942c.star_fusion.rna_fusion.bedpe",
+ "file_id": "c6bf94a6-9940-4155-86b4-bbb10875dbdb",
+ "file_size": 229
}
],
"pagination": {
"count": 10,
- "sort": "",
+ "total": 931947,
+ "size": 10,
"from": 0,
+ "sort": "",
"page": 1,
- "total": 216435,
- "pages": 21644,
- "size": 10
+ "pages": 93195
}
},
"warnings": {}
@@ -2028,64 +2245,77 @@ The `expand` parameter provides a shortcut to request multiple related fields (f
#### Example
```Shell
-curl 'https://api.gdc.cancer.gov/files/ac2ddebd-5e5e-4aea-a430-5a87c6d9c878?expand=cases.samples&pretty=true'
-```
+curl 'https://api.gdc.cancer.gov/files/573ee7e9-b8bd-419e-808b-a027c4311731?expand=cases.samples&pretty=true'
```
+```Response
{
"data": {
- "data_type": "Aligned Reads",
- "updated_datetime": "2016-09-18T04:25:13.163601-05:00",
- "created_datetime": "2016-05-26T18:55:53.506549-05:00",
- "file_name": "000aa811c15656604161e8f0e3a0aae4_gdc_realn.bam",
- "md5sum": "200475f5f6e42520204e5f6aadfe954f",
- "data_format": "BAM",
+ "proportion_reads_mapped": 0.9648433596149857,
+ "access": "controlled",
+ "proportion_base_mismatch": 0.004117986,
+ "contamination_error": 0,
"acl": [
"phs000178"
],
- "access": "controlled",
+ "type": "aligned_reads",
"platform": "Illumina",
- "state": "submitted",
- "file_id": "ac2ddebd-5e5e-4aea-a430-5a87c6d9c878",
- "data_category": "Raw Sequencing Data",
- "file_size": 12667634731,
+ "created_datetime": "2022-05-12T14:42:10.014925-05:00",
+ "updated_datetime": "2022-11-01T11:52:54.136033-05:00",
+ "pairs_on_diff_chr": 1170013,
+ "state": "released",
+ "data_format": "BAM",
+ "total_reads": 379313036,
+ "proportion_coverage_30x": 0.000109,
"cases": [
{
"samples": [
{
- "sample_type_id": "11",
- "updated_datetime": "2016-09-08T11:00:45.021005-05:00",
+ "sample_type_id": "10",
+ "tumor_descriptor": "Not Reported",
+ "sample_id": "4e128a37-be58-477a-a01f-448179360b7c",
+ "sample_type": "Blood Derived Normal",
+ "tumor_code": null,
+ "created_datetime": null,
"time_between_excision_and_freezing": null,
- "oct_embedded": "false",
- "tumor_code_id": null,
- "submitter_id": "TCGA-QQ-A5VA-11A",
+ "composition": "Not Reported",
+ "updated_datetime": "2022-04-28T22:05:09.013808-05:00",
+ "days_to_collection": 6755,
+ "state": "released",
+ "initial_weight": null,
+ "preservation_method": null,
"intermediate_dimension": null,
- "sample_id": "b4e7558d-898e-4d68-a897-381edde0bbcc",
- "is_ffpe": false,
- "pathology_report_uuid": null,
- "created_datetime": null,
- "tumor_descriptor": null,
- "sample_type": "Solid Tissue Normal",
- "state": null,
- "current_weight": null,
- "composition": null,
"time_between_clamping_and_freezing": null,
+ "freezing_method": null,
+ "pathology_report_uuid": null,
+ "submitter_id": "TCGA-B6-A0RI-10A",
+ "tumor_code_id": null,
"shortest_dimension": null,
- "tumor_code": null,
- "tissue_type": null,
+ "oct_embedded": "false",
"days_to_sample_procurement": null,
- "freezing_method": null,
- "preservation_method": null,
- "days_to_collection": 5980,
- "initial_weight": 810.0,
- "longest_dimension": null
+ "longest_dimension": null,
+ "current_weight": null,
+ "is_ffpe": false,
+ "tissue_type": "Not Reported"
}
]
}
],
- "submitter_id": "32872121-d38a-4128-b96a-698a6f18f29d",
- "type": "aligned_reads",
- "file_state": "processed",
- "experimental_strategy": "WXS"
+ "file_name": "c9478f7d-bfe3-4e80-8161-39b3d440fa16_wgs_gdc_realn.bam",
+ "mean_coverage": 5.452655,
+ "proportion_reads_duplicated": 0.009253781617987946,
+ "submitter_id": "a4e380e5-420e-49af-986d-e721601065fb",
+ "data_category": "Sequencing Reads",
+ "proportion_coverage_10x": 0.07674,
+ "file_size": 42958286722,
+ "contamination": 0,
+ "average_base_quality": 32,
+ "file_id": "573ee7e9-b8bd-419e-808b-a027c4311731",
+ "data_type": "Aligned Reads",
+ "average_insert_size": 207,
+ "average_read_length": 51,
+ "experimental_strategy": "WGS",
+ "version": "1",
+ "data_release": "36.0 - 37.0"
},
"warnings": {}
}
@@ -2114,7 +2344,7 @@ files_endpt = 'https://api.gdc.cancer.gov/files'
params = {'fields':'file_name',
'from':0, 'size':2}
response = requests.get(files_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
```Response1
@@ -2122,20 +2352,22 @@ print json.dumps(response.json(), indent=2)
"data": {
"hits": [
{
- "file_name": "unc.edu.276a1e00-cf3a-4463-a97b-d544381219ea.2363081.rsem.isoforms.normalized_results"
+ "id": "d570eccc-3c1c-4c4f-ae04-96be71fbe016",
+ "file_name": "TCGA-AN-A0FL-01Z-00-DX1.20A041C6-A306-4599-A7D1-65032A252AA9.svs"
},
{
- "file_name": "nationwidechildrens.org_clinical.TCGA-EY-A5W2.xml"
+ "id": "0f8d8202-a1ca-4ea1-98b2-c20a6b08479a",
+ "file_name": "nationwidechildrens.org_ssf.TCGA-AN-A0FL.xml"
}
],
"pagination": {
"count": 2,
- "sort": "",
+ "total": 931947,
+ "size": 2,
"from": 0,
- "pages": 300936,
- "total": 601872,
+ "sort": "",
"page": 1,
- "size": 2
+ "pages": 465974
}
},
"warnings": {}
@@ -2152,41 +2384,41 @@ files_endpt = 'https://api.gdc.cancer.gov/files'
params = {'fields':'file_name',
'from':101, 'size':5}
response = requests.get(files_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
``` Output2
{
"data": {
"hits": [
{
- "file_name": "OCULI_p_TCGA_159_160_SNP_N_GenomeWideSNP_6_E09_831242.grch38.seg.txt",
- "id": "1d959137-d8e6-4336-b357-8ab9c88eeca8"
+ "id": "297933f5-1316-4cb6-b53f-9dbfa7f3d7ed",
+ "file_name": "TCGA-B6-A0RH-01A-02-TSB.ea83f31e-defb-4436-8a58-5b66b18d13b5.svs"
},
{
- "file_name": "jhu-usc.edu_SKCM.HumanMethylation450.3.lvl-3.TCGA-EE-A3JI-06A-11D-A21B-05.gdc_hg38.txt",
- "id": "9c02ec95-4aa3-4112-8823-c0fa87f71773"
+ "id": "2f31e897-b3e8-49f1-a400-ccf9f00f294a",
+ "file_name": "URAEI_p_TCGASNP_b85_N_GenomeWideSNP_6_F01_735050.grch38.seg.v2.txt"
},
{
- "file_name": "jhu-usc.edu_LAML.HumanMethylation450.2.lvl-3.TCGA-AB-3002-03A-01D-0742-05.gdc_hg38.txt",
- "id": "731c3560-bcef-4ebf-bfbc-7320399a5bcb"
+ "id": "ebd6cf90-4f6b-4193-887a-22fdb5645fbc",
+ "file_name": "TCGA-BRCA.5994c06d-ee9b-4ead-b3d1-2e1f286f7d6d.ascat2.allelic_specific.seg.txt"
},
{
- "file_name": "CUSKS_p_TCGAb47_SNP_1N_GenomeWideSNP_6_B03_628222.grch38.seg.txt",
- "id": "a6f73a3e-faf8-49d9-9b68-77781bd302df"
+ "id": "aebd6b5a-e676-4357-93df-523b31b55ea0",
+ "file_name": "TCGA-BRCA.c737131c-636f-4e1b-89b8-bb2d6ddd8164.star_fusion.rna_fusion.bedpe"
},
{
- "file_name": "5496e9f1-a383-4874-95bb-f4d1b33f4594.vcf",
- "id": "5496e9f1-a383-4874-95bb-f4d1b33f4594"
+ "id": "aa83a7e7-e9cc-4330-a7be-ca750cffb74c",
+ "file_name": "URAEI_p_TCGASNP_b85_N_GenomeWideSNP_6_F01_735050.birdseed.data.txt"
}
],
"pagination": {
"count": 5,
- "sort": "",
+ "total": 931947,
+ "size": 5,
"from": 101,
+ "sort": "",
"page": 21,
- "total": 274724,
- "pages": 54945,
- "size": 5
+ "pages": 186390
}
},
"warnings": {}
@@ -2212,7 +2444,7 @@ cases_endpt = 'https://api.gdc.cancer.gov/cases'
params = {'fields':'submitter_id',
'sort':'submitter_id:asc'}
response = requests.get(cases_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
``` Output
@@ -2220,54 +2452,54 @@ print json.dumps(response.json(), indent=2)
"data": {
"hits": [
{
- "id": "f7af65fc-97e3-52ce-aa2c-b707650e747b",
- "submitter_id": "TARGET-00-NAAEMA"
+ "id": "be37f1f7-2f98-4f74-bc04-6dd2ae2afcad",
+ "submitter_id": "01BR001"
},
{
- "id": "513d0a2a-3c94-5a36-97a4-24c3656fc66e",
- "submitter_id": "TARGET-00-NAAEMB"
+ "id": "e6915db0-7c89-484d-8f9f-15cca68b82fc",
+ "submitter_id": "01BR008"
},
{
- "id": "b5f20676-727b-50b0-9b5a-582cd8572d6d",
- "submitter_id": "TARGET-00-NAAEMC"
+ "id": "16614d46-172b-479c-992b-e80a8e9a2c59",
+ "submitter_id": "01BR009"
},
{
- "id": "0c0b183f-0d4a-5a9d-9888-0617cebcc462",
- "submitter_id": "TARGET-20-PABGKN"
+ "id": "567fc9e3-17a6-42b1-a896-5e9a9507d1d8",
+ "submitter_id": "01BR010"
},
{
- "id": "0f5ed7a7-226d-57bc-a4ce-8a6b18560c55",
- "submitter_id": "TARGET-20-PABHET"
+ "id": "54e89878-a1bc-4f5a-9d68-4842a469586e",
+ "submitter_id": "01BR015"
},
{
- "id": "b2a560a4-5e52-5d78-90ef-d680fbaf44d0",
- "submitter_id": "TARGET-20-PABHKY"
+ "id": "a1c7b7b9-b8c8-48c3-9420-55497f9318fd",
+ "submitter_id": "01BR017"
},
{
- "id": "1e5c8323-383d-51a0-9199-1b9504b29c7e",
- "submitter_id": "TARGET-20-PABLDZ"
+ "id": "ce3c8b98-e275-4cfd-a379-940d675a564b",
+ "submitter_id": "01BR018"
},
{
- "id": "c550a267-30bd-5bf3-9699-61341559e0d5",
- "submitter_id": "TARGET-20-PACDZR"
+ "id": "e4ce89ef-bcaa-418a-8a6b-3602793b9bbf",
+ "submitter_id": "01BR020"
},
{
- "id": "0fe29a81-74fc-5158-ae13-0437bc272805",
- "submitter_id": "TARGET-20-PACEGD"
+ "id": "19d3c861-8a5f-49a2-acc0-b55b25465c35",
+ "submitter_id": "01BR023"
},
{
- "id": "dd2b23ec-46f4-56b2-9429-6015c6dc730f",
- "submitter_id": "TARGET-20-PADDXZ"
+ "id": "afae8dce-294a-4108-bb28-376f804ae5c4",
+ "submitter_id": "01BR025"
}
],
"pagination": {
"count": 10,
- "sort": "submitter_id:asc",
+ "total": 86962,
+ "size": 10,
"from": 0,
+ "sort": "None",
"page": 1,
- "total": 14551,
- "pages": 1456,
- "size": 10
+ "pages": 8697
}
},
"warnings": {}
@@ -2298,34 +2530,114 @@ params = {'facets':'program.name',
'from':0, 'size':0,
'sort':'program.name:asc'}
response = requests.get(projects_endpt, params = params)
-print json.dumps(response.json(), indent=2)
+print(json.dumps(response.json(), indent=2))
```
```Response
{
"data": {
- "pagination": {
- "count": 0,
- "sort": "program.name:asc",
- "from": 0,
- "page": 1,
- "total": 39,
- "pages": 39,
- "size": 0
- },
"hits": [],
"aggregations": {
"program.name": {
"buckets": [
{
- "key": "TCGA",
- "doc_count": 33
+ "doc_count": 33,
+ "key": "TCGA"
+ },
+ {
+ "doc_count": 10,
+ "key": "MATCH"
+ },
+ {
+ "doc_count": 9,
+ "key": "TARGET"
+ },
+ {
+ "doc_count": 4,
+ "key": "CGCI"
+ },
+ {
+ "doc_count": 3,
+ "key": "CMI"
+ },
+ {
+ "doc_count": 2,
+ "key": "BEATAML1.0"
+ },
+ {
+ "doc_count": 2,
+ "key": "CPTAC"
+ },
+ {
+ "doc_count": 2,
+ "key": "MP2PRT"
+ },
+ {
+ "doc_count": 1,
+ "key": "APOLLO"
+ },
+ {
+ "doc_count": 1,
+ "key": "CDDP_EAGLE"
+ },
+ {
+ "doc_count": 1,
+ "key": "CTSP"
+ },
+ {
+ "doc_count": 1,
+ "key": "EXCEPTIONAL_RESPONDERS"
+ },
+ {
+ "doc_count": 1,
+ "key": "FM"
+ },
+ {
+ "doc_count": 1,
+ "key": "HCMI"
+ },
+ {
+ "doc_count": 1,
+ "key": "MMRF"
+ },
+ {
+ "doc_count": 1,
+ "key": "NCICCR"
+ },
+ {
+ "doc_count": 1,
+ "key": "OHSU"
+ },
+ {
+ "doc_count": 1,
+ "key": "ORGANOID"
+ },
+ {
+ "doc_count": 1,
+ "key": "REBC"
+ },
+ {
+ "doc_count": 1,
+ "key": "TRIO"
+ },
+ {
+ "doc_count": 1,
+ "key": "VAREPOP"
},
{
- "key": "TARGET",
- "doc_count": 6
+ "doc_count": 1,
+ "key": "WCDT"
}
]
}
+ },
+ "pagination": {
+ "count": 0,
+ "total": 79,
+ "size": 0,
+ "from": 0,
+ "sort": "None",
+ "page": 1,
+ "pages": 79
}
},
"warnings": {}
@@ -2368,125 +2680,233 @@ curl --request POST --header "Content-Type: application/json" --data @Payload 'h
``` Response
{
"data": {
- "pagination": {
- "count": 0,
- "sort": "",
- "from": 0,
- "page": 1,
- "total": 941,
- "pages": 941,
- "size": 0
- },
"hits": [],
"aggregations": {
"project.primary_site": {
"buckets": [
{
- "key": "Brain",
- "doc_count": 1133
+ "doc_count": 1202,
+ "key": "kidney"
+ },
+ {
+ "doc_count": 1191,
+ "key": "brain"
+ },
+ {
+ "doc_count": 1176,
+ "key": "bronchus and lung"
+ },
+ {
+ "doc_count": 1156,
+ "key": "breast"
+ },
+ {
+ "doc_count": 952,
+ "key": "colon"
+ },
+ {
+ "doc_count": 947,
+ "key": "stomach"
+ },
+ {
+ "doc_count": 878,
+ "key": "uterus, nos"
+ },
+ {
+ "doc_count": 869,
+ "key": "ovary"
+ },
+ {
+ "doc_count": 821,
+ "key": "corpus uteri"
+ },
+ {
+ "doc_count": 789,
+ "key": "other and unspecified parts of tongue"
+ },
+ {
+ "doc_count": 670,
+ "key": "connective, subcutaneous and other soft tissues"
},
{
- "key": "Breast",
- "doc_count": 1098
+ "doc_count": 633,
+ "key": "rectosigmoid junction"
},
{
- "key": "Lung",
- "doc_count": 1089
+ "doc_count": 586,
+ "key": "bones, joints and articular cartilage of other and unspecified sites"
},
{
- "key": "Kidney",
- "doc_count": 941
+ "doc_count": 565,
+ "key": "thyroid gland"
},
{
- "key": "Colorectal",
- "doc_count": 635
+ "doc_count": 528,
+ "key": "base of tongue"
},
{
- "key": "Uterus",
- "doc_count": 617
+ "doc_count": 528,
+ "key": "floor of mouth"
},
{
- "key": "Ovary",
- "doc_count": 608
+ "doc_count": 528,
+ "key": "gum"
},
{
- "key": "Head and Neck",
- "doc_count": 528
+ "doc_count": 528,
+ "key": "hypopharynx"
},
{
- "key": "Thyroid",
- "doc_count": 507
+ "doc_count": 528,
+ "key": "larynx"
},
{
- "key": "Prostate",
- "doc_count": 500
+ "doc_count": 528,
+ "key": "lip"
},
{
- "key": "Stomach",
- "doc_count": 478
+ "doc_count": 528,
+ "key": "oropharynx"
},
{
- "key": "Skin",
- "doc_count": 470
+ "doc_count": 528,
+ "key": "other and ill-defined sites in lip, oral cavity and pharynx"
},
{
- "key": "Bladder",
- "doc_count": 412
+ "doc_count": 528,
+ "key": "other and unspecified parts of mouth"
},
{
- "key": "Liver",
- "doc_count": 377
+ "doc_count": 528,
+ "key": "palate"
},
{
- "key": "Cervix",
- "doc_count": 308
+ "doc_count": 528,
+ "key": "tonsil"
},
{
- "key": "Adrenal Gland",
- "doc_count": 271
+ "doc_count": 500,
+ "key": "prostate gland"
},
{
- "key": "Soft Tissue",
- "doc_count": 261
+ "doc_count": 498,
+ "key": "retroperitoneum and peritoneum"
},
{
- "key": "Bone Marrow",
- "doc_count": 200
+ "doc_count": 470,
+ "key": "skin"
},
{
- "key": "Esophagus",
- "doc_count": 185
+ "doc_count": 448,
+ "key": "heart, mediastinum, and pleura"
},
{
- "key": "Pancreas",
- "doc_count": 185
+ "doc_count": 428,
+ "key": "liver and intrahepatic bile ducts"
},
{
- "key": "Testis",
- "doc_count": 150
+ "doc_count": 412,
+ "key": "bladder"
},
{
- "key": "Thymus",
- "doc_count": 124
+ "doc_count": 307,
+ "key": "cervix uteri"
},
{
- "key": "Pleura",
- "doc_count": 87
+ "doc_count": 271,
+ "key": "adrenal gland"
},
{
- "key": "Eye",
- "doc_count": 80
+ "doc_count": 261,
+ "key": "bones, joints and articular cartilage of limbs"
},
{
- "key": "Lymph Nodes",
- "doc_count": 58
+ "doc_count": 261,
+ "key": "meninges"
},
{
- "key": "Bile Duct",
- "doc_count": 51
+ "doc_count": 261,
+ "key": "other and unspecified male genital organs"
+ },
+ {
+ "doc_count": 261,
+ "key": "peripheral nerves and autonomic nervous system"
+ },
+ {
+ "doc_count": 258,
+ "key": "hematopoietic and reticuloendothelial systems"
+ },
+ {
+ "doc_count": 208,
+ "key": "testis"
+ },
+ {
+ "doc_count": 185,
+ "key": "esophagus"
+ },
+ {
+ "doc_count": 185,
+ "key": "pancreas"
+ },
+ {
+ "doc_count": 179,
+ "key": "other and ill-defined sites"
+ },
+ {
+ "doc_count": 179,
+ "key": "other endocrine glands and related structures"
+ },
+ {
+ "doc_count": 179,
+ "key": "spinal cord, cranial nerves, and other parts of central nervous system"
+ },
+ {
+ "doc_count": 172,
+ "key": "rectum"
+ },
+ {
+ "doc_count": 172,
+ "key": "unknown"
+ },
+ {
+ "doc_count": 124,
+ "key": "thymus"
+ },
+ {
+ "doc_count": 80,
+ "key": "eye and adnexa"
+ },
+ {
+ "doc_count": 58,
+ "key": "lymph nodes"
+ },
+ {
+ "doc_count": 58,
+ "key": "other and unspecified major salivary glands"
+ },
+ {
+ "doc_count": 58,
+ "key": "small intestine"
+ },
+ {
+ "doc_count": 51,
+ "key": "gallbladder"
+ },
+ {
+ "doc_count": 51,
+ "key": "other and unspecified parts of biliary tract"
}
]
}
+ },
+ "pagination": {
+ "count": 0,
+ "total": 1202,
+ "size": 0,
+ "from": 0,
+ "sort": "",
+ "page": 1,
+ "pages": 1202
}
},
"warnings": {}
@@ -2528,69 +2948,7 @@ The GDC Portal has a quicksearch functionality that allows for a project, case,
curl "https://api.gdc.cancer.gov/v0/all?query=TCGA&size=5"
```
```Response
-{
- "data": {
- "query": {
- "hits": [
- {
- "disease_type": [
- "Esophageal Carcinoma"
- ],
- "id": "UHJvamVjdDpUQ0dBLUVTQ0E=",
- "name": "Esophageal Carcinoma",
- "primary_site": [
- "Esophagus"
- ],
- "project_id": "TCGA-ESCA"
- },
- {
- "disease_type": [
- "Head and Neck Squamous Cell Carcinoma"
- ],
- "id": "UHJvamVjdDpUQ0dBLUhOU0M=",
- "name": "Head and Neck Squamous Cell Carcinoma",
- "primary_site": [
- "Head and Neck"
- ],
- "project_id": "TCGA-HNSC"
- },
- {
- "disease_type": [
- "Liver Hepatocellular Carcinoma"
- ],
- "id": "UHJvamVjdDpUQ0dBLUxJSEM=",
- "name": "Liver Hepatocellular Carcinoma",
- "primary_site": [
- "Liver"
- ],
- "project_id": "TCGA-LIHC"
- },
- {
- "disease_type": [
- "Colon Adenocarcinoma"
- ],
- "id": "UHJvamVjdDpUQ0dBLUNPQUQ=",
- "name": "Colon Adenocarcinoma",
- "primary_site": [
- "Colorectal"
- ],
- "project_id": "TCGA-COAD"
- },
- {
- "disease_type": [
- "Adrenocortical Carcinoma"
- ],
- "id": "UHJvamVjdDpUQ0dBLUFDQw==",
- "name": "Adrenocortical Carcinoma",
- "primary_site": [
- "Adrenal Gland"
- ],
- "project_id": "TCGA-ACC"
- }
- ]
- }
- }
-}
+{"data":{"query":{"hits":[{"disease_type":["Adenomas and Adenocarcinomas"],"id":"UHJvamVjdDpUQ0dBLUFDQw==","name":"Adrenocortical Carcinoma","primary_site":["Adrenal gland"],"project_id":"TCGA-ACC","project_quicksearch":"Adrenocortical Carcinoma"},{"disease_type":["Adenomas and Adenocarcinomas"],"id":"UHJvamVjdDpUQ0dBLUtJQ0g=","name":"Kidney Chromophobe","primary_site":["Kidney"],"project_id":"TCGA-KICH","project_quicksearch":"Kidney Chromophobe"},{"disease_type":["Adenomas and Adenocarcinomas"],"id":"UHJvamVjdDpUQ0dBLUxJSEM=","name":"Liver Hepatocellular Carcinoma","primary_site":["Liver and intrahepatic bile ducts"],"project_id":"TCGA-LIHC","project_quicksearch":"Liver Hepatocellular Carcinoma"},{"disease_type":["Myeloid Leukemias"],"id":"UHJvamVjdDpUQ0dBLUxBTUw=","name":"Acute Myeloid Leukemia","primary_site":["Hematopoietic and reticuloendothelial systems"],"project_id":"TCGA-LAML","project_quicksearch":"Acute Myeloid Leukemia"},{"disease_type":["Adenomas and Adenocarcinomas"],"id":"UHJvamVjdDpUQ0dBLUtJUlA=","name":"Kidney Renal Papillary Cell Carcinoma","primary_site":["Kidney"],"project_id":"TCGA-KIRP","project_quicksearch":"Kidney Renal Papillary Cell Carcinoma"}],"total":183550}}}
```
This endpoint can be used to quickly retrieve information about a file. For example, if a user wanted to know the UUID for `nationwidechildrens.org_biospecimen.TCGA-EL-A4K1.xml`, the following query could be used to quickly retrieve it programmatically:
@@ -2599,20 +2957,7 @@ This endpoint can be used to quickly retrieve information about a file. For exa
curl "https://api.gdc.cancer.gov/v0/all?query=nationwidechildrens.org_biospecimen.TCGA-EL-A4K1.xml&size=5"
```
```Response
-{
- "data": {
- "query": {
- "hits": [
- {
- "file_id": "2a7a354b-e497-4ae6-8a85-a170951596c1",
- "file_name": "nationwidechildrens.org_biospecimen.TCGA-EL-A4K1.xml",
- "id": "RmlsZToyYTdhMzU0Yi1lNDk3LTRhZTYtOGE4NS1hMTcwOTUxNTk2YzE=",
- "submitter_id": null
- }
- ]
- }
- }
-}
+{"data":{"query":{"hits":[{"file_id":"a74abfec-db78-4ed4-9e4b-604b66e30e30","file_name":"nationwidechildrens.org_biospecimen.TCGA-EL-A4K1.xml","id":"RmlsZTphNzRhYmZlYy1kYjc4LTRlZDQtOWU0Yi02MDRiNjZlMzBlMzA=","submitter_id":"nationwidechildrens.org_biospecimen.TCGA-EL-A4K1.xml"}],"total":1}}}
```
## Additional Examples
diff --git a/docs/API/Users_Guide/Submission.md b/docs/API/Users_Guide/Submission.md
index d47acf23c..80cb1f4c7 100644
--- a/docs/API/Users_Guide/Submission.md
+++ b/docs/API/Users_Guide/Submission.md
@@ -48,7 +48,7 @@ Metadata files must be uploaded in raw, unencoded form. Binary mode should be us
#### BCR XML
-While JSON and TSV are the recommended formats for submitting metadata, the GDC API can also extract metadata elements from BCR XML files. Users wishing to submit metadata as BCR XML must contact GDC User Services and ensure that appropriate element mapping is in place before initiating XML submission.
+While JSON and TSV are the recommended formats for submitting metadata, the GDC API can also extract metadata elements from BCR XML files. Users wishing to submit metadata as BCR XML must contact GDC User Services and ensure that appropriate element mapping is in place before initiating XML submission. The current mappings can be found on [GitHub](https://github.com/NCI-GDC/gdcdatamodel/tree/develop/gdcdatamodel/xml_mappings).
To submit BCR XML, make `PUT` requests with the `Content-Type: application/xml` header to the following URLs, replacing Program.name and Project.code as desribed in [Submission Endpoint](#submission_endpoint) (above):
@@ -63,12 +63,12 @@ The following is a sample shell command for submitting an XML file:
curl --request PUT --header "X-Auth-Token: $token" --header 'Content-Type: application/xml' --data-binary @biospecimen.xml 'https://api.gdc.cancer.gov/v0/submission/GDC/INTERNAL/xml/biospecimen/bcr/_dry_run'
-**NOTE:** A typical BCR XML file contains more information than what is extracted and indexed by the GDC. XML files submitted to the above endpoints are not retained or distributed to GDC data users, so the same files should also be submitted as data files (i.e. as clinical or biospecimen supplements).
+>**NOTE:** A typical BCR XML file contains more information than what is extracted and indexed by the GDC. XML files submitted to the above endpoints are not retained or distributed to GDC data users, so the same files should also be submitted as data files (i.e. as clinical or biospecimen supplements).
### Data File Formats
-The GDC API accepts a variety of data files after their metadata has been registered: BAM and FASTQ files, clinical and biospecimen supplements, slide images, and other file types. Supported data file formats are listed on the [GDC website](https://gdc.cancer.gov/node/266/).
+The GDC API accepts a variety of data files after their metadata has been registered: BAM and FASTQ files, clinical and biospecimen supplements, slide images, and other file types. Supported data file formats are listed in the [GDC Data Dictionary](https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-entity-list&anchor=submittable_data_file).
## GDC Data Model
@@ -82,7 +82,7 @@ Submitters can assign UUIDs to all submittable entities other than those that co
In addition to `id`, many entities also include a `submitter_id` field. This field can contain any string (e.g. a "barcode") that the submitter wishes to use to identify the entity. Typically this string identifies a corresponding entry in submitter's records. The GDC's only requirement with respect to `submitter_id` is that it be a string that is unique for all entities within a project. The GDC Submission API requires a `submitter_id` for most entities.
-**Note:** For `case` entities, `submitter_id` must correspond to a `submitted_subject_id` of a study participant registered with the project in dbGaP.
+>**Note:** For `case` entities, `submitter_id` must correspond to a `submitted_subject_id` of a study participant registered with the project in dbGaP.
### GDC Data Dictionary Endpoints
@@ -163,18 +163,21 @@ The following is an example of a POST request, that simulates creating an entity
```Request
{
- "project_id": "TCGA-ALCH",
+ "project_id": "GDC-INTERNAL",
"type": "case",
- "submitter_id": "TCGA-ALCH-000001",
- "projects": {
- "code": "ALCH"
+ "submitter_id": "GDC-INTERNAL-000093",
+ "disease_type": "Blood Vessel Tumors",
+ "primary_site": "Base of tongue",
+ "projects": {
+ "code": "INTERNAL"
}
}
+
```
```Command
token=$(**Note:** To check whether a dry run transaction was committed successfully, check the `state` of the transaction that executed the commit. The `state` of the dry run transaction itself does not represent the status of a subsequent commit.
## Creating and Updating Entities
@@ -333,7 +338,7 @@ The GDC Submission API supports HTTP POST and HTTP PUT methods for creating enti
The GDC suggests using POST for creating new entities, and using PUT only for updating entities. This helps to avoid inadvertent entity updates that can occur when using PUT for creating entities.
-**Note:** Once a relationship has been created between two entities, it cannot be removed by updating an entity. To remove a relationship, the child entity must be [deleted](#deleting-entities).
+>**Note:** Once a relationship has been created between two entities, it cannot be removed by updating an entity. To remove a relationship, the child entity must be [deleted](#deleting-entities).
### Example: Creating and Updating Case Entities (JSON)
@@ -342,41 +347,43 @@ In this example, a case entity is created using POST. Then an attempt is made to
The JSON in the request was generated using the `case` JSON template that can be obtained from the [GDC Data Dictionary Viewer](../../Data_Dictionary/index.md) and from `https://api.gdc.cancer.gov/v0/submission/template/case?format=json`.
-**Note:** For `case` entities, `submitter_id` must correspond to a `submitted_subject_id` of a study participant registered with the project in dbGaP.
+>**Note:** For `case` entities, `submitter_id` must correspond to a `submitted_subject_id` of a study participant registered with the project in dbGaP.
```Request1
{
+ "project_id": "GDC-INTERNAL",
"type": "case",
- "submitter_id": "TCGA-ALCH-000001",
- "projects": {
- "code": "ALCH"
+ "submitter_id": "GDC-INTERNAL-000093",
+ "disease_type": "Blood Vessel Tumors",
+ "primary_site": "Base of tongue",
+ "projects": {
+ "code": "INTERNAL"
}
-
}
```
```Command1
token=$(**NOTE:** Access to GDC Submission API GraphQL service is limited to authorized and authenticated submitters. Submitters may only access data in their own project using GraphQL.
### GraphQL IDE
@@ -2536,7 +2548,7 @@ GDC data submitters can access the GDC Submission API GraphQL endpoint at:
where __[API_version/]__ is the optional API version component (see [Getting Started](Getting_Started.md)).
-**NOTE:** An authentication token is required for all requests to the `graphql` endpoint. Queries are restricted to those projects for which the submitter has obtained authorization.
+>**NOTE:** An authentication token is required for all requests to the `graphql` endpoint. Queries are restricted to those projects for which the submitter has obtained authorization.
### Constructing a Query
diff --git a/docs/API/Users_Guide/System_Information.md b/docs/API/Users_Guide/System_Information.md
index 05ba49a46..022c7202b 100644
--- a/docs/API/Users_Guide/System_Information.md
+++ b/docs/API/Users_Guide/System_Information.md
@@ -23,7 +23,6 @@ Notifications will indicate the GDC `components` to which they apply:
| Component | Description |
|---------|----------------------------------------------------------------------|
| PORTAL | The GDC Data Portal |
-| LEGACY | The GDC Legacy Archive |
| SUBMISSION | The GDC Data Submission Portal |
| DOCUMENTATION | The GDC documentation site that contains GDC user guides, release notes, and the GDC Data Dictionary |
| WEBSITE | The GDC project website that includes information about the system. This does not include any of the above-listed GDC components. |
@@ -39,7 +38,7 @@ curl --request GET https://api.gdc.cancer.gov/v0/notifications
{
"level": "INFO",
"components": [
- "SUBMISSION_API",
+ "SUBMISSION_API",
"LEGACY_API"
],
"message": "The system is up!"
diff --git a/docs/API/Users_Guide/images/graphql-query.png b/docs/API/Users_Guide/images/graphql-query.png
new file mode 100644
index 000000000..e09f3f9a5
Binary files /dev/null and b/docs/API/Users_Guide/images/graphql-query.png differ
diff --git a/docs/API/Users_Guide/scripts/Authentication_Tokens.py b/docs/API/Users_Guide/scripts/Authentication_Tokens.py
new file mode 100644
index 000000000..0b9b7a7cd
--- /dev/null
+++ b/docs/API/Users_Guide/scripts/Authentication_Tokens.py
@@ -0,0 +1,27 @@
+import requests
+import json
+import re
+
+# This script will not work until $TOKEN_FILE_PATH is replaced with an actual path.
+
+with open("$TOKEN_FILE_PATH","r") as token:
+    token_string = str(token.read().strip())
+
+headers = {
+    'X-Auth-Token': token_string
+}
+
+data_endpt = 'https://api.gdc.cancer.gov/data/'
+data_uuid = 'fd89bfa5-b3a7-4079-bf90-709580c006e5'
+response = requests.get(data_endpt + data_uuid, headers=headers)
+# Stop with a clear HTTP error (e.g. an invalid or expired token) instead of
+# failing below on a missing Content-Disposition header.
+response.raise_for_status()
+
+# The file name can be found in the header within the Content-Disposition key.
+response_head_cd = response.headers["Content-Disposition"]
+
+file_name = re.findall("filename=(.+)", response_head_cd)[0]
+
+with open(file_name, "wb") as output_file:
+    output_file.write(response.content)
diff --git a/docs/API/Users_Guide/scripts/Basic_Query.py b/docs/API/Users_Guide/scripts/Basic_Query.py
index 5eb08266d..eedeb2432 100644
--- a/docs/API/Users_Guide/scripts/Basic_Query.py
+++ b/docs/API/Users_Guide/scripts/Basic_Query.py
@@ -3,7 +3,7 @@
cases_endpt = 'https://api.gdc.cancer.gov/cases'
-# The fields parameter is passed as a comma-separated string of single names
+# The 'fields' parameter is passed as a comma-separated string of single names.
fields = [
"submitter_id",
"case_id",
@@ -22,4 +22,10 @@
response = requests.get(cases_endpt, params = params)
+# OUTPUT METHOD 1: Write to a file.
+# The 'with' block closes the file even if the write fails.
+with open("basic_query.tsv", "w") as output_file:
+    output_file.write(response.text)
+
+# OUTPUT METHOD 2: View on screen.
print(response.content)
diff --git a/docs/API/Users_Guide/scripts/Basic_Troubleshooting.py b/docs/API/Users_Guide/scripts/Basic_Troubleshooting.py
new file mode 100644
index 000000000..0374b112c
--- /dev/null
+++ b/docs/API/Users_Guide/scripts/Basic_Troubleshooting.py
@@ -0,0 +1,11 @@
+import requests
+status_endpt = "https://api.gdc.cancer.gov/status"
+response = requests.get(status_endpt)
+
+# OUTPUT METHOD 1: Write to a file.
+# The 'with' block closes the file even if the write fails.
+with open("api_status.json", "w") as output_file:
+    output_file.write(response.text)
+
+# OUTPUT METHOD 2: View on screen.
+print(response.content)
\ No newline at end of file
diff --git a/docs/API/Users_Guide/scripts/Complex_Query.py b/docs/API/Users_Guide/scripts/Complex_Query.py
index e3df06f0d..ad9b70682 100644
--- a/docs/API/Users_Guide/scripts/Complex_Query.py
+++ b/docs/API/Users_Guide/scripts/Complex_Query.py
@@ -1,6 +1,7 @@
import requests
import json
+# The 'fields' parameter is passed as a comma-separated string of single names.
fields = [
"file_name",
"cases.submitter_id",
@@ -52,4 +53,10 @@
# The parameters are passed to 'json' rather than 'params' in this case
response = requests.post(files_endpt, headers = {"Content-Type": "application/json"}, json = params)
-print(response.content.decode("utf-8"))
+# OUTPUT METHOD 1: Write to a file.
+# The 'with' block closes the file even if the write fails.
+with open("complex_filters.tsv", "w") as output_file:
+    output_file.write(response.text)
+
+# OUTPUT METHOD 2: View on screen.
+print(response.content.decode("utf-8"))
\ No newline at end of file
diff --git a/docs/API/Users_Guide/scripts/Filter_Query.py b/docs/API/Users_Guide/scripts/Filter_Query.py
index 42c17761d..11bd6dcc9 100644
--- a/docs/API/Users_Guide/scripts/Filter_Query.py
+++ b/docs/API/Users_Guide/scripts/Filter_Query.py
@@ -1,6 +1,7 @@
import requests
import json
+# The 'fields' parameter is passed as a comma-separated string of single names.
fields = [
"submitter_id",
"case_id",
@@ -21,8 +22,7 @@
}
}
-# With a GET request, the filters parameter needs to be converted
-# from a dictionary to JSON-formatted string
+# With a GET request, the filters parameter needs to be converted from a dictionary to a JSON-formatted string
params = {
"filters": json.dumps(filters),
@@ -33,4 +33,10 @@
response = requests.get(cases_endpt, params = params)
-print(response.content)
+# OUTPUT METHOD 1: Write to a file.
+# The 'with' block closes the file even if the write fails.
+with open("filtered_query.tsv", "w") as output_file:
+    output_file.write(response.text)
+
+# OUTPUT METHOD 2: View on screen.
+print(response.content)
\ No newline at end of file
diff --git a/docs/API/Users_Guide/scripts/Sample_Request.py b/docs/API/Users_Guide/scripts/Sample_Request.py
new file mode 100644
index 000000000..2652df479
--- /dev/null
+++ b/docs/API/Users_Guide/scripts/Sample_Request.py
@@ -0,0 +1,14 @@
+import requests
+import json
+
+file_endpt = 'https://api.gdc.cancer.gov/files/'
+file_uuid = 'cb92f61d-041c-4424-a3e9-891b7545f351'
+response = requests.get(file_endpt + file_uuid)
+
+# OUTPUT METHOD 1: Write to a file.
+# The 'with' block closes the file even if the write fails.
+with open("sample_request.json", "w") as output_file:
+    output_file.write(response.text)
+
+# OUTPUT METHOD 2: View on screen.
+print(json.dumps(response.json(), indent=2))
diff --git a/docs/Data/Bioinformatics_Pipelines/Aligned_reads_summary_metrics.md b/docs/Data/Bioinformatics_Pipelines/Aligned_reads_summary_metrics.md
new file mode 100644
index 000000000..7dda5c7a6
--- /dev/null
+++ b/docs/Data/Bioinformatics_Pipelines/Aligned_reads_summary_metrics.md
@@ -0,0 +1,23 @@
+# Aligned Reads Summary Metrics
+
+Various summary metrics are added to the aligned reads entity so that users can query them. These metrics are generated by tools such as SAMtools, Picard, and GATK4, and can help users assess the underlying quality of the submitted data or obtain summary information about it. Examples are included below:
+
+* average_base_quality
+* average_insert_size
+* average_read_length
+* contamination
+* contamination_error
+* mean_coverage
+* msi_score
+* msi_status
+* pairs_on_diff_chr
+* proportion_base_mismatch
+* proportion_coverage_10x
+* proportion_coverage_30x
+* proportion_reads_duplicated
+* proportion_reads_mapped
+* proportion_targets_no_coverage
+* total_reads
+
+
+For a complete list of the summary metrics as well as the tools used to generate them please visit the [Data Dictionary Viewer](https://docs.gdc.cancer.gov/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads).
diff --git a/docs/Data/Bioinformatics_Pipelines/CNV_Pipeline.md b/docs/Data/Bioinformatics_Pipelines/CNV_Pipeline.md
index bf0f9218a..f8441cff8 100644
--- a/docs/Data/Bioinformatics_Pipelines/CNV_Pipeline.md
+++ b/docs/Data/Bioinformatics_Pipelines/CNV_Pipeline.md
@@ -2,29 +2,84 @@
## Introduction
-The copy number variation (CNV) pipeline uses Affymetrix SNP 6.0 array data to identify genomic regions that are repeated and infer the copy number of these repeats. This pipeline is built onto the existing TCGA level 2 data generated by [Birdsuite](https://www.broadinstitute.org/scientific-community/science/programs/medical-and-population-genetics/birdsuite/birdsuite) and uses the [DNAcopy](http://www.bioconductor.org/packages/release/bioc/html/DNAcopy.html) R-package to perform a circular binary segmentation (CBS) analysis [[1]](http://biostatistics.oxfordjournals.org/content/5/4/557.short). CBS translates noisy intensity measurements into chromosomal regions of equal copy number. The final output files are segmented into genomic regions with the estimated copy number for each region. The GDC further transforms these copy number values into segment mean values, which are equal to log2(copy-number/ 2). Diploid regions will have a segment mean of zero, amplified regions will have positive values, and deletions will have negative values. The GRCh38 probe-set was produced by mapping probe sequences to the GRCh38 reference genome and can be downloaded at the [GDC Reference File Website](https://gdc.cancer.gov/about-data/data-harmonization-and-generation/gdc-reference-files).
+The copy number variation (CNV) pipeline uses either NGS or Affymetrix SNP 6.0 (SNP6) array data to identify genomic regions that are repeated and infer the copy number of these repeats. Three sets of pipelines have been used for CNV inferences.
+* ASCAT
+* ABSOLUTE
+* DNAcopy
-## Data Processing Steps
+The first set of CNV pipelines are built upon the ASCAT [[1]](https://www.pnas.org/content/107/39/16910) algorithm for both WGS and SNP6 data. ASCAT is able to generate Allele-specific Copy Number Segment data with integer copy number values, and the derived integer Gene-Level Copy Number. 1.) The WGS copy number analysis pipeline, [ascatNGS](https://github.com/cancerit/ascatNgs), is described in detail [here](https://docs.gdc.cancer.gov/Data/Bioinformatics_Pipelines/DNA_Seq_Variant_Calling_Pipeline/#whole-genome-sequencing-variant-calling). 2.) The SNP6 copy number analysis pipeline, ASCAT2, is adopted from the [example ASCAT analysis](https://github.com/VanLoo-lab/ascat/blob/v2.5/ExampleData/ASCAT_examplePipeline.R). 3.) The SNP6 copy number analysis pipeline, ASCAT3, is an updated version of ASCAT2. The ASCAT3 analysis in TCGA was done by the [Van Loo lab](https://github.com/VanLoo-lab/ascat/tree/master/ReleasedData/TCGA_SNP6_hg38), and the GDC released a reformatted version of these calls. Both ASCAT2 and ASCAT3 generate data similar to ascatNGS.
-A metadata preprocessing step is used to convert the GRCh37 (hg19) probe set coordinates to the newer GRCh38 (hg38) genome build coordinates. A minimum quality control step to verify that reference bases are consistent across two genome builds is used to filter out low quality liftover probe sets.
+The second CNV pipeline, ABSOLUTE, also uses Affymetrix SNP 6.0 (SNP6) array data as input. The hg19 version of the segments was published as one of the [TCGA PanCancer analysis papers](https://doi.org/10.1016/j.ccell.2018.03.007) and the data is available on the [GDC publication page](https://gdc.cancer.gov/about-data/publications/pancanatlas). These calls have been manually curated and thus are considered of good quality. The GDC performed segment liftover and generated gene-level copy numbers. Note that the intermediate GRCh38 segments contain liftover artifacts and were not released in the GDC. Users can also obtain corresponding purity and ploidy measurements from the GDC publication page mentioned above.
-The [Copy Number Liftover Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_liftover_workflow) uses the TCGA level 2 tangent.copynumber files described above. These files were generated by first normalizing array intensity values, estimating raw copy number, and performing tangent normalization, which subtracts variation that is found in a set of normal samples. Original array intensity values (TCGA level 1) are available in the [GDC Legacy Archive](https://portal.gdc.cancer.gov/legacy-archive/) under the "Data Format: CEL" and "Platform: Affymetrix SNP 6.0" filters.
+The third set of CNV pipelines are built onto the existing TCGA level 2 SNP6 data generated by [Birdsuite](https://www.broadinstitute.org/scientific-community/science/programs/medical-and-population-genetics/birdsuite/birdsuite) and uses the [DNAcopy](http://www.bioconductor.org/packages/release/bioc/html/DNAcopy.html) R-package to perform a circular binary segmentation (CBS) analysis [[2]](http://biostatistics.oxfordjournals.org/content/5/4/557.short). CBS translates noisy intensity measurements into chromosomal regions of equal copy number. The final output files are segmented into genomic regions with the estimated copy number for each region. The GDC further transforms these copy number values into segment mean values, which are equal to log2(copy-number/ 2). Diploid regions will have a segment mean of zero, amplified regions will have positive values, and deletions will have negative values.
-The Copy Number Liftover Workflow performs CBS analysis using the DNACopy R-package to process tangent normalized data into [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) files, which associate contiguous chromosome regions with log2 ratio segment means in a tab-delimited format. The number of probes with intensity values associated with each chromosome region is also reported (probes with no intensity values are not included in this count). During copy number segmentation probe sets from Pseudo-Autosomal Regions (PARs) were removed from males and Y chromosome segments were removed from females.
+## ASCAT Pipelines
+### Data Processing Steps
+#### Copy Number Segmentation
+
+The [Somatic Copy Number Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=somatic_copy_number_workflow) uses a tumor-normal pair of either SNP6 raw CEL data, or WGS data as input. The ASCAT algorithm derives allele-specific copy number segments while estimating and adjusting for tumor purity and ploidy [[1]](https://www.pnas.org/content/107/39/16910). Because there are two parental strands, the resulting Copy Number Segment or Allele-Specific Copy Number Segment files contain 3 different copy number integer values: Major_Copy_Number refers to the larger strand copy number, Minor_Copy_Number refers to the smaller strand copy number, Copy_Number is the sum of Major_Copy_Number and Minor_Copy_Number, and thus equals the total copy number at the locus.
-Masked copy number segments are generated with the same method except that a filtering step is performed that removes Y chromosome and probe sets that were previously indicated to have frequent germline copy-number variation.
| I/O | Entity | Format |
|---|---|---|
-| Input | [Submitted Tangent Copy Number](/Data_Dictionary/viewer/#?view=table-definition-view&id=submitted_tangent_copy_number) | TXT |
-| Output | [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) or Masked Copy Number Segment | TXT |
+| Input | [Submitted Genotyping Array](/Data_Dictionary/viewer/#?view=table-definition-view&id=submitted_genotyping_array) | CEL |
+| Output | [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) or [Allele-Specific Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) | TXT |
+
+| I/O | Entity | Format |
+|---|---|---|
+| Input | [Aligned Reads](/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads) | BAM |
+| Output | [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) or [Allele-Specific Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) | TXT |
+
+#### Gene-Level Copy Number
+
+Gene-level Copy Number is generated by inheriting the Copy_Number value of the residing segment in the Copy Number Segment file generated from ASCAT2, ASCAT3, or ascatNGS workflows.
-## File Access and Availability
+On some occasions, one gene may overlap with more than one segment. In this case, min_copy_number is the minimum value of all segments it overlaps, max_copy_number is the maximum value of all segments it overlaps, and copy_number is calculated as the weighted (on length of overlapped regions) median of copy number values from all overlapped segments. When there is a tie (very rare), the smaller number is used. If a gene overlaps with only one segment, copy_number = min_copy_number = max_copy_number. If a gene overlaps with no segments, the gene gets an empty value "" in copy_number, min_copy_number and max_copy_number.
+
+
+| I/O | Entity | Format |
+|---|---|---|
+| Input | [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) or Allele-Specific Copy Number Segment | TXT |
+| Output | [Copy Number Estimate](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_estimate) | TXT |
+
+
+### File Access and Availability
| Type | Description | Format |
|---|---|---|
-| Copy Number Segment| A table that associates contiguous chromosomal segments with genomic coordinates, mean array intensity, and the number of probes that bind to each segment. | TXT |
-| Masked Copy Number Segment | A table with the same information as the Copy Number Segment except that segments with probes known to contain germline mutations are removed. | TXT |
+| Copy Number Segment| A table that associates contiguous chromosomal segments with genomic coordinates, and integer copy numbers. | TXT |
+| Allele-Specific Copy Number Segment| A table that associates contiguous chromosomal segments with genomic coordinates, and integer copy numbers. | TXT |
+| Copy Number Estimate | A Gene-level Copy Number file that displays integer copy number on a gene level. Generated from Copy Number Segment or Allele-Specific Copy Number Segment files. | TXT |
+
+## ABSOLUTE Copy Number
+### Data Processing Steps
+
+The source data were generated by external groups. Please check the [corresponding publication](https://doi.org/10.1016/j.ccell.2018.03.007) for details.
+
+### File Access and Availability
+
+File access and availability are similar to those of the ASCAT pipelines, except that only gene-level copy numbers are available; segmentation calls are not.
+
+
+## DNAcopy Pipeline
+### Data Processing Steps
+
+The GRCh38 SNP6 probe-set was produced by mapping probe sequences to the GRCh38 reference genome and can be downloaded at the [GDC Reference File Website](https://gdc.cancer.gov/about-data/data-harmonization-and-generation/gdc-reference-files).
+
+#### Copy Number Segmentation
+
+The [Copy Number Liftover Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_liftover_workflow) uses TCGA level 2 tangent.copynumber files. These files were generated by first normalizing array intensity values, estimating raw copy number, and performing tangent normalization, which subtracts variation that is found in a set of normal samples.
+
+The Copy Number Liftover Workflow performs CBS analysis using the DNACopy R-package to process tangent normalized data into [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) files, which associate contiguous chromosome regions with log2 ratio segment means in a tab-delimited format. The number of probes with intensity values associated with each chromosome region is also reported (probes with no intensity values are not included in this count). During copy number segmentation probe sets from Pseudo-Autosomal Regions (PARs) were removed from males and Y chromosome segments were removed from females.
+
+Masked copy number segments are generated using the same method except that a filtering step is performed that removes the Y chromosome and probe sets that were previously indicated to be associated with frequent germline copy-number variation.
+
+| I/O | Entity | Format |
+|---|---|---|
+| Input | [Submitted Tangent Copy Number](/Data_Dictionary/viewer/#?view=table-definition-view&id=submitted_tangent_copy_number) | TXT |
+| Output | [Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) or [Masked Copy Number Segment](/Data_Dictionary/viewer/#?view=table-definition-view&id=copy_number_segment) | TXT |
+
+[1] Van Loo, P., Nordgard, S. H., Lingjaerde, O. C., Russnes, H. G., Rye, I. H., Sun, W. et al. "Allele-specific copy number analysis of tumors." Proceedings of the National Academy of Sciences, 107.39 (2010): 16910-16915.
-[1] Olshen, Adam B., E. S. Venkatraman, Robert Lucito, and Michael Wigler. "Circular binary segmentation for the analysis of array-based DNA copy number data." Biostatistics 5, no. 4 (2004): 557-572.
+[2] Olshen, Adam B., E. S. Venkatraman, Robert Lucito, and Michael Wigler. "Circular binary segmentation for the analysis of array-based DNA copy number data." Biostatistics 5, no. 4 (2004): 557-572.
diff --git a/docs/Data/Bioinformatics_Pipelines/DNA_Seq_Variant_Calling_Pipeline.md b/docs/Data/Bioinformatics_Pipelines/DNA_Seq_Variant_Calling_Pipeline.md
index 0c7e95651..470121b60 100644
--- a/docs/Data/Bioinformatics_Pipelines/DNA_Seq_Variant_Calling_Pipeline.md
+++ b/docs/Data/Bioinformatics_Pipelines/DNA_Seq_Variant_Calling_Pipeline.md
@@ -26,11 +26,6 @@ Prior to alignment, BAM files that were submitted to the GDC are split by read g
DNA-Seq analysis begins with the [Alignment Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=alignment_workflow). Read groups are aligned to the reference genome using one of two [BWA](http://bio-bwa.sourceforge.net) algorithms [[1]](http://www.ncbi.nlm.nih.gov/pubmed/19451168). BWA-MEM is used if mean read length is greater than or equal to 70 bp. Otherwise BWA-aln is used.
Each read group is aligned to the reference genome separately and all read group alignments that belong to a single aliquot are merged using [Picard Tools](http://broadinstitute.github.io/picard) [SortSam](https://broadinstitute.github.io/picard/command-line-overview.html#SortSam) and [MergeSamFiles](https://broadinstitute.github.io/picard/command-line-overview.html#MergeSamFiles). Duplicate reads, which may persist as PCR artifacts, are then flagged to prevent downstream variant call errors.
-#### Quality Control
-
-Quality control metrics are collected before and after the alignment workflow and reviewed to identify potential low-quality data files. Basic metrics such as GC content and mean read length as well as quality score metrics are collected from unaligned reads using [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/). Quality metrics collected by the GDC for aligned reads include samtools idxstat and flagstat. Alignment information is collected using Picard [CollectMultipleMetrics](https://broadinstitute.github.io/picard/command-line-overview.html#CollectMultipleMetrics) for both WGS and WXS. Coverage information is collected using picard [CollectWgsMetrics](https://broadinstitute.github.io/picard/command-line-overview.html#CollectWgsMetrics) for WGS and picard [CollectHsMetrics](https://broadinstitute.github.io/picard/command-line-overview.html#CollectHsMetrics) for WXS.
-
-Quality control metrics for each file endpoint can be accessed through the API using the `expand=analysis.metadata.read_groups,analysis.metadata.read_groups.read_group_qcs` parameter. Click [here](https://api.gdc.cancer.gov/files/40e311a4-67aa-468a-8e09-1c7daa2d10bb?pretty=true&expand=analysis.metadata.read_groups,analysis.metadata.read_groups.read_group_qcs) for an example query.
#### Reference Genome
@@ -42,18 +37,20 @@ All alignments are performed using the human reference genome GRCh38.d1.vd1. Dec
| Input | [Submitted Unaligned Reads](/Data_Dictionary/viewer/#?view=table-definition-view&id=submitted_unaligned_reads) or [Submitted Aligned Reads](/Data_Dictionary/viewer/#?view=table-definition-view&id=submitted_aligned_reads) | FASTQ or BAM |
| Output | [Aligned Reads](/Data_Dictionary/viewer/#?view=table-definition-view&id=aligned_reads) | BAM |
-
+
### DNA-Seq Alignment Command Line Parameters
-#### Step 1: Converting BAMs to FASTQs with Biobambam - biobambam2 2.0.54
+__Note that version numbers may vary in files downloaded from the GDC Portal due to ongoing pipeline development and improvement.__
+
+#### Step 1: Converting BAMs to FASTQs with Biobambam - biobambam2
```Shell
bamtofastq \
collate=1 \
exclude=QCFAIL,SECONDARY,SUPPLEMENTARY \
filename= \
gz=1 \
-inputformat=bam
+inputformat=bam \
level=5 \
outputdir= \
outputperreadgroup=1 \
@@ -64,7 +61,7 @@ outputperreadgroupsuffixO2=_o2.fq.gz \
outputperreadgroupsuffixS=_s.fq.gz \
tryoq=1 \
```
-#### Step 2: BWA Alignment - bwa 0.7.15 - samtools 1.3.1
+#### Step 2: BWA Alignment - bwa - samtools
If mean read length is greater than or equal to 70bp:
```Shell
bwa mem \
@@ -86,7 +83,7 @@ bwa sampe -r \
SORT_ORDER=coordinate \
VALIDATION_STRINGENCY=STRICT
```
-#### Step 4: BAM Merge - picard 2.6.0
+#### Step 4: BAM Merge - picard 2
```Shell
java -jar picard.jar MergeSamFiles \
ASSUME_SORTED=false \
@@ -108,7 +105,7 @@ SORT_ORDER=coordinate \
USE_THREADING=true \
VALIDATION_STRINGENCY=STRICT
```
-#### Step 5: Mark Duplicates - picard 2.6.0
+#### Step 5: Mark Duplicates - picard 2
```Shell
java -jar picard.jar MarkDuplicates \
CREATE_INDEX=true \
@@ -122,11 +119,11 @@ The alignment quality is further improved by the [Co-cleaning workflow](/Data_Di
#### Indel Local Realignment
-Local realignment of insertions and deletions is performed using [IndelRealigner](https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_indels_IndelRealigner.php). This step locates regions that contain misalignments across BAM files, which can often be caused by insertion-deletion (indel) mutations with respect to the reference genome. Misalignment of indel mutations, which can often be erroneously scored as substitutions, reduces the accuracy of downstream variant calling steps.
+Local realignment of insertions and deletions is performed using [IndelRealigner](https://github.com/broadinstitute/gatk-docs/blob/master/gatk3-tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_indels_IndelRealigner.json). This step locates regions that contain misalignments across BAM files, which can often be caused by insertion-deletion (indel) mutations with respect to the reference genome. Misalignment of indel mutations, which can often be erroneously scored as substitutions, reduces the accuracy of downstream variant calling steps.
#### Base Quality Score Recalibration
-A base quality score recalibration (BQSR) step is then performed using [BaseRecalibrator](https://software.broadinstitute.org/gatk/documentation/tooldocs/3.8-0/org_broadinstitute_gatk_tools_walkers_bqsr_BaseRecalibrator.php). This step adjusts base quality scores based on detectable and systematic errors. This step also increases the accuracy of downstream variant calling algorithms. Note that the original quality scores are kept in the OQ field of co-cleaned BAM files. These scores should be used if conversion of BAM files to FASTQ format is desired.
+A base quality score recalibration (BQSR) step is then performed using [BaseRecalibrator](https://gatk.broadinstitute.org/hc/en-us/articles/360036898312-BaseRecalibrator). This step adjusts base quality scores based on detectable and systematic errors. This step also increases the accuracy of downstream variant calling algorithms. Note that the original quality scores are kept in the OQ field of co-cleaned BAM files. These scores should be used if conversion of BAM files to FASTQ format is desired.
| I/O | Entity | Format |
@@ -183,9 +180,11 @@ java -jar GenomeAnalysisTK.jar \
Aligned and co-cleaned BAM files are processed through the [Somatic Mutation Calling Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=somatic_mutation_calling_workflow) as tumor-normal pairs. Variant calling is performed using four separate pipelines:
- [MuSE](http://bioinformatics.mdanderson.org/main/MuSE) [[2]](http://www.biorxiv.org/content/early/2016/05/25/055467.abstract)
-- [MuTect2](https://www.broadinstitute.org/cancer/cga/mutect) [[3]](http://www.nature.com/nbt/journal/v31/n3/abs/nbt.2514.html)
+- [MuTect2](https://gatk.broadinstitute.org/hc/en-us/articles/360037593851-Mutect2) [[3]](http://www.nature.com/nbt/journal/v31/n3/abs/nbt.2514.html)
- [VarScan2](http://dkoboldt.github.io/varscan/) [[4]](http://genome.cshlp.org/content/22/3/568.short)
-- [SomaticSniper](http://gmt.genome.wustl.edu/packages/somatic-sniper/) [[5]](http://bioinformatics.oxfordjournals.org/content/28/3/311.short)
+- [Pindel](https://github.com/ucscCancer/pindel-tool)
+
+Note that [SomaticSniper](http://gmt.genome.wustl.edu/packages/somatic-sniper/) [[5]](http://bioinformatics.oxfordjournals.org/content/28/3/311.short) was used and available on the GDC Data Portal prior to [GDC Data Release 35](https://docs.gdc.cancer.gov/Data/Release_Notes/Data_Release_Notes/#data-release-350).
Variant calls are reported by each pipeline in a VCF formatted file. See the GDC [VCF Format](../File_Formats/VCF_Format/) documentation for details on each available field. At this point in the DNA-Seq pipeline, all downstream analyses are branched into four separate paths that correspond to their respective variant calling pipeline.
@@ -194,15 +193,14 @@ Four separate variant calling pipelines are implemented for GDC data harmonizati
The [MuTect2 pipeline](https://gdc.cancer.gov/files/public/image/Broad_MuTect_0.png) employs a "Panel of Normals" to identify additional germline mutations. This panel is generated using TCGA blood normal genomes from thousands of individuals that were curated and confidently assessed to be cancer-free. This method allows for a higher level of confidence to be assigned to somatic variants that were called by the MuTect2 pipeline.
-Basic outlines for the other three pipelines can be found here:
+Basic outlines for the other two pipelines can be found here:
- [VarScan2 pipeline](https://gdc.cancer.gov/files/public/image/varscan-somatic-variant-calling-pipeline.png)
- [MuSE pipeline](https://gdc.cancer.gov/files/public/image/muse-somatic-variant-calling-pipeline.png)
-- [SomaticSniper pipeline](https://gdc.cancer.gov/files/public/image/somaticsniper-variant-calling-pipeline.png)
#### Indels
-Indel mutations that were generated with the MuTect2 and VarScan pipeline are detected and reported in GDC VCF files.
+Indel mutations that were generated with the MuTect2, Pindel, and VarScan pipelines are detected and reported in GDC VCF files.
#### Germline Variants
At this time, germline variants are deliberately excluded as harmonized data. The GDC does not recommend using germline variants that were previously detected and stored in the Legacy Archive as they do not meet the GDC criteria for high-quality data.
@@ -216,7 +214,7 @@ At this time, germline variants are deliberately excluded as harmonized data. Th
#### MuSE
-MuSEv1.0rc_submission_c039ffa; dbSNP v.144
+MuSEv1.0; dbSNP v.144
__Step 1:__ MuSE call
@@ -243,7 +241,7 @@ __Note:__ -E is used for WXS data and -G can be used for WGS data.
#### MuTect2
-GATK nightly-2016-02-25-gf39d340; dbSNP v.144
+GATK; dbSNP v.144
```Shell
java -jar GenomeAnalysisTK.jar \
@@ -261,34 +259,10 @@ java -jar GenomeAnalysisTK.jar \
--disable_auto_index_creation_and_locking_when_reading_rods
```
-
-
-#### SomaticSniper
-Somatic-sniper v1.0.5.0
-
-```Shell
-bam-somaticsniper \
--q 1 \
--L \
--G \
--Q 15 \
--s 0.01 \
--T 0.85 \
--N 2 \
--r 0.001 \
--n NORMAL \
--t TUMOR \
--F vcf \
--f ref.fa \
- \
- \
-
-```
-
#### VarScan
-__Step 1:__ Mpileup; Samtools 1.1
+__Step 1:__ Mpileup; Samtools
```Shell
samtools mpileup \
-f \
@@ -299,7 +273,7 @@ samtools mpileup \
```
-__Step 2:__ Varscan Somatic; Varscan.v2.3.9
+__Step 2:__ Varscan Somatic; Varscan.v2
```Shell
java -jar VarScan.jar somatic \
\
@@ -318,7 +292,7 @@ java -jar VarScan.jar somatic \
--output-vcf
```
-__Step 3:__ Varscan ProcessSomatic; Varscan.v2.3.9
+__Step 3:__ Varscan ProcessSomatic; Varscan.v2
```Shell
java -jar VarScan.jar processSomatic \
\
@@ -327,6 +301,123 @@ java -jar VarScan.jar processSomatic \
--p-value 0.07
```
+#### Pindel
+
+__Step 1:__ Filter Reads
+
+For both the tumor and normal BAM files, keep only reads that are none of the following: unmapped, duplicate, secondary_alignment, failed_quality_control, or supplementary.
+
+Tool: sambamba
+
+```Shell
+sambamba view $(input.bam) --filter "not (unmapped or duplicate or secondary_alignment or failed_quality_control or supplementary)" --format bam --nthreads 1 --output-filename $(output.bam)
+```
+
+__Step 2:__ Pindel
+
+[Pindel Repo](https://github.com/genome/pindel/releases/tag/v0.2.5b8)
+
+__Step 2a.:__ Calculate mean insert size
+```Python
+cmd = "samtools view -f66 %s | head -n 1000000" % (bam)
+output = do_shell_command(cmd)
+lines = output.decode('utf-8').split('\n')
+b_sum = 0
+b_count = 0
+numlines = 0
+for line in lines:
+ numlines += 1
+ tmp = line.split("\t")
+ if len(tmp) < 9:
+ break
+ if abs(int(tmp[8])) < 10000:
+ b_sum += abs(int(tmp[8]))
+ b_count += 1
+try:
+ mean = b_sum / b_count
+```
+__Step 2b.:__ Write it to a config file
+```Python
+for inputBamFile, meanInsertSize, tag in zip(inputBamFiles, meanInsertSizes, tags):
+ fil.write("%s\t%s\t%s\n" %(inputBamFile, meanInsertSize, tag))
+fil.close()
+```
+__Step 2c.:__ Run pindel
+```Shell
+pindel \
+-f GRCh38.d1.vd1.fa \
+-i config_file \
+-o $(output_prefix) \
+--exclude GRCh38.d1.vd1.centromeres.telomeres.bed
+
+```
+__Step 2d.:__ Merge D and SI output
+```Python
+with open(os.path.join(args.workdir, "pindel_somatic"), "w") as handle:
+ for p in pindel_files:
+ if p.endswith("_D"):
+ with open(p) as ihandle:
+ for line in ihandle:
+ if re.search("ChrID", line):
+ handle.write(line)
+ for p in pindel_files:
+ if p.endswith("_SI"):
+ with open(p) as ihandle:
+ for line in ihandle:
+ if re.search("ChrID", line):
+ handle.write(line)
+```
+__Step 2e.:__ Create a config for pindel somatic filter
+```Python
+indel.filter.input = $(merged.pindel.output)
+indel.filter.vaf = 0.08
+indel.filter.cov = 20
+indel.filter.hom = 6
+indel.filter.pindel2vcf = "/path/to/pindel/pindel2vcf4tcga"
+indel.filter.reference = "GRCh38.d1.vd1.fa"
+indel.filter.referencename = "GRCh38"
+indel.filter.referencedate = datetime.datetime.now().strftime("%Y%m%d")
+indel.filter.output = $(output.file.name.vcf)
+
+```
+__Step 2f.:__ Apply somatic filter on pindel output
+Tool: pindel2vcf4tcga
+```Perl
+perl pindel/somatic_filter/somatic_indelfilter.pl $(somatic.indel.filter.config)
+```
+__Step 3:__ Picard Sort VCF
+Tool: Picard.jar 2
+```Shell
+java \
+-d64 \
+-XX:+UseSerialGC \
+-Xmx16G \
+-jar picard.jar \
+SortVcf \
+CREATE_INDEX=true \
+SEQUENCE_DICTIONARY=GRCh38.d1.vd1.dict \
+I=$(pindel.somatic.vcf) \
+OUTPUT=$(output.vcf.gz)
+```
+__Step 5:__ Variant Filtration
+Tool: GenomeAnalysisTK.jar nightly-2016-02-25-gf39d340
+
+```Shell
+java \
+-Xmx4G \
+-jar \
+/bin/GenomeAnalysisTK.jar \
+-T VariantFiltration \
+--disable_auto_index_creation_and_locking_when_reading_rods \
+--variant $(vt.normal.output.vcf.gz) \
+-R GRCh38.d1.vd1.fa \
+--filterExpression "vc.isBiallelic() && vc.getGenotype(\"TUMOR\").getAD().1 < 3" \
+--filterName TALTDP \
+-o $(output.vcf.gz)
+
+
+```
+
### Variant Call Annotation Workflow
Raw VCF files are then annotated in the [Somatic Annotation Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=somatic_annotation_workflow) with the [Variant Effect Predictor (VEP)](https://www.ensembl.org/info/docs/tools/vep/index.html) v84 [[6]](http://dx.doi.org/10.1093/bioinformatics/btq330) along with VEP GDC plugins.
@@ -345,17 +436,115 @@ The VEP uses the coordinates and alleles in the VCF file to infer biological con
Due to licensing constraints COSMIC is not utilized for annotation in the GDC VEP workflow.
-In addition to annotation, [False Positive Filter](https://github.com/ucscCancer/fpfilter-tool) is used to label low quality variants in VarScan and SomaticSniper outputs. Variants with SSQ < 25 in SomaticSniper are also removed.
+In addition to annotation, [False Positive Filter](https://github.com/ucscCancer/fpfilter-tool) is used to label low quality variants in VarScan.
| I/O | Entity | Format |
|---|---|---|
| Input | [Simple Somatic Mutation](/Data_Dictionary/viewer/#?view=table-definition-view&id=simple_somatic_mutation) | VCF |
| Output | [Annotated Somatic Mutation](/Data_Dictionary/viewer/#?view=table-definition-view&id=annotated_somatic_mutation) | VCF |
-### Somatic Aggregation Workflow
+### Tumor-Only Variant Calling Workflow
+
+Tumor-only variant calling is performed on a tumor sample with no paired normal at the request of the research group. This method takes advantage of the normal cell contamination that is present in most tumor samples. These calls are made using the version of MuTect2 included in GATK4. Tumor-only variant call files can be found in the GDC Portal by filtering for "Workflow Type: GATK4 MuTect2".
+
+### Tumor-Only Variant Call Command-Line Parameters
+```
+GATK4 v4
+
+## 1. Generate OXOG metrics:
+
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+CollectSequencingArtifactMetrics \
+-I Tumor_Sample_Alignment.bam \
+-O \
+--FILE_EXTENSION .txt \
+-R GRCh38.d1.vd1.fa ## Only chr1-22 + XYM
+
+## 2. Generate pileup summaries on tumor sample:
+
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+GetPileupSummaries \
+-I Tumor_Sample_Alignment.bam \
+-O .targeted_sequencing.table \
+-V af-only-gnomad-common-biallelic.grch38.main.vcf.gz \ # Germline reference from gnomad
+-L intervals.bed \ ## Only chr1-22 + XYM
+-R GRCh38.d1.vd1.fa
+
+## 3. Calculate contamination on tumor sample
+
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+CalculateContamination \
+-I .targeted_sequencing.table \ # From step 2
+-O .targeted_sequencing.contamination.table
+
+## 4. Find tumor sample name from BAM
+
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+GetSampleName \
+-I Tumor_Sample_Alignment.bam \
+-O .targeted_sequencing.sample_name
+
+## 5. Run MuTect2 using only tumor sample on chromosome level (25 commands with different intervals)
+
+java -Djava.io.tmpdir=/tmp/job_tmp_3 -d64 -Xmx3G -XX:+UseSerialGC -jar \
+/bin/gatk-4.2.4.0/gatk-package-4.2.4.0-local.jar \
+Mutect2 \
+-R GRCh38.d1.vd1.fa \
+-L chr4:1-190214555 \ # Specify chromosome
+-I Tumor_Sample_Alignment.bam \
+-O 3.mt2.vcf \
+-tumor \ # From step 4
+--af-of-alleles-not-in-resource 2.5e-06 \
+--germline-resource af-only-gnomad.hg38.vcf.gz \ # Germline reference from gnomad
+-pon gatk4_mutect2_4136_pon.vcf.gz # New panel of normal created by 4136 TCGA curated normal samples, using GATK4
+
+## After this step, all chromosome level VCFs are merged into one.
+
+## 6. Sort VCF with Picard
+
+java -d64 -XX:+UseSerialGC -Xmx16G -jar /usr/local/bin/picard.jar \
+SortVcf \
+SEQUENCE_DICTIONARY=GRCh38.d1.vd1.dict \
+OUTPUT=.targeted_sequencing.mutect2.tumor_only.sorted.vcf.gz \
+I=merged_multi_gatk4_mutect2_tumor_only_calling.vcf \ # From step 5
+CREATE_INDEX=true
+
+## 7. Filter variant calls from MuTect
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+FilterMutectCalls \
+-O .targeted_sequencing.mutect2.tumor_only.contFiltered.vcf.gz \
+-V .targeted_sequencing.mutect2.tumor_only.sorted.vcf.gz \ # From step 6
+--contamination-table .targeted_sequencing.contamination.table \ # From step 3
+-L intervals.bed
+
+## 8. Filter variants by orientation bias
+java -d64 -XX:+UseSerialGC -Xmx3G -jar /gatk/gatk.jar \
+FilterByOrientationBias \
+-O .targeted_sequencing.tumor_only.gatk4_mutect2.raw_somatic_mutation.vcf.gz \ # final output
+-P .pre_adapter_detail_metrics.txt \ # From step 1
+-V .targeted_sequencing.mutect2.tumor_only.contFiltered.vcf.gz \ # From step 7
+-L intervals.bed \
+-R GRCh38.d1.vd1.fa \
+-AM G/T \
+-AM C/T
+```
+
+### Tumor-Only Variant Annotation Workflow
-The Somatic Aggregation Workflow generates one MAF file from multiple VCF files; see the [GDC MAF Format](/Data/File_Formats/MAF_Format/) guide for details on file structure. In this step, one MAF file is generated per variant calling pipeline for each project, and contains all available cases within this project.
+After single-tumor variant calling is performed with MuTect2, a series of filters are applied to minimize the release of germline variants in downloadable VCFs. In all cases, the GDC applies a set of custom filters based on allele frequency, mapping quality, somatic/germline probability, and copy number. In some cases an additional variant classification step is applied before the GDC filters.
+The [PureCN](https://bioconductor.org/packages/devel/bioc/html/PureCN.html) R-package [[7]](https://doi.org/10.1186/s13029-016-0060-z) [[8]](https://doi.org/10.1101/552711) is used to classify the variants by somatic/germline status and clonality based on tumor purity, ploidy, contamination, copy number, and loss of heterozygosity. The following steps are performed with this package:
+
+* __Interval Capture__ : Generates an interval file using a FASTA and BED file coordinates.
+* __GC-Normalization__ : Calculates GC-normalized tumor/normal coverage data.
+* __Normal DB Creation__ : Generates a normal database using the normalized coverage file and panel-of-normals VCF
+* __Somatic Variant Calling__ : Classifies each of the previously called variants
+
+Note that PureCN will not be performed if there is insufficient data to produce a target capture kit specific normal database. On rare occasions, PureCN may not find a numeric solution. If PureCN is not performed or does not find a solution, this is indicated in the VCF header. VCF files that were annotated with these pipelines can be found in the GDC Portal by filtering for "Workflow Type: GATK4 MuTect2 Annotation".
+
+### Somatic Aggregation Workflow
+
+The Somatic Aggregation Workflow generates one MAF file from multiple VCF files; see the [GDC MAF Format](/Data/File_Formats/MAF_Format/) guide for details on file structure. In this step, one MAF file is generated per variant calling pipeline for each project and contains all available cases within this project.
| I/O | Entity | Format |
|---|---|---|
@@ -366,15 +555,65 @@ The Somatic Aggregation Workflow generates one MAF file from multiple VCF files;
The MAF files generated by Somatic Aggregation Workflow are controlled-access due to the presence of germline mutations. Open-access MAF files are modified for public release by removing columns and variants that could potentially contain germline mutation information. See the GDC [MAF Format](../File_Formats/MAF_Format/) for details about the criteria used to remove variants.
-While these criteria cause the pipeline to over-filter some of the true positive somatic variants in open-access MAF files, they prevent personally identifiable germline mutation information from becoming publicly available. The GDC recommends that investigators explore both controlled and open-access MAF files if omission of certain somatic mutations is a concern.
+While these criteria cause the pipeline to over-filter some of the true positive somatic variants in open-access MAF files, they prevent personally identifiable germline mutation information from becoming publicly available. The GDC recommends that investigators explore both controlled and open-access MAF files if omission of certain somatic mutations is a concern.
| I/O | Entity | Format |
|---|---|---|
| Input | [Aggregated Somatic Mutation](/Data_Dictionary/viewer/#?view=table-definition-view&id=aggregated_somatic_mutation) | Protected MAF |
| Output | [Masked Somatic Mutation](/Data_Dictionary/viewer/#?view=table-definition-view&id=masked_somatic_mutation) | Somatic MAF |
-## File Access and Availability
+### Whole Genome Sequencing Variant Calling
+
+Variant calls are generated from WGS data using a different pipeline than WXS and Targeted Sequencing samples. This pipeline, based on a [workflow generated by the Sanger Institute](https://github.com/cancerit/dockstore-cgpwgs), generates multiple downstream data types using the following software packages:
+
+* __CaVEMan:__ Single nucleotide variants, which are available in [VCF format](https://docs.gdc.cancer.gov/Data/File_Formats/VCF_Format/).
+* __Pindel:__ Small indel variants, which are available in [VCF format](https://docs.gdc.cancer.gov/Data/File_Formats/VCF_Format/).
+* __BRASS:__ Structural variants, which are available in *BEDPE format*.
+* __AscatNGS:__ Copy number variants, which are available as copy number estimates or copy number segment files. Data may be available in *tab-separated values (.TSV) or plain text (.TXT)* format.
+#### BEDPE File Format
+
+[BEDPE file format](https://bedtools.readthedocs.io/en/latest/content/general-usage.html#bedpe-format) (**b**rowser **e**xtensible **d**ata **p**aired-**e**nd) is designed to concisely describe disjoint genome features, such as structural variations or paired-end sequence alignments. It's an enhanced version of the [BED format](http://genome.ucsc.edu/FAQ/FAQformat#format1), as BED does not allow inter-chromosomal feature definitions. In addition, BED only has one strand field, which is insufficient for paired-end sequence alignments, especially when studying structural variation. The BEDPE format is described below.
+
+
+* __chr*x* (required):__ The name of the chromosome on which the *x*th end of the feature exists. (x is 1 or 2). Any string can be used. For example, "chr1", "III", "myChrom", "contig1112.23" (use "." for unknown).
+* __start*x* (required):__ The zero-based starting position of the *x*th end of the feature on chr*x*. The first base in a chromosome is numbered 0. The start position in each BEDPE feature is therefore interpreted to be 1 greater than the start position listed in the feature (use -1 for unknown).
+* __end*x* (required):__ The one-based ending position of the *x*th end of the feature on chr*x*. The end position in each BEDPE feature is one-based (use -1 for unknown).
+* __name (optional):__ Defines the name of the BEDPE feature. Any string can be used.
+* __score (optional):__ A score between 0 and 1000. If the track line *useScore* attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). Any string can be used.
+* __strand*x* (optional):__ Defines the strand for the *x*th end of the feature. Either "." (unknown), "+", or "-".
+
+In addition to the above fields, bedtools allows for the addition of user-defined fields to the normal, 10-column BEDPE format as necessary. These columns are merely "passed through" pairToBed and pairToPair and are not part of any analysis. One would use these additional columns to add extra information (e.g., edit distance for each end of an alignment, or "deletion", "inversion", etc.) to each BEDPE feature.
+
+#### CNV from WGS File Format
+
+AscatNGS, originally developed by [Raine *et al.* (2016)](https://doi.org/10.1002/cpbi.17) ([GitHub page](https://github.com/cancerit)), indicates the DNA copy number changes affecting a tumor genome when comparing to a matched normal sample. See below for a description of the copy number segment and copy number estimation files produced by AscatNGS:
+
+* __GDC Aliquot:__ The GDC ID for the aliquot collected from the sample (copy number segment files only).
+* __Gene ID:__ The gene Ensembl ID (copy number variant only).
+* __Gene Name:__ The gene symbol (copy number variant only).
+* __Chromosome:__ The name of the chromosome on which the copy number change exists.
+* __Start:__ The starting position of the copy.
+* __End:__ The ending position of the copy.
+* __Copy Number:__ The weighted median of the strand copy numbers [9].
+* __Major Copy Number:__ The greater strand copy number of the two strands of the DNA (copy number segment files only).
+* __Minor Copy Number:__ The smaller strand copy number of the two strands of the DNA (copy number segment files only).
+* __Max. Copy Number:__ The highest copy number for overlapped segments (copy number variant only).
+* __Min. Copy Number:__ The lowest copy number for overlapped segments (copy number variant only).
+
+## Microsatellite Instability Detection
+
+The GDC adopts [MSIsensor2](https://github.com/niu-lab/msisensor2) to derive Microsatellite Instability (MSI) information from tumor DNA-Seq data. The MSIsensor2 software uses only the tumor BAM as input, and calculates the numeric MSI score (number of MSI sites / all valid sites). The MSI status of MSI (Microsatellite Instable) or MSS (Microsatellite Stable) is then determined using an MSI score cutoff value of 20%.
+
+The output `msi_score` and `msi_status` values are stored directly as properties of the `aligned_reads` (BAM files), and can be [accessed via the API](https://api.gdc.cancer.gov/files/82488c57-9789-449c-a09d-594172381dc1?pretty=true&fields=msi_score,msi_status,file_id,file_name). In addition, the portal/API can be filtered using these properties by choosing "Add a File Filter" in the [Repository Page](https://portal.gdc.cancer.gov/v1/repository) and selecting `msi_score` or `msi_status`.
+
+Please note:
+
+1. MSI status generated from DNA-Seq by the GDC is considered bioinformatics-derived information, and is not considered clinical data. If performed by the clinical lab, the clinical MSI test result would be stored as a `laboratory_test` in the molecular_test entity.
+2. MSIsensor2 can theoretically be applied to WGS, WXS, or Targeted Sequencing data. Given the number of MSI sites available in some Targeted Sequencing data, please consider the results carefully.
+3. It is possible that multiple MSI statuses exist within the same sample/case if more than one DNA-Seq BAM was generated. It is the users' responsibility to check for their consistency, especially when the MSI scores are close to 20%.
+
+## File Access and Availability
Files from the GDC DNA-Seq analysis pipeline are available in the [GDC Data Portal](https://portal.gdc.cancer.gov) in BAM, VCF, and MAF formats. Descriptions are listed below for all available data types and their respective file formats.
@@ -398,4 +637,10 @@ Files from the GDC DNA-Seq analysis pipeline are available in the [GDC Data Port
[5]. Larson, David E., Christopher C. Harris, Ken Chen, Daniel C. Koboldt, Travis E. Abbott, David J. Dooling, Timothy J. Ley, Elaine R. Mardis, Richard K. Wilson, and Li Ding. "SomaticSniper: identification of somatic point mutations in whole genome sequencing data." Bioinformatics 28, no. 3 (2012): 311-317.
-[6] McLaren, William, Bethan Pritchard, Daniel Rios, Yuan Chen, Paul Flicek, and Fiona Cunningham. "Deriving the consequences of genomic variants with the Ensembl API and SNP Effect Predictor." Bioinformatics 26, no. 16 (2010): 2069-2070.
+[6]. McLaren, William, Bethan Pritchard, Daniel Rios, Yuan Chen, Paul Flicek, and Fiona Cunningham. "Deriving the consequences of genomic variants with the Ensembl API and SNP Effect Predictor." Bioinformatics 26, no. 16 (2010): 2069-2070.
+
+[7]. Riester, Markus, Angad P. Singh, A. Rose Brannon, Kun Yu, Catarina D. Campbell, Derek Y. Chiang, and Michael P. Morrissey. "PureCN: copy number calling and SNV classification using targeted short read sequencing." Source code for biology and medicine 11, no. 1 (2016): 13.
+
+[8]. Oh, Sehyun, Ludwig Geistlinger, Marcel Ramos, Martin Morgan, Levi Waldron, and Markus Riester. "Reliable analysis of clinical tumor-only whole exome sequencing data." bioRxiv 552711 (2019).
+
+[9]. Gene-level copy number data is generated by intersection of copy number segment and gene ranges. It is possible for one gene to overlap with multiple segments, and in this case, copy_number, min_copy_number and max_copy_number could take different values. In particular, the copy_number value is calculated as the median, weighted on length of overlapped bases, of segment copy numbers from all overlapped segments.
diff --git a/docs/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline.md b/docs/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline.md
index 0519ef59d..a45f0dbfe 100644
--- a/docs/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline.md
+++ b/docs/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline.md
@@ -1,14 +1,20 @@
# mRNA Analysis Pipeline
## Introduction
-The GDC mRNA quantification analysis pipeline measures gene level expression in [HT-Seq](http://www-huber.embl.de/HTSeq/doc/overview.html) raw read count, Fragments per Kilobase of transcript per Million mapped reads (FPKM), and FPKM-UQ (upper quartile normalization). These values are generated through this pipeline by first aligning reads to the GRCh38 [reference genome](https://gdc.cancer.gov/download-gdc-reference-files) and then by quantifying the mapped reads. To facilitate harmonization across samples, all RNA-Seq reads are treated as unstranded during analyses.
+The GDC mRNA quantification analysis pipeline measures gene level expression with [STAR](https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf) as raw read counts. Subsequently the counts are augmented with several transformations including Fragments per Kilobase of transcript per Million mapped reads (FPKM), upper quartile normalized FPKM (FPKM-UQ), and Transcripts per Million (TPM). These values are additionally annotated with the gene symbol and gene bio-type. These data are generated through this pipeline by first aligning reads to the GRCh38 [reference genome](https://gdc.cancer.gov/download-gdc-reference-files) and then by quantifying the mapped reads. To facilitate harmonization across samples, all RNA-Seq reads are treated as unstranded during analyses.
## Data Processing Steps
### RNA-Seq Alignment Workflow
-The mRNA Analysis pipeline begins with the [Alignment Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=alignment_workflow), which is performed using a two-pass method with [STAR](http://labshare.cshl.edu/shares/gingeraslab/www-data/dobin/STAR/STAR.posix/doc/STARmanual.pdf). STAR aligns each [read group](/Data_Dictionary/viewer/#?view=table-definition-view&id=read_group) separately and then merges the resulting alignments into one. Following the methods used by the International Cancer Genome Consortium [ICGC](https://icgc.org/) ([github](https://github.com/akahles/icgc_rnaseq_align)), the two-pass method includes a splice junction detection step, which is used to generate the final alignment. This workflow outputs a BAM file, which contains both aligned and unaligned reads. Quality assessment is performed pre-alignment with [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) and post-alignment with [RNA-SeQC](https://www.broadinstitute.org/cancer/cga/rna-seqc) and [Picard Tools](http://broadinstitute.github.io/picard/).
+The mRNA Analysis pipeline begins with the [Alignment Workflow](/Data_Dictionary/viewer/#?view=table-definition-view&id=alignment_workflow), which is performed using a two-pass method with [STAR](https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf). STAR aligns each [read group](/Data_Dictionary/viewer/#?view=table-definition-view&id=read_group) separately and then merges the resulting alignments into one. Following the methods used by the International Cancer Genome Consortium [ICGC](https://icgc.org/) ([github](https://github.com/akahles/icgc_rnaseq_align)), the two-pass method includes a splice junction detection step, which is used to generate the final alignment. This workflow outputs a genomic BAM file, which contains both aligned and unaligned reads. Quality assessment is performed pre-alignment with [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) and post-alignment with [Picard Tools](http://broadinstitute.github.io/picard/).
-[](images/gene-expression-quantification-pipeline.png "Click to see the full image.")
+Files that were processed after Data Release 14 have associated transcriptomic and chimeric alignments in addition to the genomic alignment detailed above. This only applies to aliquots with at least one set of paired-end reads. The chimeric BAM file contains reads that were mapped to different chromosomes or strands (fusion alignments). The genomic alignment files contain chimeric and unaligned reads to facilitate the retrieval of all original reads. The transcriptomic alignment reports aligned reads with transcript coordinates rather than genomic coordinates. The transcriptomic alignment is also sorted differently to facilitate downstream analyses. BAM index file pairing is not supported by this method of sorting, which does not allow for BAM slicing on these alignments. The splice-junction file for these alignments is also available.
+
+Files that were processed after Data Release 25 will have associated [gene fusion files](#fusion-pipelines).
+
+As of Data Release 32 the reference annotation will be updated to GENCODE v36 and HT-Seq will no longer be used.
+
+[](images/RNA-Seq-DR32_Image.png "Click to see the full image.")
| I/O | Entity | Format |
|---|---|---|
@@ -17,13 +23,13 @@ The mRNA Analysis pipeline begins with the [Alignment Workflow](/Data_Dictionary
### RNA-Seq Alignment Command Line Parameters
-####STAR-2.4.2a
+__Note that version numbers may vary in files downloaded from the GDC Data Portal due to ongoing pipeline development and improvement.__
-####ICGC STAR alignment pipeline
+```Original
+# STAR-2
-__For users with access to the ICGC pipeline:__
+### For users with access to the ICGC pipeline:
-```Shell
python star_align.py \
--genomeDir \
--FastqFileIn \
@@ -46,24 +52,21 @@ python star_align.py \
--sjdbOverhang 100 \
--outSAMstrandField intronMotif \
--outSAMunmapped Within
-```
-__For users without access to the ICGC pipeline:__
+### For users without access to the ICGC pipeline:
+
+### Step 1: Building the STAR index.
-#### Step 1: Building the STAR index.*
-```Shell
STAR
--runMode genomeGenerate
--genomeDir
--genomeFastaFiles
--sjdbOverhang 100
---sjdbGTFfile
+--sjdbGTFfile
--runThreadN 8
-```
-\*These indices are available for download at the [GDC Website](https://gdc.cancer.gov/about-data/data-harmonization-and-generation/gdc-reference-files) and do not need to be built again.
-#### Step 2: Alignment 1st Pass.
-```Shell
+### Step 2: Alignment 1st Pass.
+
STAR
--genomeDir
--readFilesIn ,,... ,,...
@@ -83,9 +86,9 @@ STAR
--outSAMstrandField intronMotif
--outSAMtype None
--outSAMmode None
-```
-#### Step 3: Intermediate Index Generation.
-```Shell
+
+### Step 3: Intermediate Index Generation.
+
STAR
--runMode genomeGenerate
--genomeDir
@@ -93,9 +96,9 @@ STAR
--sjdbOverhang 100
--runThreadN
--sjdbFileChrStartEnd
-```
-#### Step 4: Alignment 2nd Pass.
-```Shell
+
+### Step 4: Alignment 2nd Pass.
+
STAR
--genomeDir
--readFilesIn ,,... ,,...
@@ -120,54 +123,152 @@ STAR
--outSAMheaderHD @HD VN:1.4
--outSAMattrRGline
```
+```DR15Plus
+# STAR-2
+
+STAR \
+--readFilesIn \
+--outSAMattrRGline \
+--alignIntronMax 1000000 \
+--alignIntronMin 20 \
+--alignMatesGapMax 1000000 \
+--alignSJDBoverhangMin 1 \
+--alignSJoverhangMin 8 \
+--alignSoftClipAtReferenceEnds Yes \
+--chimJunctionOverhangMin 15 \
+--chimMainSegmentMultNmax 1 \
+--chimOutType Junctions SeparateSAMold WithinBAM SoftClip \
+--chimSegmentMin 15 \
+--genomeDir \
+--genomeLoad NoSharedMemory \
+--limitSjdbInsertNsj 1200000 \
+--outFileNamePrefix \
+--outFilterIntronMotifs None \
+--outFilterMatchNminOverLread 0.33 \
+--outFilterMismatchNmax 999 \
+--outFilterMismatchNoverLmax 0.1 \
+--outFilterMultimapNmax 20 \
+--outFilterScoreMinOverLread 0.33 \
+--outFilterType BySJout \
+--outSAMattributes NH HI AS nM NM ch \
+--outSAMstrandField intronMotif \
+--outSAMtype BAM Unsorted \
+--outSAMunmapped Within \
+--quantMode TranscriptomeSAM GeneCounts \
+--readFilesCommand \
+--runThreadN \
+--twopassMode Basic
+```
+```DR32
+# STAR Genome Index
+STAR
+--runMode genomeGenerate
+--genomeDir
+--genomeFastaFiles
+--sjdbOverhang 100
+--sjdbGTFfile
+--runThreadN 8
+
+# STAR Alignment
+# STAR v2
+STAR \
+--readFilesIn \
+--outSAMattrRGline \
+--genomeDir \
+--readFilesCommand \
+--runThreadN \
+--twopassMode Basic \
+--outFilterMultimapNmax 20 \
+--alignSJoverhangMin 8 \
+--alignSJDBoverhangMin 1 \
+--outFilterMismatchNmax 999 \
+--outFilterMismatchNoverLmax 0.1 \
+--alignIntronMin 20 \
+--alignIntronMax 1000000 \
+--alignMatesGapMax 1000000 \
+--outFilterType BySJout \
+--outFilterScoreMinOverLread 0.33 \
+--outFilterMatchNminOverLread 0.33 \
+--limitSjdbInsertNsj 1200000 \
+--outFileNamePrefix