Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
766 changes: 766 additions & 0 deletions 02_normalize_batchcorrect_omics.R

Large diffs are not rendered by default.

841 changes: 0 additions & 841 deletions 02_normalize_harmonize_proteomics.html

This file was deleted.

150 changes: 150 additions & 0 deletions 02_run_normalize_omics.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
---
title: "run_normalize_omics"
author: "JJ"
date: "2025-11-04"
output: html_document
---

# Get Helper Scripts
```{r setup, include=FALSE}
# Knit option: echo each chunk's source into the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Attach synapser and log in to Synapse (synLogin() is called with no
# arguments here).
library(synapser)
synLogin()

# Bundle the two synapser entry points into the `syn` object that is later
# handed to run_modality() as its `syn` argument.
syn <- list(
  get = synapser::synGet,
  store = synapser::synStore
)

# Source the project scripts, in this order:
#   1. cNF_helper_code.R -- helper metadata (defines 'meta' and 'pcols')
#   2. 02_normalize_batchcorrect_omics.R -- the pipeline (presumably where
#      run_modality() is defined; confirm in that file)
invisible(lapply(
  c("cNF_helper_code.R", "02_normalize_batchcorrect_omics.R"),
  source
))

```


# Run batch correction / normalization across phospho, global, and rna samples.

```{r}
# ---------------------------------------------------------------------------
# run_modality() — quick reference for args & expected batch structure
# ---------------------------------------------------------------------------
# Args:
#
# - modality : "global" | "phospho" | "rna" (case-insensitive).
# - batches : list of cohort specs; each has:
# syn_id, cohort, fname_aliquot_index, (optional) value_start_col
# (auto-detects; fallback = 5 for global/phospho, 3 for rna).
# - meta : data.frame joined by (aliquot, cohort). Uses Specimen/Patient/Tumor
# if present. For RNA, falls back to normalized Specimen matching.
# - syn : Synapse client object passed through to the pipeline. This notebook
# passes list(get = synapser::synGet, store = synapser::synStore).
# - drop_name_substrings : character vector; OR-regex to drop sample cols (NULL = keep all).
# - out_dir : output dir (auto-created).
# - out_prefix: filename stem (used unless save_basename set).
# - upload_parent_id : Synapse folder ID for uploads (NULL = no upload).
# - pcols : named colors for Patient in PCA (optional).
# - write_outputs : TRUE = write CSV/PDF (+upload if parent set); FALSE = in-memory only.
# - save_basename : override file stem (else out_prefix).
# - do_batch_correct : TRUE = ComBat by cohort; FALSE = skip (adds *_noBatchCorrect).
#
# Per-modality normalization
# - phospho : 0 -> NA -> drop >50% missing -> log2(x + 0.01) -> per-sample modified z.
# - global : log2(x) -> per-sample modified z.
# - rna : drop >50% missing -> log2(TPM + 1) -> per-sample modified z.
#
# Returns (list)
# - se_batches : per-batch normalized SEs.
# - se_combined: intersection-features combined SE.
# - se_corrected: post-ComBat SE (NULL if do_batch_correct=FALSE).
# - se_post : SE used “post” (se_corrected or se_combined).
# - did_combat: TRUE/FALSE.
# - long_pre : long table from se_combined (finite only).
# - long_post : long table from se_post.
# - pca_df_pre, pca_df_post : PCA inputs (complete-case features).
# - plots : pre_pca, pre_hist, pca, hist (ggplot).
# - files : if write_outputs=TRUE, $queued = written file paths.
#
# Notes
# - ComBat drops samples with NA cohort; requires colData(se)$cohort.
# - Global: splits multi-symbol Genes by ';'. RNA aliquot may be NA (Specimen fallback).
# - Sample headers auto-detected if path-like (*.raw, *.mzML, paths).
# ---------------------------------------------------------------------------



# Sample-name substrings to drop (these were the protocol optimization
# samples). The vector is passed to run_modality() as `drop_name_substrings`.
drop_subs <- unlist(
  list(
    # names containing "_G_"
    c(
      "cNF_organoid_DIA_G_02_11Feb25",
      "cNF_organoid_DIA_G_05_11Feb25",
      "cNF_organoid_DIA_G_06_11Feb25"
    ),
    # names containing "_P_"
    c(
      "cNF_organoid_DIA_P_02_29Jan25",
      "cNF_organoid_DIA_P_05_11Feb25",
      "cNF_organoid_DIA_P_06_11Feb25"
    )
  ),
  use.names = FALSE
)

# PHOSPHO BATCHES
# One spec per cohort: the Synapse ID of the table, the cohort label, the
# first value column, and the aliquot position within the sample file name.
phospho_batches <- list(
  list(
    syn_id = "syn69963552",
    cohort = 1,
    value_start_col = 5,
    fname_aliquot_index = 8
  ),
  list(
    syn_id = "syn69947351",
    cohort = 2,
    value_start_col = 5,
    fname_aliquot_index = 9
  )
)

# Run the phospho modality with batch correction enabled.
# write_outputs = FALSE: per the run_modality() argument notes in this chunk,
# results stay in memory, so the output/upload settings below only take
# effect once write_outputs is switched to TRUE.
phospho <- run_modality(
  # inputs
  modality = "Phospho",
  batches = phospho_batches,
  meta = meta,
  syn = syn,
  pcols = pcols,
  # processing
  drop_name_substrings = drop_subs,
  do_batch_correct = TRUE,
  # outputs
  write_outputs = FALSE,
  out_dir = "phospho_test",
  out_prefix = "phospho",
  save_basename = "phospho_batch12_corrected",
  upload_parent_id = "syn70078365"
)

# GLOBAL BATCHES
# One spec per cohort: the Synapse ID of the table, the cohort label, the
# first value column, and the aliquot position within the sample file name.
global_batches <- list(
  list(
    syn_id = "syn69947355",
    cohort = 1,
    value_start_col = 5,
    fname_aliquot_index = 6
  ),
  list(
    syn_id = "syn69947352",
    cohort = 2,
    value_start_col = 5,
    fname_aliquot_index = 7
  )
)

# Run the global modality with batch correction enabled.
# write_outputs = FALSE: per the run_modality() argument notes in this chunk,
# results stay in memory, so the output/upload settings below only take
# effect once write_outputs is switched to TRUE.
global <- run_modality(
  # inputs
  modality = "Global",
  batches = global_batches,
  meta = meta,
  syn = syn,
  pcols = pcols,
  # processing
  drop_name_substrings = drop_subs,
  do_batch_correct = TRUE,
  # outputs
  write_outputs = FALSE,
  out_dir = "global_test",
  out_prefix = "global",
  save_basename = "global_batch12_corrected",
  upload_parent_id = "syn70078365"
)


# RNA BATCHES
# One spec per cohort: the Synapse ID of the table, the cohort label, the
# first value column, and the aliquot position within the sample file name.
# NOTE(review): value_start_col = 5 and fname_aliquot_index = 6/7 are the same
# values used for the global batches, while the argument notes in this chunk
# list 3 as the rna fallback start column -- confirm against the RNA tables.
rna_batches <- list(
  list(
    syn_id = "syn66352931",
    cohort = 1,
    value_start_col = 5,
    fname_aliquot_index = 6
  ),
  list(
    syn_id = "syn70765053",
    cohort = 2,
    value_start_col = 5,
    fname_aliquot_index = 7
  )
)

# Run the rna modality.
# do_batch_correct is deliberately FALSE for now: batch correction is not
# needed for RNA.
# write_outputs = FALSE: per the run_modality() argument notes in this chunk,
# results stay in memory, so the output/upload settings below only take
# effect once write_outputs is switched to TRUE.
rna <- run_modality(
  # inputs
  modality = "rna",
  batches = rna_batches,
  meta = meta,
  syn = syn,
  pcols = pcols,
  # processing
  drop_name_substrings = drop_subs,
  do_batch_correct = FALSE,
  # outputs
  write_outputs = FALSE,
  out_dir = "rna_test",
  out_prefix = "rna",
  save_basename = "RNA_12_no_batch_correct",
  upload_parent_id = "syn71099587"
)

```
Loading