diff --git a/.cirro/preprocess.py b/.cirro/preprocess.py index ede348a..45a4c95 100644 --- a/.cirro/preprocess.py +++ b/.cirro/preprocess.py @@ -29,8 +29,8 @@ # 3. Set workflow_level value based on form input ds.logger.info("Setting workflow_level") -levels = ['convert', 'sample', 'compare'] -flags = [ds.params['convert_lvl'], ds.params['sample_lvl'], ds.params['compare_lvl']] +levels = ['convert', 'sample', 'patient', 'compare'] +flags = [ds.params['convert_lvl'], ds.params['sample_lvl'], ds.params['patient_lvl'], ds.params['compare_lvl']] workflow_level = [lvl for lvl, flag in zip(levels, flags) if flag] ds.add_param('workflow_level', ','.join(workflow_level)) diff --git a/.cirro/process-form.json b/.cirro/process-form.json index f8dfb68..7201afd 100644 --- a/.cirro/process-form.json +++ b/.cirro/process-form.json @@ -10,6 +10,13 @@ "type": "boolean", "value": true }, + "patient_lvl": { + "default": true, + "description": "Run clustering and comparison modules on samples grouped by patient of origin", + "title": "Patient", + "type": "boolean", + "value": true + }, "compare_lvl": { "default": true, "description": "Comparative TCR repertoire analyses across samples and subjects", @@ -18,10 +25,10 @@ "value": true }, "olga_chunk_length": { - "default": 2000000, + "default": 100000, "description": "Divide total CDR3 list into chunks of n length for processing by OLGA. 
Larger length = reduced parallelization", "title": "olga_chunk_length", - "type": "int" + "type": "integer" }, "distance_metric": { "default": "tcrdist", diff --git a/.cirro/process-input.json b/.cirro/process-input.json index 57e325a..df5539b 100644 --- a/.cirro/process-input.json +++ b/.cirro/process-input.json @@ -2,6 +2,7 @@ "input_format": "airr", "convert_lvl": false, "sample_lvl": "$.params.dataset.paramJson.sample_lvl", + "patient_lvl": "$.params.dataset.paramJson.patient_lvl", "compare_lvl": "$.params.dataset.paramJson.compare_lvl", "olga_chunk_length": "$.params.dataset.paramJson.olga_chunk_length", "matrix_sparsity": "sparse", diff --git a/bin/patient_calc.py b/bin/patient_calc.py new file mode 100755 index 0000000..27564f2 --- /dev/null +++ b/bin/patient_calc.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Optimized TCR repertoire overlap (single-file input, vectorized) +""" + +import argparse +import pandas as pd +import numpy as np + + +# ------------------------- +# Morisita–Horn (vectorized) +# ------------------------- +def morisita_horn_matrix(M): + """ + M: (n_samples x n_features) count matrix + Returns: (n x n) similarity matrix + """ + X = M.sum(axis=1, keepdims=True) # (n,1) + + # Avoid division by zero + X_safe = np.where(X == 0, 1, X) + + lambda_vals = (M ** 2).sum(axis=1, keepdims=True) / (X_safe ** 2) + + # Pairwise dot products + prod = M @ M.T # (n x n) + + denom = (lambda_vals + lambda_vals.T) * (X @ X.T) + + # Avoid division by zero + denom = np.where(denom == 0, 1, denom) + + return (2 * prod) / denom + + +# ------------------------- +# Main +# ------------------------- +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Optimized overlap metrics for TCR repertoires" + ) + parser.add_argument( + "-i", "--input", + required=True, + help="Concatenated TSV with sample, CDR3b, counts" + ) + parser.add_argument( + "-p", "--patient", + required=True, + help="String indicating patient of samples" + ) + args = 
parser.parse_args() + + # ------------------------- + # Load + clean + # ------------------------- + df = pd.read_csv( + args.input, + sep="\t", + usecols=["sample", "CDR3b", "counts"] + ) + + df = df.dropna(subset=["sample", "CDR3b"]) + + df["counts"] = pd.to_numeric( + df["counts"], errors="coerce" + ).fillna(0) + + # ------------------------- + # Pre-aggregate (critical optimization) + # ------------------------- + df = ( + df.groupby(["sample", "CDR3b"], as_index=False)["counts"] + .sum() + ) + + samples = df["sample"].unique() + n = len(samples) + + print(f"Loaded {n} samples") + + # ------------------------- + # Build count matrix + # ------------------------- + pivot = df.pivot( + index="sample", + columns="CDR3b", + values="counts" + ).fillna(0) + + # Ensure consistent ordering + pivot = pivot.loc[samples] + + M = pivot.to_numpy(dtype=float) + + # ------------------------- + # Presence/absence matrix + # ------------------------- + B = (M > 0).astype(np.int32) + + # ------------------------- + # Jaccard (vectorized) + # ------------------------- + intersection = B @ B.T # shared junction counts + + row_sums = B.sum(axis=1, keepdims=True) + union = row_sums + row_sums.T - intersection + + union = np.where(union == 0, 1, union) + + jaccard = intersection / union + + # ------------------------- + # Sørensen (Dice) + # ------------------------- + denom = row_sums + row_sums.T + denom = np.where(denom == 0, 1, denom) + + sorensen = (2 * intersection) / denom + + # ------------------------- + # Morisita–Horn (vectorized) + # ------------------------- + morisita = morisita_horn_matrix(M) + + # ------------------------- + # Fix diagonals + # ------------------------- + np.fill_diagonal(jaccard, 1.0) + np.fill_diagonal(sorensen, 1.0) + np.fill_diagonal(morisita, 1.0) + + # ------------------------- + # Write outputs + # ------------------------- + patient = args.patient + pd.DataFrame(jaccard, index=samples, columns=samples).to_csv(f"{patient}/jaccard_mat.csv") + 
pd.DataFrame(sorensen, index=samples, columns=samples).to_csv(f"{patient}/sorensen_mat.csv") + pd.DataFrame(morisita, index=samples, columns=samples).to_csv(f"{patient}/morisita_mat.csv") + + print("Finished writing all matrices") \ No newline at end of file diff --git a/modules/local/annotate/main.nf b/modules/local/annotate/main.nf index 2c65eb8..086fdbd 100644 --- a/modules/local/annotate/main.nf +++ b/modules/local/annotate/main.nf @@ -1,3 +1,57 @@ +process ANNOTATE_PROCESS { + tag "${sample_meta.sample}" + label 'process_low' + publishDir enabled: false + + input: + tuple val(sample_meta), path(count_table) + + output: + tuple val(sample_meta), path("${sample_meta.sample}_cdr3.tsv"), emit: "process" + + script: + """ + python - < giana.log 2>&1 + --Verbose + # > giana.log 2>&1 # Insert header after GIANA comments insert=\$(head -n 1 "${concat_cdr3}") @@ -40,8 +41,9 @@ process GIANA_CALC { /^##/ { print; next } !inserted { print insert; inserted=1 } { print } - ' giana_RotationEncodingBL62.txt > tmp && mv tmp giana_RotationEncodingBL62.txt + ' giana_RotationEncodingBL62.txt > ${patient}_giana.txt - mv giana_RotationEncodingBL62.txt_EncodingMatrix.txt giana_EncodingMatrix.txt + # mv giana_RotationEncodingBL62.txt_EncodingMatrix.txt giana_EncodingMatrix.txt + mv VgeneScores.txt ${patient}_VgeneScores.txt """ } diff --git a/modules/local/compare/gliph2.nf b/modules/local/compare/gliph2.nf index 68dd080..d1a4916 100644 --- a/modules/local/compare/gliph2.nf +++ b/modules/local/compare/gliph2.nf @@ -1,19 +1,20 @@ process GLIPH2_TURBOGLIPH { + tag "${patient}" label 'process_high' label 'process_high_compute' label 'process_high_memory' input: - path concat_cdr3 + tuple val(patient), path(concat_cdr3) output: - path "all_motifs.txt", emit: 'all_motifs' - path "clone_network.txt", emit: 'clone_network' - path "cluster_member_details.txt", emit: 'cluster_member_details' - path "convergence_groups.txt", emit: 'convergence_groups' - path "global_similarities.txt", emit: 
'global_similarities' - path "local_similarities.txt", emit: 'local_similarities' - path "parameter.txt", emit: 'gliph2_parameters' + path "${patient}/all_motifs.txt", emit: 'all_motifs' + path "${patient}/clone_network.txt", emit: 'clone_network' + path "${patient}/cluster_member_details.txt", emit: 'cluster_member_details' + path "${patient}/convergence_groups.txt", emit: 'convergence_groups' + path "${patient}/global_similarities.txt", emit: 'global_similarities' + path "${patient}/local_similarities.txt", emit: 'local_similarities' + path "${patient}/parameter.txt", emit: 'gliph2_parameters' script: """ @@ -28,7 +29,7 @@ process GLIPH2_TURBOGLIPH { result <- turboGliph::gliph2( cdr3_sequences = df, - result_folder = "./", + result_folder = "./${patient}", lcminp = ${params.local_min_pvalue}, sim_depth = ${params.simulation_depth}, kmer_mindepth = ${params.kmer_min_depth}, @@ -37,16 +38,16 @@ process GLIPH2_TURBOGLIPH { n_cores = ${task.cpus} ) - df3 <- read.csv('cluster_member_details.txt', sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) + df3 <- read.csv('${patient}/cluster_member_details.txt', sep = '\t', stringsAsFactors = FALSE, check.names = FALSE) df3[,'sample'] <- df3[,'patient'] df3 <- merge(df3, df[, c("CDR3b", "TRBV", "sample", 'counts')], by = c("CDR3b", "TRBV", "sample", 'counts'), all.x = TRUE) df3 <- df3[, c('CDR3b', 'TRBV', 'TRBJ', 'counts', 'sample', 'tag', 'seq_ID', 'ultCDR3b')] - write.table(df3, "cluster_member_details.txt", sep = "\t", row.names = FALSE, quote = FALSE) + write.table(df3, "${patient}/cluster_member_details.txt", sep = "\t", row.names = FALSE, quote = FALSE) EOF # Rename local_similarities file to standardize output name - input_file="local_similarities_*.txt" - cat \$input_file > local_similarities.txt + input_file="${patient}/local_similarities_*.txt" + cat \$input_file > ${patient}/local_similarities.txt """ } diff --git a/modules/local/olga/main.nf b/modules/local/olga/main.nf index 93e608c..defe2d9 100644 --- 
a/modules/local/olga/main.nf +++ b/modules/local/olga/main.nf @@ -10,7 +10,21 @@ process OLGA_CALCULATE{ script: """ - olga-compute_pgen --humanTRB -i ${cdr3_chunk} -o "pgen_${cdr3_chunk}" + olga-compute_pgen --humanTRB -i ${cdr3_chunk} -o olga_pgen.tsv + cat olga_pgen.tsv \ + | awk -F'\t' -v OFS='\t' ' + BEGIN { log10_base = log(10) } + { + cdr3 = \$1 + pgen = \$2 + + if (pgen == "NA" || pgen == "" || pgen == 0) { + print cdr3, pgen, "NA" + } else { + print cdr3, pgen, log(pgen)/log10_base + } + } + ' > "pgen_${cdr3_chunk}" """ } @@ -22,23 +36,39 @@ process OLGA_CONCATENATE { output: path "olga_pgen.tsv", emit: cdr3_pgen + path "olga_pgen.stats.tsv", emit: cdr3_pgen_stats script: """ - printf "CDR3b\tpgen\tlog10_pgen\n" > olga_pgen.tsv - cat ${pgen_chunks} \ - | awk -F'\t' -v OFS='\t' ' - { + awk -F'\t' ' + BEGIN { + max_len = 0 + min_log = "" + max_log = "" + } + + { cdr3 = \$1 - pgen = \$2 - if (pgen == "NA" || pgen == "" || pgen == 0) { - print cdr3, pgen, "NA" - } else { - print cdr3, pgen, log(pgen)/log(10) - } + log10 = \$3 + + len = length(cdr3) + if (len > max_len) max_len = len + + if (log10 != "NA" && log10 != "") { + if (min_log == "" || log10 < min_log) min_log = log10 + if (max_log == "" || log10 > max_log) max_log = log10 } - ' \ - >> olga_pgen.tsv + } + + END { + printf "max_cdr3_length\t%d\\n", max_len > "olga_pgen.stats.tsv" + printf "min_log10_pgen\t%s\\n", min_log > "olga_pgen.stats.tsv" + printf "max_log10_pgen\t%s\\n", max_log > "olga_pgen.stats.tsv" + } + ' ${pgen_chunks} + + printf "CDR3b\tpgen\tlog10_pgen\n" > olga_pgen.tsv + cat ${pgen_chunks} >> olga_pgen.tsv """ } @@ -76,45 +106,126 @@ process OLGA_SAMPLE_MERGE { input: tuple val(sample_meta), path(count_table) path cdr3_pgen + val olga_stats output: tuple val(sample_meta), path("${sample_meta.sample}_tcr_pgen.tsv"), emit: olga_pgen - path "olga_xmin_value.txt", emit: olga_xmin - path "olga_xmax_value.txt", emit: olga_xmax script: """ # Extract vector of cdr3 aa, dropping null values 
python - < olga_xmin_value.txt - echo ${olga_global_xmax} > olga_xmax_value.txt - echo ${olga_global_ymax} > olga_ymax_value.txt - """ } \ No newline at end of file diff --git a/modules/local/patient/main.nf b/modules/local/patient/main.nf new file mode 100644 index 0000000..c5b9eb7 --- /dev/null +++ b/modules/local/patient/main.nf @@ -0,0 +1,37 @@ +process PATIENT_CONCATENATE { + tag "${patient}" + label "process_single" + + input: + tuple val(patient), path(files) + + output: + tuple val(patient), path("${patient}_cdr3.tsv"), emit: patient_cdr3 + + script: + """ + head -n 1 ${files[0]} > ${patient}_cdr3.tsv + tail -n +2 -q ${files.join(' ')} >> ${patient}_cdr3.tsv + """ +} + +process PATIENT_CALC { + tag "${patient}" + label 'process_single' + + input: + tuple val(patient), path(concat_cdr3) + + output: + path "${patient}/jaccard_mat.csv", emit: jaccard_mat + path "${patient}/sorensen_mat.csv", emit: sorensen_mat + path "${patient}/morisita_mat.csv", emit: morisita_mat + + script: + """ + mkdir -p "${patient}" + + patient_calc.py \ + -i ${concat_cdr3} -p "${patient}" \ + """ +} diff --git a/modules/local/sample/sample_calc.nf b/modules/local/sample/sample_calc.nf index 633ed7e..a971d56 100644 --- a/modules/local/sample/sample_calc.nf +++ b/modules/local/sample/sample_calc.nf @@ -17,12 +17,4 @@ process SAMPLE_CALC { """ sample_calc.py -s '${sample_meta.sample}' -c ${count_table} """ - - stub: - """ - touch sample_stats.csv - touch v_family.csv - touch d_family.csv - touch j_family.csv - """ } diff --git a/nextflow.config b/nextflow.config index d777310..32f2048 100644 --- a/nextflow.config +++ b/nextflow.config @@ -41,7 +41,7 @@ params { vgene_x_cols = 'origin,timepoint' // OLGA parameters - olga_chunk_length = 2000000 // larger chunk size = less parallelization + olga_chunk_length = 100000 // larger chunk size = less parallelization // GIANA parameters threshold = 7.0 diff --git a/nextflow_schema.json b/nextflow_schema.json index e81a2e5..907fa17 100644 --- 
a/nextflow_schema.json +++ b/nextflow_schema.json @@ -69,8 +69,8 @@ "workflow_level": { "type": "string", "default": "sample,compare", - "enum": ["sample,compare", "sample", "compare", "convert"], - "description": "Comma-separated workflow stages (sample, compare)." + "description": "Comma-separated workflow stages.", + "pattern": "^(sample|patient|compare|convert)(,(sample|patient|compare|convert))*$" }, "project_name": { "type": "string", diff --git a/subworkflows/local/annotate.nf b/subworkflows/local/annotate.nf index fe0a9d2..c61beea 100644 --- a/subworkflows/local/annotate.nf +++ b/subworkflows/local/annotate.nf @@ -5,8 +5,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { ANNOTATE_CONCATENATE; ANNOTATE_SORT_CDR3; ANNOTATE_DEDUPLICATE_CDR3_TRBV; ANNOTATE_DEDUPLICATE_CDR3 } from '../../modules/local/annotate' -include { OLGA_CONCATENATE; OLGA_CALCULATE } from '../../modules/local/olga' +include { ANNOTATE_PROCESS; ANNOTATE_SORT_CDR3; ANNOTATE_DEDUPLICATE_CDR3_TRBV; ANNOTATE_DEDUPLICATE_CDR3 } from '../../modules/local/annotate' +include { OLGA_CONCATENATE as ANNOTATE_OLGA_CONCATENATE; OLGA_CALCULATE as ANNOTATE_OLGA_CALCULATE} from '../../modules/local/olga' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -16,14 +16,19 @@ include { OLGA_CONCATENATE; OLGA_CALCULATE } from '../../modules/local/olga' workflow ANNOTATE { take: - samplesheet_resolved - all_sample_files + sample_map main: - ANNOTATE_CONCATENATE( samplesheet_resolved, - all_sample_files ) + ANNOTATE_PROCESS( sample_map ) + + processed_samples = ANNOTATE_PROCESS.out.process + + processed_samples + .map { _meta, file -> file } + .collectFile(name: 'concat_cdr3.tsv', keepHeader: true, skip: 1) + .set { concat_cdr3 } - ANNOTATE_SORT_CDR3( ANNOTATE_CONCATENATE.out.concat_cdr3 ) + ANNOTATE_SORT_CDR3( concat_cdr3 ) concat_cdr3_sorted = ANNOTATE_SORT_CDR3.out.concat_cdr3_sorted 
ANNOTATE_DEDUPLICATE_CDR3_TRBV( concat_cdr3_sorted ) @@ -32,13 +37,13 @@ workflow ANNOTATE { ANNOTATE_DEDUPLICATE_CDR3_TRBV.out.unique_cdr3_trbv ) - OLGA_CALCULATE( + ANNOTATE_OLGA_CALCULATE( ANNOTATE_DEDUPLICATE_CDR3.out.unique_cdr3 .splitText(by: params.olga_chunk_length, file: true) ) - OLGA_CONCATENATE ( - OLGA_CALCULATE.out.pgen_chunk + ANNOTATE_OLGA_CONCATENATE ( + ANNOTATE_OLGA_CALCULATE.out.pgen_chunk .collectFile( name: 'olga_pgen_body.tsv', sort: { f -> @@ -47,9 +52,18 @@ workflow ANNOTATE { } ) ) - cdr3_pgen = OLGA_CONCATENATE.out.cdr3_pgen + cdr3_pgen = ANNOTATE_OLGA_CONCATENATE.out.cdr3_pgen + olga_stats = ANNOTATE_OLGA_CONCATENATE.out.cdr3_pgen_stats + .map { f -> + def _m = f.readLines() + .collect{ stats -> stats.split('\t') } + .collectEntries{ stats -> [(stats[0]): stats[1]] } + } + .first() emit: + processed_samples concat_cdr3_sorted cdr3_pgen + olga_stats } \ No newline at end of file diff --git a/subworkflows/local/compare.nf b/subworkflows/local/compare.nf index d647098..9e2453e 100644 --- a/subworkflows/local/compare.nf +++ b/subworkflows/local/compare.nf @@ -5,12 +5,8 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { COMPARE_CALC } from '../../modules/local/compare/compare_calc' -include { COMPARE_PLOT } from '../../modules/local/compare/compare_plot' include { TCRSHARING_CALC; TCRSHARING_HISTOGRAM; TCRSHARING_SCATTERPLOT } from '../../modules/local/compare/tcrsharing' include { OLGA_MERGE as TCRSHARING_OLGA_MERGE } from '../../modules/local/olga' -include { GLIPH2_TURBOGLIPH; GLIPH2_PLOT } from '../../modules/local/compare/gliph2' -include { GIANA_CALC } from '../../modules/local/compare/giana' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -20,37 +16,10 @@ include { GIANA_CALC } from '../../modules/local/compare/giana' workflow COMPARE { take: - samplesheet_resolved - all_sample_files concat_cdr3_sorted cdr3_pgen main: - COMPARE_CALC( 
samplesheet_resolved, - all_sample_files ) - - COMPARE_PLOT( samplesheet_resolved, - COMPARE_CALC.out.jaccard_mat, - COMPARE_CALC.out.sorensen_mat, - COMPARE_CALC.out.morisita_mat, - file(params.compare_stats_template), - params.project_name, - all_sample_files - ) - - GIANA_CALC( - concat_cdr3_sorted, - params.threshold, - params.threshold_score, - params.threshold_vgene - ) - - if(params.use_gliph2) { - GLIPH2_TURBOGLIPH( - concat_cdr3_sorted - ) - } - TCRSHARING_OLGA_MERGE (concat_cdr3_sorted, cdr3_pgen) TCRSHARING_CALC( diff --git a/subworkflows/local/patient.nf b/subworkflows/local/patient.nf new file mode 100644 index 0000000..d8a55d3 --- /dev/null +++ b/subworkflows/local/patient.nf @@ -0,0 +1,54 @@ + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { PATIENT_CONCATENATE; PATIENT_CALC } from '../../modules/local/patient' +include { GLIPH2_TURBOGLIPH } from '../../modules/local/compare/gliph2' +include { GIANA_CALC } from '../../modules/local/compare/giana' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PATIENT { + take: + processed_samples + + main: + processed_samples + .map { meta, file -> [ meta.patient, file ] } + .groupTuple() + .set { patient_groups } + + PATIENT_CONCATENATE ( patient_groups ) + + PATIENT_CALC( PATIENT_CONCATENATE.out.patient_cdr3 ) + + // TODO: disabling plotting until notebook updated + // COMPARE_PLOT( samplesheet_resolved, + // COMPARE_CALC.out.jaccard_mat, + // COMPARE_CALC.out.sorensen_mat, + // COMPARE_CALC.out.morisita_mat, + // file(params.compare_stats_template), + // params.project_name, + // all_sample_files + // ) + + GIANA_CALC( + PATIENT_CONCATENATE.out.patient_cdr3, + 
params.threshold, + params.threshold_score, + params.threshold_vgene + ) + + if(params.use_gliph2) { + GLIPH2_TURBOGLIPH( + PATIENT_CONCATENATE.out.patient_cdr3 + ) + } +} \ No newline at end of file diff --git a/subworkflows/local/sample.nf b/subworkflows/local/sample.nf index d62b147..1454d1f 100644 --- a/subworkflows/local/sample.nf +++ b/subworkflows/local/sample.nf @@ -12,7 +12,7 @@ include { SAMPLE_AGGREGATE as SAMPLE_AGG_V } from '../../modules/local/sample/sa include { SAMPLE_AGGREGATE as SAMPLE_AGG_D } from '../../modules/local/sample/sample_aggregate' include { SAMPLE_AGGREGATE as SAMPLE_AGG_J } from '../../modules/local/sample/sample_aggregate' include { TCRDIST3_MATRIX; TCRDIST3_HISTOGRAM_CALC; TCRDIST3_HISTOGRAM_PLOT} from '../../modules/local/sample/tcrdist3' -include { OLGA_SAMPLE_MERGE; OLGA_HISTOGRAM_CALC; OLGA_HISTOGRAM_PLOT; OLGA_WRITE_MAX } from '../../modules/local/olga' +include { OLGA_SAMPLE_MERGE; OLGA_HISTOGRAM_CALC; OLGA_HISTOGRAM_PLOT } from '../../modules/local/olga' include { CONVERGENCE } from '../../modules/local/sample/convergence' include { TCRPHENO } from '../../modules/local/sample/tcrpheno' include { VDJDB_GET; VDJDB_VDJMATCH } from '../../modules/local/sample/tcrspecificity' @@ -28,6 +28,7 @@ workflow SAMPLE { take: sample_map cdr3_pgen + olga_stats main: @@ -87,25 +88,11 @@ workflow SAMPLE { global_y_max_value ) - cdr3_pgen_file = cdr3_pgen.first() OLGA_SAMPLE_MERGE ( sample_map, - cdr3_pgen_file ) + cdr3_pgen.first(), + olga_stats ) - OLGA_SAMPLE_MERGE.out.olga_xmin - .map { xmin -> xmin.text.trim().toDouble() } - .collect() - .map { values -> values.min() } - .set { olga_x_min_value } - olga_x_min_value.view { olga_xmin -> "Olga x min matrix value: $olga_xmin" } - - OLGA_SAMPLE_MERGE.out.olga_xmax - .map { xmax -> xmax.text.trim().toDouble() } - .collect() - .map { values -> values.max() } - .set { olga_x_max_value } - olga_x_max_value.view { olga_xmax -> "Olga x max matrix value: $olga_xmax" } - - OLGA_HISTOGRAM_CALC ( 
OLGA_SAMPLE_MERGE.out.olga_pgen, olga_x_min_value, olga_x_max_value ) + OLGA_HISTOGRAM_CALC ( OLGA_SAMPLE_MERGE.out.olga_pgen, olga_stats ) OLGA_HISTOGRAM_CALC.out.olga_ymax .map { ymax -> ymax.text.trim().toDouble() } @@ -116,12 +103,6 @@ workflow SAMPLE { OLGA_HISTOGRAM_PLOT( OLGA_HISTOGRAM_CALC.out.olga_histogram, olga_y_max_value ) - OLGA_WRITE_MAX( - olga_x_min_value, - olga_x_max_value, - olga_y_max_value - ) - CONVERGENCE ( sample_map ) TCRPHENO ( sample_map ) diff --git a/workflows/tcrtoolkit.nf b/workflows/tcrtoolkit.nf index f7bc758..9e3ac04 100644 --- a/workflows/tcrtoolkit.nf +++ b/workflows/tcrtoolkit.nf @@ -13,6 +13,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { CONVERT } from '../subworkflows/local/convert' include { RESOLVE_SAMPLESHEET } from '../subworkflows/local/resolve_samplesheet' include { SAMPLE } from '../subworkflows/local/sample' +include { PATIENT } from '../subworkflows/local/patient' include { COMPARE } from '../subworkflows/local/compare' include { VALIDATE_PARAMS } from '../subworkflows/local/validate_params' include { ANNOTATE } from '../subworkflows/local/annotate' @@ -43,6 +44,16 @@ workflow TCRTOOLKIT { } } + if (levels.contains('patient')) { + def samplesheeet_header = file(params.samplesheet).readLines().first().split(',') + def has_patient = samplesheeet_header.contains('patient') + + if (!has_patient) { + println("\u001B[33m[WARN]\u001B[0m Patient workflow was specified but metadata was not found in samplesheet; please specify patient IDs for samples using the 'patient' column.") + return + } + } + // Checking input tables INPUT_CHECK( file(params.samplesheet) ) ch_samplesheet_utf8 = INPUT_CHECK.out.samplesheet_utf8 @@ -80,26 +91,33 @@ workflow TCRTOOLKIT { // --- Main Analysis --- - RESOLVE_SAMPLESHEET( ch_samplesheet_utf8, - sample_map_final ) + // RESOLVE_SAMPLESHEET( ch_samplesheet_utf8, + // sample_map_final ) - ANNOTATE( RESOLVE_SAMPLESHEET.out.samplesheet_resolved, - 
RESOLVE_SAMPLESHEET.out.all_sample_files ) +    if (levels.intersect(['sample','patient','compare'])) { +        ANNOTATE( sample_map_final ) +    }       // Running sample level analysis     if (levels.contains('sample')) {         SAMPLE( sample_map_final, -                ANNOTATE.out.cdr3_pgen +                ANNOTATE.out.cdr3_pgen, +                ANNOTATE.out.olga_stats               ) }   +    // Running patient analysis +    if (levels.contains('patient')) { +        PATIENT( ANNOTATE.out.processed_samples ) +    } +     // Running comparison analysis     if (levels.contains('compare')) { -        COMPARE( RESOLVE_SAMPLESHEET.out.samplesheet_resolved, -                 RESOLVE_SAMPLESHEET.out.all_sample_files, +        COMPARE( ANNOTATE.out.concat_cdr3_sorted, -                ANNOTATE.out.cdr3_pgen) +                ANNOTATE.out.cdr3_pgen +                ) } }