Skip to content

Commit c49a63c

Browse files
authored
Merge pull request #23 from alexmccreight/main
Function name changes, reorganization, citation
2 parents a280f68 + edd2a08 commit c49a63c

13 files changed

+168
-141
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ Description: Provides a fast and memory-efficient method for
99
URL: https://statfungen.github.io/simxQTL/, https://github.com/StatFunGen/simxQTL
1010
BugReports: https://github.com/StatFunGen/simxQTL/issues
1111
Authors@R: c(
12-
person("Gao", "Wang", role = c("cre", "aut"), email = "wang.gao@columbia.edu"),
13-
person("Alexander", "McCreight", role = "aut"),
12+
person("Alexander", "McCreight", role = c("cre", "aut")),
1413
person("Xuewei", "Cao", role = "aut"),
15-
person("Haochen", "Sun", role = "aut"))
14+
person("Haochen", "Sun", role = "aut"),
15+
person("Gao", "Wang", role = "aut", email = "wang.gao@columbia.edu"))
1616
License: MIT + file LICENSE
1717
Imports:
1818
matrixStats,

NAMESPACE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,20 @@ export(gene_data)
66
export(gene_data_null1)
77
export(gene_data_null2)
88
export(gene_data_null3)
9-
export(generate_eqtl_data)
9+
export(generate_cis_qtl_data)
1010
export(get_correlation)
1111
export(get_lower_chol)
1212
export(get_random_A)
1313
export(parse_num_causal_snps)
1414
export(sim_beta)
1515
export(sim_beta_fix_variant)
1616
export(sim_geno_LD)
17-
export(sim_geno_UKB)
1817
export(sim_geno_indep)
18+
export(sim_geno_real)
1919
export(sim_multi_traits)
20+
export(sim_single_trait_simple)
2021
export(sim_sumstats)
2122
export(simulate_causal_config)
22-
export(simulate_cis_expression)
2323
export(simulate_polygenic_trait)
2424
export(simulate_trans_expression)
2525
export(simulate_trans_mixture_celltype)

R/simulate_eQTL.R

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,14 @@ is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
166166
}
167167

168168
###############################################################################
169-
#' Generate eQTL Data with Multiple Genetic Architecture Components
169+
#' Generate Cis-QTL Data with Multiple Genetic Architecture Components
170170
#'
171-
#' This function generates simulated gene expression data with a partitioned
171+
#' This function generates simulated cis-eQTL data with a partitioned
172172
#' genetic architecture that enforces strict effect size hierarchies:
173173
#' |sparse| > |oligogenic| >> |infinitesimal|
174174
#'
175+
#' Originally developed for the "susieR 2.0" manuscript, McCreight et al. (2025).
176+
#'
175177
#' @param G Genotype matrix.
176178
#' @param h2g Total SNP heritability (proportion of variance explained by genotyped SNPs).
177179
#' @param prop_h2_sparse Proportion of h2g explained by sparse effects.
@@ -191,7 +193,7 @@ is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
191193
#' combined beta values, indices for each effect component, realized heritability estimates,
192194
#' effect size ranges, hierarchy validation results, and causal indices.
193195
#' @export
194-
generate_eqtl_data <- function(G,
196+
generate_cis_qtl_data <- function(G,
195197
h2g = 0.30,
196198
prop_h2_sparse = 0.50,
197199
prop_h2_oligogenic = 0.15,

R/simulate_genotype.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,19 @@ sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.
6363
return(G)
6464
}
6565

66-
#' Simulate Genotypes Based on UK Biobank
66+
#' Simulate Genotypes Based on Real Data
6767
#'
6868
#' @param n Sample size.
69-
#' @param file_path Path to the UK Biobank file.
69+
#' @param file_path Path to the genotype file.
7070
#' @param min_maf Minimum minor allele frequency.
7171
#' @param scale Logical, whether to scale the data.
7272
#' @return A matrix of genotypes.
7373
#' @examples
74-
#' sim_geno_UKB(n = 100, file_path = "path/to/UKB/file", min_maf = 0.01, scale = TRUE)
74+
#' sim_geno_real(n = 100, file_path = "path/to/real/file", min_maf = 0.01, scale = TRUE)
7575
#' @export
76-
sim_geno_UKB <- function(n, file_path, min_maf = 0.01, scale = FALSE) {
76+
sim_geno_real <- function(n, file_path, min_maf = 0.01, scale = FALSE) {
7777
if (missing(n)) stop("Please provide the sample size")
78-
if (is.null(file_path)) stop("Please provide the path of UK Biobank!")
78+
if (is.null(file_path)) stop("Please provide the path to genotype data!")
7979
if (!grepl("\\.bed$", file_path)) stop("Please provide plink bfiles!")
8080

8181
G <- process_ukb(file_path, n, min_maf)

R/simulate_linreg.R

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,4 +414,60 @@ calculate_sumstat <- function(X, Y){
414414
z = z
415415
)
416416
return(tb)
417+
}
418+
419+
#' Simulate Single Trait with Simple Linear Model
#'
#' This function simulates a single trait (e.g., gene expression) based on
#' genotype data. It generates a trait matrix Y using genotype matrix G and
#' adjacency matrix A.
#'
#' For each column `j` of `A`, the SNPs with `A[, j] == 1` receive non-zero
#' effects: the first connected SNP gets effect 1 and every other connected
#' SNP gets effect `sqrt(var(first SNP) / var(that SNP))`, so that each
#' connected SNP contributes the same marginal variance. Gaussian noise with
#' variance `var(first SNP) / phi_v - var(G \%*\% beta)` is then added.
#'
#' If that residual variance is not positive, `phi_v` is lowered in steps of
#' 0.01 until it is. The lowered value is carried over to the remaining
#' columns of `A`. An error is raised when lowering `phi_v` would make it
#' non-positive, instead of looping forever.
#'
#' @param G A matrix of genotypes with dimensions n x p.
#' @param A A binary adjacency matrix of dimensions p x m indicating direct
#'   effects. Every column must contain at least one entry equal to 1.
#' @param phi_v The per-SNP heritability value; a single positive number.
#'
#' @return A matrix Y of dimensions n x m representing the simulated trait.
#'   The columns are not standardized.
#'
#' @examples
#' n <- 1000
#' p <- 40
#' m <- 8
#' G <- matrix(rbinom(n * p, size = 2, prob = 0.3), ncol = p)
#' A <- matrix(sample(0:1, p * m, replace = TRUE), nrow = p)
#' phi_v <- 0.05
#' Y <- sim_single_trait_simple(G, A, phi_v)
#'
#' @export
sim_single_trait_simple <- function(G, A, phi_v) {
  if (!is.numeric(phi_v) || length(phi_v) != 1L || is.na(phi_v) ||
      phi_v <= 0) {
    stop("`phi_v` must be a single positive number.", call. = FALSE)
  }

  n <- nrow(G)
  m <- ncol(A)
  Y <- matrix(NA_real_, n, m)

  # seq_len() keeps the loop empty when A has zero columns (1:m would not).
  for (j in seq_len(m)) {
    connected_snps <- which(A[, j] == 1)
    if (length(connected_snps) == 0L) {
      stop("Column ", j, " of `A` has no connected SNPs.", call. = FALSE)
    }

    # Per-SNP sample variances; the first connected SNP is the reference.
    snp_var <- vapply(connected_snps, function(i) var(G[, i]), numeric(1))
    if (anyNA(snp_var) || any(snp_var <= 0)) {
      stop("SNPs connected to column ", j,
           " of `A` must have positive, non-missing variance.",
           call. = FALSE)
    }
    lead_var <- snp_var[1]

    # Effect 1 for the reference SNP; others are scaled so that
    # beta^2 * var(SNP) equals the reference SNP's variance.
    beta <- rep(0, ncol(G))
    beta[connected_snps] <- sqrt(lead_var / snp_var)

    variance_sum <- as.numeric(
      var(G[, connected_snps, drop = FALSE] %*% beta[connected_snps])
    )
    sigma2 <- lead_var / phi_v - variance_sum

    # Lower phi_v on a 0.01 grid until the residual variance is positive.
    # The reduced phi_v intentionally persists for later columns, matching
    # the historical behaviour of this function.
    while (sigma2 <= 0) {
      phi_v <- phi_v - 0.01
      if (phi_v < 1e-8) {
        stop("Could not find a positive `phi_v` giving a positive residual ",
             "variance for column ", j, " of `A`.", call. = FALSE)
      }
      sigma2 <- lead_var / phi_v - variance_sum
    }

    # Simulate trait (left unscaled).
    Y[, j] <- G %*% beta + rnorm(n, mean = 0, sd = sqrt(sigma2))
  }

  Y
}

R/simulate_transQTL.R

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,4 @@
11

2-
#' Simulate cis gene expression
#'
#' This function simulates cis gene expression based on genotype data.
#' It generates gene expression matrix E using genotype matrix G and
#' adjacency matrix A.
#'
#' For each column `j` of `A`, the SNPs with `A[, j] == 1` receive non-zero
#' effects: the first connected SNP gets effect 1 and every other connected
#' SNP gets effect `sqrt(var(first SNP) / var(that SNP))`. Gaussian noise with
#' variance `var(first SNP) / phi_v - var(G \%*\% beta)` is then added.
#'
#' If that residual variance is not positive, `phi_v` is lowered in steps of
#' 0.01 until it is, and the lowered value is carried over to the remaining
#' columns of `A`. An error is raised when lowering `phi_v` would make it
#' non-positive, instead of looping forever.
#'
#' @param G A matrix of genotypes with dimensions n x p.
#' @param A A binary adjacency matrix of dimensions p x m indicating direct
#'   effects. Every column must contain at least one entry equal to 1.
#' @param phi_v The per-SNP heritability value; a single positive number.
#'
#' @return A matrix E of dimensions n x m representing the simulated gene
#'   expression. The columns are not standardized.
#'
#' @examples
#' n <- 1000
#' p <- 40
#' m <- 8
#' G <- matrix(rbinom(n * p, size = 2, prob = 0.3), ncol = p)
#' A <- matrix(sample(0:1, p * m, replace = TRUE), nrow = p)
#' phi_v <- 0.05
#' E <- simulate_cis_expression(G, A, phi_v)
#'
#' @export
simulate_cis_expression <- function(G, A, phi_v) {
  if (!is.numeric(phi_v) || length(phi_v) != 1L || is.na(phi_v) ||
      phi_v <= 0) {
    stop("`phi_v` must be a single positive number.", call. = FALSE)
  }

  n <- nrow(G)
  m <- ncol(A)
  E <- matrix(NA_real_, n, m)

  # seq_len() keeps the loop empty when A has zero columns (1:m would not).
  for (j in seq_len(m)) {
    connected_snps <- which(A[, j] == 1)
    if (length(connected_snps) == 0L) {
      stop("Column ", j, " of `A` has no connected SNPs.", call. = FALSE)
    }

    # Per-SNP sample variances; the first connected SNP is the reference.
    snp_var <- vapply(connected_snps, function(i) var(G[, i]), numeric(1))
    if (anyNA(snp_var) || any(snp_var <= 0)) {
      stop("SNPs connected to column ", j,
           " of `A` must have positive, non-missing variance.",
           call. = FALSE)
    }
    lead_var <- snp_var[1]

    # Effect 1 for the reference SNP; others are scaled so that
    # beta^2 * var(SNP) equals the reference SNP's variance.
    beta <- rep(0, ncol(G))
    beta[connected_snps] <- sqrt(lead_var / snp_var)

    variance_sum <- as.numeric(
      var(G[, connected_snps, drop = FALSE] %*% beta[connected_snps])
    )
    sigma2_cis <- lead_var / phi_v - variance_sum

    # Lower phi_v on a 0.01 grid until the residual variance is positive.
    # The reduced phi_v intentionally persists for later columns, matching
    # the historical behaviour of this function.
    while (sigma2_cis <= 0) {
      phi_v <- phi_v - 0.01
      if (phi_v < 1e-8) {
        stop("Could not find a positive `phi_v` giving a positive residual ",
             "variance for column ", j, " of `A`.", call. = FALSE)
      }
      sigma2_cis <- lead_var / phi_v - variance_sum
    }

    # Simulate gene expression (left unscaled).
    E[, j] <- G %*% beta + rnorm(n, mean = 0, sd = sqrt(sigma2_cis))
  }

  E
}
57-
58-
59-
602
#' Simulate trans gene expression for different simulation scenarios
613
#'
624
#' This function can perform Type I error simulations or Accuracy and False Discovery Rate (ACC/FDR) simulations

_pkgdown.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ reference:
5050
contents:
5151
- sim_geno_indep
5252
- sim_geno_LD
53-
- sim_geno_UKB
53+
- sim_geno_real
5454

5555
- title: "Effect Size and Trait Simulation"
5656
desc: "Functions for simulating effect sizes and phenotypes"
@@ -60,6 +60,7 @@ reference:
6060
- sim_beta_fix_variant
6161
- simulate_polygenic_trait
6262
- sim_multi_traits
63+
- sim_single_trait_simple
6364

6465
- title: "Summary Statistics"
6566
desc: "Functions for simulating and calculating summary statistics"
@@ -68,15 +69,14 @@ reference:
6869
- calculate_sumstat
6970
- get_correlation
7071

71-
- title: "eQTL Simulation"
72-
desc: "Functions for simulating eQTL data"
72+
- title: "Cis-QTL Simulation"
73+
desc: "Functions for simulating cis-eQTL data, originally developed for the \"susieR 2.0\" manuscript, McCreight et al (2025)"
7374
contents:
74-
- generate_eqtl_data
75+
- generate_cis_qtl_data
7576

7677
- title: "Trans-QTL Simulation"
77-
desc: "Functions for simulating trans-QTL and gene regulatory networks"
78+
desc: "Functions for simulating trans-QTL and gene regulatory networks, originally developed for the \"transBoost\" manuscript, Cao et al (2026)"
7879
contents:
79-
- simulate_cis_expression
8080
- simulate_trans_expression
8181
- simulate_trans_mixture_celltype
8282
- get_random_A

inst/CITATION

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Citation entry returned by citation() for this package: the SuSiE 2.0
# manuscript.
# NOTE(review): `journal` is a placeholder ("Submitting to ...") rather than a
# published venue; update it (and `year`) once the article is accepted.
bibentry(
  bibtype = "Article",
  title = paste("SuSiE 2.0: improved methods and implementations for",
                "genetic fine-mapping and phenotype prediction"),
  author = c(
    person("Alexander", "McCreight"),
    person("Yanghyeon", "Cho"),
    person("Daniel", "Nachun"),
    person("Ruixi", "Li"),
    person("Hao-Yu", "Gan"),
    person("Matthew", "Stephens"),
    person("Peter", "Carbonetto"),
    # Middle initials belong to the given name so the family name is
    # "Denault", matching the textVersion string below.
    person("William R.P.", "Denault"),
    person("Gao", "Wang")
  ),
  journal = "Submitting to Genome Biology",
  year = "2025",
  textVersion =
    paste("Alexander McCreight, Yanghyeon Cho, Daniel Nachun, Ruixi Li,",
          "Hao-Yu Gan, Matthew Stephens, Peter Carbonetto,",
          "William R.P. Denault and Gao Wang (2025). SuSiE 2.0: improved",
          "methods and implementations for genetic fine-mapping and",
          "phenotype prediction. Submitting to Genome Biology")
)
Lines changed: 8 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sim_geno_UKB.Rd

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)