Skip to content

Commit b5ba103

Browse files
Merge pull request #19 from alexmccreight/main
general improvements, documentation, bug fixes
2 parents 22db6b5 + e1a1606 commit b5ba103

33 files changed

+944
-174
lines changed

DESCRIPTION

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,17 @@ Authors@R: c(
1616
License: MIT + file LICENSE
1717
Imports:
1818
matrixStats,
19-
Rcpp (>= 1.0.3)
19+
Rcpp (>= 1.0.3),
20+
susieR,
21+
tibble
2022
Suggests:
2123
testthat,
2224
knitr,
23-
rmarkdown
25+
rmarkdown,
26+
BEDMatrix,
27+
data.table,
28+
mvtnorm
2429
LinkingTo: Rcpp
2530
NeedsCompilation: yes
2631
VignetteBuilder: knitr
27-
RoxygenNote: 7.3.2
32+
RoxygenNote: 7.3.3

NAMESPACE

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
export(calculate_sumstat)
34
export(compute_s2g)
45
export(gene_data)
56
export(gene_data_null1)
67
export(gene_data_null2)
78
export(gene_data_null3)
89
export(generate_eqtl_data)
10+
export(get_correlation)
911
export(get_lower_chol)
1012
export(get_random_A)
1113
export(parse_num_causal_snps)
@@ -18,14 +20,17 @@ export(sim_multi_traits)
1820
export(sim_sumstats)
1921
export(simulate_causal_config)
2022
export(simulate_cis_expression)
21-
export(simulate_infinitesimal_effects)
22-
export(simulate_oligogenic_effects)
2323
export(simulate_polygenic_trait)
24-
export(simulate_sparse_effects)
2524
export(simulate_trans_expression)
2625
export(simulate_trans_mixture_celltype)
26+
importFrom(BEDMatrix,BEDMatrix)
2727
importFrom(MASS,mvrnorm)
2828
importFrom(Matrix,chol)
29+
importFrom(Matrix,chol2inv)
30+
importFrom(data.table,fread)
31+
importFrom(mvtnorm,rmvnorm)
2932
importFrom(purrr,map2_dfc)
3033
importFrom(stats,pnorm)
3134
importFrom(stats,rnorm)
35+
importFrom(susieR,univariate_regression)
36+
importFrom(tibble,tibble)

R/ld_utils.R

Lines changed: 26 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,16 @@
1-
2-
3-
# - Calculate LD matrix
4-
get_ld <- function(X, intercepte = FALSE){
5-
X = t(X)
6-
# Center each variable
7-
if (!intercepte){
8-
X = X - rowMeans(X)
9-
}
10-
# Standardize each variable
11-
X = X / sqrt(rowSums(X^2))
12-
# Calculate correlations
13-
cr = tcrossprod(X)
14-
return(cr)
15-
}
16-
17-
# - Get positive definite LD matrix
18-
# Function to check if a matrix is positive definite
1+
# Check if a matrix is positive definite
2+
#' @keywords internal
193
is.positive.definite <- function(LD) {
204
ev <- eigen(LD, symmetric = TRUE)$values
215
return(all(ev > 0))
226
}
237

8+
# Get positive definite LD matrix
9+
#' @keywords internal
2410
get_ld_pd <- function(LD, lambda = 1e-3){
2511

2612
if (!isSymmetric(LD))
27-
stop("Provided LD matrix should be sysmatric!")
13+
stop("Provided LD matrix should be symmetric!")
2814

2915
while (!is.positive.definite(LD)) {
3016
LD <- LD + lambda * diag(nrow(LD))
@@ -33,29 +19,32 @@ get_ld_pd <- function(LD, lambda = 1e-3){
3319
return(LD)
3420
}
3521

36-
37-
38-
# - Calculate pseduo inverse of LD matrix
22+
# Calculate pseudo inverse of LD matrix
23+
#' @importFrom Matrix chol2inv
24+
#' @keywords internal
3925
get_ld_inverse <- function(LD, method = "svd",
4026
tol.exact = sqrt(.Machine$double.eps),
4127
tol.pct = 1e-3){
4228

4329
if (!isSymmetric(LD))
44-
stop("Provided LD matrix should be sysmatric!")
45-
46-
if (method == "svd")
30+
stop("Provided LD matrix should be symmetric!")
31+
32+
if (method == "svd") {
4733
LD_inv <- get_ld_inv_svd(LD, tol = tol.exact)
48-
49-
if (method == "eigen")
34+
} else if (method == "eigen") {
5035
LD_inv <- get_ld_inv_eigen(LD, tol = tol.pct)
51-
52-
if (method == "fastchol")
53-
LD_inv <- Matrix::chol2inv(LD)
54-
36+
} else if (method == "fastchol") {
37+
LD_inv <- chol2inv(LD)
38+
} else {
39+
stop("Invalid method. Choose 'svd', 'eigen', or 'fastchol'.")
40+
}
41+
5542
return(LD_inv)
56-
43+
5744
}
5845

46+
# Calculate LD inverse using eigendecomposition
47+
#' @keywords internal
5948
get_ld_inv_eigen <- function(LD, tol = 1e-3){
6049

6150
eigen_LD <- eigen(LD)
@@ -67,16 +56,17 @@ get_ld_inv_eigen <- function(LD, tol = 1e-3){
6756
return(LD_inv)
6857
}
6958

59+
# Calculate LD inverse using SVD
60+
#' @keywords internal
7061
get_ld_inv_svd <- function(LD, tol = sqrt(.Machine$double.eps)){
7162

7263
svd_LD <- svd(LD)
7364
Positive <- svd_LD$d > max(tol * svd_LD$d[1L], 0)
7465
if (all(Positive))
75-
LD_inv = svd_LD$v %*% (1/svd_LD$d * t(svd_LD$u))
76-
else
77-
LD_inv = svd_LD$v[,Positive,drop = FALSE] %*%
66+
LD_inv <- svd_LD$v %*% (1/svd_LD$d * t(svd_LD$u))
67+
else
68+
LD_inv <- svd_LD$v[,Positive,drop = FALSE] %*%
7869
((1/svd_LD$d[Positive]) * t(svd_LD$u[,Positive,drop = FALSE]))
7970

8071
return(LD_inv)
8172
}
82-

R/simulate_eQTL.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#' \item{beta}{A vector of effect sizes with nonzero entries for the sparse SNPs.}
1515
#' \item{sentinel_index}{Index of the sentinel SNP.}
1616
#' \item{other_sparse_indices}{Indices of the additional sparse SNPs.}
17-
#' @export
17+
#' @keywords internal
1818
simulate_sparse_effects <- function(G, h2_sparse, prop_h2_sentinel, n_other_sparse,
1919
min_sparse_effect = 0.10, sentinel_dominance = 1.5) {
2020
n_features <- ncol(G)
@@ -99,7 +99,7 @@ simulate_sparse_effects <- function(G, h2_sparse, prop_h2_sentinel, n_other_spar
9999
#' \item{beta}{A vector of effect sizes for oligogenic effects (zeros elsewhere).}
100100
#' \item{oligogenic_indices}{Indices of the oligogenic SNPs.}
101101
#' \item{mixture_assignments}{A vector (indexed by SNP) of mixture component assignments.}
102-
#' @export
102+
#' @keywords internal
103103
simulate_oligogenic_effects <- function(G, h2_oligogenic, n_oligogenic, mixture_props,
104104
non_sparse_indices,
105105
max_oligogenic_effect = 0.12,
@@ -166,7 +166,7 @@ simulate_oligogenic_effects <- function(G, h2_oligogenic, n_oligogenic, mixture_
166166
#' @param h2_infinitesimal Heritability allocated to infinitesimal effects.
167167
#' @param infinitesimal_indices Indices of SNPs to receive infinitesimal effects.
168168
#' @return A vector of effect sizes (with zeros outside infinitesimal_indices).
169-
#' @export
169+
#' @keywords internal
170170
simulate_infinitesimal_effects <- function(G, h2_infinitesimal, infinitesimal_indices) {
171171
n_features <- ncol(G)
172172
beta <- rep(0, n_features)
@@ -193,7 +193,7 @@ simulate_infinitesimal_effects <- function(G, h2_infinitesimal, infinitesimal_in
193193
#' @details The function calculates power using a non-central chi-square distribution
194194
#' with non-centrality parameter NCP = n * beta^2 * var(X) / residual_variance, where the
195195
#' significance threshold is Bonferroni-corrected (alpha = 0.05 / p).
196-
#' @export
196+
#' @keywords internal
197197
is_causal_power <- function(G, beta, residual_variance, power = 0.80) {
198198
n <- nrow(G)
199199
p <- ncol(G)

R/simulate_genotype.R

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
1-
2-
# sim_geno_functions.R
3-
41
#' Simulate Independent Genotypes
52
#'
63
#' @param n Sample size.
74
#' @param p Number of SNPs.
85
#' @param min_maf Minimum minor allele frequency.
96
#' @param max_maf Maximum minor allele frequency.
10-
#' @param scale Logical, whether to scale the data using scale_faster.
7+
#' @param scale Logical, whether to scale the data.
118
#' @return A matrix of genotypes.
129
#' @examples
1310
#' sim_geno_indep(n = 100, p = 10, min_maf = 0.01, max_maf = 0.4, scale = TRUE)
@@ -22,7 +19,7 @@ sim_geno_indep <- function(n, p, min_maf = 0.01, max_maf = 0.4, scale = FALSE) {
2219
colnames(G) <- paste0("variant", 1:p)
2320

2421
if (scale) {
25-
G <- scale_faster(G)
22+
G <- scale(G)
2623
}
2724

2825
return(G)
@@ -35,13 +32,14 @@ sim_geno_indep <- function(n, p, min_maf = 0.01, max_maf = 0.4, scale = FALSE) {
3532
#' @param min_maf Minimum minor allele frequency.
3633
#' @param max_maf Maximum minor allele frequency.
3734
#' @param lambda Regularization parameter.
38-
#' @param type Type of genotype to simulate, either "continuous" or "discrete".
39-
#' @param scale Logical, whether to scale the data using scale_faster.
35+
#' @param is.discrete Logical, whether to generate discrete (TRUE) or continuous (FALSE) genotypes.
36+
#' @param scale Logical, whether to scale the data.
37+
#' @param tol Tolerance for checking diagonal elements of LD matrix.
4038
#' @return A matrix of genotypes.
4139
#' @examples
42-
#' sim_geno_LD(n = 100, LD = matrix(runif(100), 10, 10), min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, type = "continuous", scale = TRUE)
40+
#' sim_geno_LD(n = 100, LD = matrix(runif(100), 10, 10), min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.discrete = FALSE, scale = TRUE)
4341
#' @export
44-
sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.discrete = FALSE, scale = FALSE) {
42+
sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.discrete = FALSE, scale = FALSE, tol = sqrt(.Machine$double.eps)) {
4543
if (missing(n)) stop("Please provide the sample size")
4644
if (is.null(LD)) stop("Please provide LD matrix!")
4745

@@ -58,7 +56,7 @@ sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.
5856
}
5957

6058
if (scale) {
61-
G <- scale_faster(G)
59+
G <- scale(G)
6260
}
6361

6462
colnames(G) <- paste0("variant", 1:p)
@@ -70,7 +68,7 @@ sim_geno_LD <- function(n, LD, min_maf = 0.01, max_maf = 0.4, lambda = 1e-3, is.
7068
#' @param n Sample size.
7169
#' @param file_path Path to the UK Biobank file.
7270
#' @param min_maf Minimum minor allele frequency.
73-
#' @param scale Logical, whether to scale the data using scale_faster.
71+
#' @param scale Logical, whether to scale the data.
7472
#' @return A matrix of genotypes.
7573
#' @examples
7674
#' sim_geno_UKB(n = 100, file_path = "path/to/UKB/file", min_maf = 0.01, scale = TRUE)
@@ -83,26 +81,31 @@ sim_geno_UKB <- function(n, file_path, min_maf = 0.01, scale = FALSE) {
8381
G <- process_ukb(file_path, n, min_maf)
8482

8583
if (scale) {
86-
G <- scale_faster(G)
84+
G <- scale(G)
8785
}
8886

89-
colnames(G) <- paste0("variant", 1:p)
87+
colnames(G) <- paste0("variant", 1:ncol(G))
9088
return(G)
9189
}
9290

91+
# Safe multivariate normal random generation
92+
#' @importFrom mvtnorm rmvnorm
93+
#' @keywords internal
9394
safe_rmvnorm <- function(n, p, LD, maf, lambda = 1e-3) {
9495

9596
var_g <- diag(sqrt(2*maf*(1-maf)))
9697
Sigma <- var_g %*% LD %*% var_g
97-
G <- try(mvtnorm::rmvnorm(n, mean = rep(0, p), sigma = Sigma), silent = TRUE)
98+
G <- try(rmvnorm(n, mean = rep(0, p), sigma = Sigma), silent = TRUE)
9899
if(any(class(G) == "try-error")) {
99100
Sigma <- get_ld_pd(Sigma, lambda = lambda)
100-
G <- mvtnorm::rmvnorm(n, mean = rep(0, p), sigma = Sigma)
101+
G <- rmvnorm(n, mean = rep(0, p), sigma = Sigma)
101102
}
102103
return(G)
103104

104105
}
105106

107+
# Convert continuous genotypes to discrete (binomial)
108+
#' @keywords internal
106109
binomialize_genotype <- function(G, p, maf) {
107110

108111
sapply(1:p, function(i) {
@@ -116,9 +119,13 @@ binomialize_genotype <- function(G, p, maf) {
116119

117120
}
118121

122+
# Process UK Biobank genotype file
123+
#' @importFrom BEDMatrix BEDMatrix
124+
#' @importFrom data.table fread
125+
#' @keywords internal
119126
process_ukb <- function(file.path, n, min_maf = 0.01) {
120127

121-
G <- BEDMatrix::BEDMatrix(file.path)
128+
G <- BEDMatrix(file.path)
122129
G <- data.matrix(G[1:n,])
123130
G <- apply(G, 2, function(g) {
124131
tmp <- which(is.na(g))
@@ -127,7 +134,7 @@ process_ukb <- function(file.path, n, min_maf = 0.01) {
127134
})
128135
maf <- colMeans(G, na.rm = TRUE) / 2
129136
snpname <- strsplit(file.path, split = "\\.bed")[[1]][1]
130-
snp.names <- unlist(data.table::fread(paste0(snpname, ".bim"))[,2])
137+
snp.names <- unlist(fread(paste0(snpname, ".bim"))[,2])
131138
colnames(G) <- snp.names
132139
poss <- which(maf <= min_maf)
133140
if (length(poss) != 0) {
@@ -138,6 +145,8 @@ process_ukb <- function(file.path, n, min_maf = 0.01) {
138145
}
139146

140147

148+
# Generate SNP with specified LD
149+
#' @keywords internal
141150
generate_snp_LD <- function(g, ld, maf) {
142151

143152
# - change genotype to haplotype

0 commit comments

Comments
 (0)