|
6 | 6 | #' @return unique_grp. |
7 | 7 | #' @export |
8 | 8 | compute_uniq_grp <- function(profile, min_count_run, min_pres) { |
9 | | - grps <- profile |
10 | | - ttt <- table(grps) |
| 9 | + ttt <- table(profile) |
11 | 10 | ttt <- ttt[ttt >= max(min_count_run * min_pres, 2)] |
12 | 11 | unique_grp <- as.numeric(names(ttt)) |
13 | 12 | return(unique_grp) |
@@ -52,7 +51,7 @@ label_val_to_keep <- function(min_run, timeline, min_pres, this_times, times) { |
52 | 51 |
|
53 | 52 | # filtering based on the kernel regression estimate |
54 | 53 | this_smooth <- predict_smoothed_rt(min_run, this_timeline) |
55 | | - if (max(this_smooth) >= min_pres) { |
| 54 | + if (max(this_smooth, na.rm = TRUE) >= min_pres) { |
56 | 55 | measured_points <- good_points <- timeline |
57 | 56 | measured_points[this_times] <- 1 |
58 | 57 |
|
@@ -82,63 +81,68 @@ label_val_to_keep <- function(min_run, timeline, min_pres, this_times, times) { |
82 | 81 | run_filter <- function(newprof, |
83 | 82 | min_pres, |
84 | 83 | min_run) { |
85 | | - newprof <- tibble::tibble(mz = newprof[, 1], rt = newprof[, 2], intensi = newprof[, 3], grps = newprof[, 4]) |
86 | | - |
87 | 84 | # ordering retention time values |
88 | | - labels <- newprof$rt |
89 | | - times <- unique(labels) |
90 | | - times <- times[order(times)] |
| 85 | + # labels <- newprof$rt |
| 86 | + # times <- unique(labels) |
| 87 | + # times <- times[order(times)] |
91 | 88 |
|
92 | | - for (i in 1:length(times)) { |
93 | | - labels[which(newprof$rt == times[i])] <- i # now labels is the index of time points |
94 | | - } |
| 89 | + # for (i in 1:length(times)) { |
| 90 | + # labels[which(newprof$rt == times[i])] <- i # now labels is the index of time points |
| 91 | + # } |
95 | 92 |
|
96 | | - newprof$rt <- labels |
| 93 | + # newprof$rt <- labels |
| 94 | + newprof <- dplyr::arrange_at(newprof, "rt") |
| 95 | + times <- unique(newprof$rt) |
97 | 96 |
|
98 | 97 | # calculates the minimun number of rt points to be considered a peak |
99 | | - min_count_run <- min_run * length(times) / (max(times) - min(times)) |
100 | | - min_run <- round(min_count_run) |
| 98 | + scan_rate <- 1.0 / abs(median(diff(times))) |
| 99 | + min_count_run <- round(min_pres * min_run * scan_rate) |
101 | 100 |
|
102 | 101 | # computes unique groups |
103 | 102 | uniq_grp <- compute_uniq_grp(newprof$grps, min_count_run, min_pres) |
104 | 103 |
|
105 | 104 | # ordered by mz and grps data that are inside unigrps |
106 | 105 | newprof <- dplyr::filter(newprof, grps %in% uniq_grp) |> dplyr::arrange(grps, mz) |
107 | 106 |
|
108 | | - # computes break points i.e. indices of mass differences greater than min_mz_tol |
109 | | - breaks <- compute_breaks_3(newprof$grps) |
110 | | - |
111 | | - # init counters for loop |
112 | | - new_rec <- newprof * 0 |
113 | | - rec_pointer <- 1 |
114 | | - timeline <- rep(0, length(times)) |
115 | | - for (m in 2:length(breaks)) |
116 | | - { |
117 | | - this_prof <- dplyr::slice(newprof, (breaks[m - 1] + 1):breaks[m]) |> dplyr::arrange_at("rt") |
118 | | - |
119 | | - to_keep <- label_val_to_keep( |
120 | | - min_run, |
121 | | - timeline, |
122 | | - min_pres, |
123 | | - this_prof$rt, |
124 | | - times |
125 | | - ) |
126 | | - |
127 | | - # operation over selected indices |
128 | | - if (sum(to_keep) > 0) { |
129 | | - this_sel <- which(to_keep == 1) |
130 | | - this_new <- dplyr::slice(this_prof, this_sel) |
131 | | - r_new <- nrow(this_new) |
132 | | - new_rec[rec_pointer:(rec_pointer + r_new - 1), ] <- this_new |
133 | | - rec_pointer <- rec_pointer + r_new |
134 | | - } |
135 | | - } |
136 | | - |
137 | | - new_rec <- dplyr::slice(new_rec, 1:(rec_pointer - 1)) |
138 | | - new_rec[, 2] <- times[new_rec[, 2]] |
139 | | - |
140 | | - results <- new("list") |
141 | | - results$new_rec <- new_rec |
| 107 | + results <- dplyr::group_by(newprof, grps) |> |
| 108 | + dplyr::filter(n() >= min_count_run && abs(span(rt)) >= min_run) |> |
| 109 | + dplyr::ungroup() |> |
| 110 | + dplyr::rename(group_number = grps) |
| 111 | + |
| 112 | + # # computes break points i.e. indices of mass differences greater than min_mz_tol |
| 113 | + # breaks <- compute_breaks_3(newprof$grps) |
| 114 | + |
| 115 | + # # init counters for loop |
| 116 | + # new_rec <- newprof * 0 |
| 117 | + # rec_pointer <- 1 |
| 118 | + # timeline <- rep(0, length(times)) |
| 119 | + # for (m in 2:length(breaks)) |
| 120 | + # { |
| 121 | + # this_prof <- dplyr::slice(newprof, (breaks[m - 1] + 1):breaks[m]) |> dplyr::arrange_at("rt") |
| 122 | + |
| 123 | + # to_keep <- label_val_to_keep( |
| 124 | + # min_run, |
| 125 | + # timeline, |
| 126 | + # min_pres, |
| 127 | + # this_prof$rt, |
| 128 | + # times |
| 129 | + # ) |
| 130 | + # browser() |
| 131 | + # # operation over selected indices |
| 132 | + # if (sum(to_keep) > 0) { |
| 133 | + # this_sel <- which(to_keep == 1) |
| 134 | + # this_new <- dplyr::slice(this_prof, this_sel) |
| 135 | + # r_new <- nrow(this_new) |
| 136 | + # new_rec[rec_pointer:(rec_pointer + r_new - 1), ] <- this_new |
| 137 | + # rec_pointer <- rec_pointer + r_new |
| 138 | + # } |
| 139 | + # } |
| 140 | + |
| 141 | + # new_rec <- dplyr::slice(new_rec, 1:(rec_pointer - 1)) |
| 142 | + # new_rec[, 2] <- times[new_rec[, 2]] |
| 143 | + |
| 144 | + # results <- new("list") |
| 145 | + # results$new_rec <- new_rec |
142 | 146 |
|
143 | 147 | return(results) |
144 | 148 | } |
0 commit comments