Skip to content

Commit 180a444

Browse files
authored
Merge pull request #73 from monarch-initiative/1_7_bugfix
Bug fixes, remove drop_unused_query_nodes (breaking change)
2 parents bf8bb8d + 9e0cd64 commit 180a444

18 files changed

+55
-146
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ Type: Package
33
Title: Monarch Knowledge Graph Queries
44
Description: R package for easy access, manipulation, and analysis of
55
Monarch KG data Resources.
6-
Version: 1.7
6+
Version: 2.0
77
URL: https://github.com/monarch-initiative/monarchr
88
BugReports: https://github.com/monarch-initiative/monarchr/issues
99
Authors@R:

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ importFrom(stringr,str_replace_all)
102102
importFrom(stringr,str_wrap)
103103
importFrom(tibble,tibble)
104104
importFrom(tidygraph,activate)
105+
importFrom(tidygraph,active)
105106
importFrom(tidygraph,as_tibble)
106107
importFrom(tidygraph,graph_join)
107108
importFrom(tidygraph,tbl_graph)

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# monarchr 2.0
2+
3+
## Breaking changes
4+
5+
* This breaking release drops support for `drop_unused_query_nodes` in `expand()`, which was both brittle and violated the rule that expansion should return a supergraph of the query
6+
7+
## Bug fixes
8+
9+
* Fixes a bug in `expand()` for `neo4j_engine()` causing an infinite loop.
10+
111
# monarchr 1.7
212

313
## New features

R/expand.R

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#' @param predicates A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.
1313
#' @param categories A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.
1414
#' @param transitive If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like `biolink:subclass_of`.
15-
#' @param drop_unused_query_nodes If TRUE, remove query nodes from the result, unless they are at the neighborhood boundary, i.e., required for connecting to the result nodes. Default is FALSE.
1615
#' @param ... Other parameters passed to methods.
1716
#'
1817
#' @return A `tbl_kgx()` graph
@@ -59,7 +58,6 @@ expand <- function(graph,
5958
predicates = NULL,
6059
categories = NULL,
6160
transitive = FALSE,
62-
drop_unused_query_nodes = FALSE,
6361

6462
...) {
6563
UseMethod("expand")

R/expand.tbl_kgx.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,19 @@
44
#' @importFrom assertthat assert_that
55
expand.tbl_kgx <- function(graph, ...) {
66
# check to see if g has a last_engine attribute
7+
active_tbl <- active(graph)
78
if(!is.null(attr(graph, "last_engine"))) {
89
engine <- attr(graph, "last_engine")
910
if(any(c("monarch_engine", "neo4j_engine") %in% class(engine))) {
10-
return(expand_neo4j_engine(engine, graph, ...))
11+
res <- expand_neo4j_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
12+
return(res)
1113
} else if("file_engine" %in% class(engine)) {
12-
return(expand_file_engine(engine, graph, ...))
14+
res <- expand_file_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
15+
return(res)
1316
} else {
1417
stop("Error: unknown or incompatible engine.")
1518
}
16-
return(expand(engine, graph, ...))
19+
# return(expand(engine, graph, ...)) # this shouldn't be reachable
1720
} else {
1821
stop("Error: tbl_kgx object does not have a most recent engine.")
1922
}

R/expand_file_engine.R

Lines changed: 8 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,7 @@ transitive_query_internal <- function(engine,
55
g,
66
direction = "out",
77
predicates = NULL,
8-
categories = NULL,
9-
drop_unused_query_nodes = FALSE) {
10-
11-
# # TODO: this block will never trigger; the check is done in the main function, remove
12-
# if(length(predicates) > 1) {
13-
# # we call recusively on each predicate
14-
# for(predicate in predicates) {
15-
# g2 <- transitive_query_internal(engine,
16-
# g,
17-
# direction = direction,
18-
# predicates = predicate,
19-
# categories = categories,
20-
# drop_unused_query_nodes = TRUE)
21-
# suppressMessages(g <- tidygraph::graph_join(g, g2), classes = "message") # suppress joining info
22-
# }
23-
# }
8+
categories = NULL) {
249

2510
# assert that direction is "out" or "in"
2611
assert_that(direction == "out" | direction == "in", msg = "Direction must be 'out' or 'in' when using transitive closure.")
@@ -64,16 +49,6 @@ transitive_query_internal <- function(engine,
6449
filter(purrr::map_lgl(category, ~ any(.x %in% categories)) | id %in% query_ids)
6550
}
6651

67-
# in this logic, unused query nodes (those without any connection) are kept by default
68-
# so we need to remove them if drop_unused_query_nodes is TRUE
69-
# we can identify them in the result as those with no connected edges
70-
if(drop_unused_query_nodes) {
71-
bfs_edges <- bfs_result %>% activate(edges) %>% as_tibble()
72-
bfs_nodes <- c(bfs_edges$object, bfs_edges$subject)
73-
bfs_result <- bfs_result %>%
74-
filter(id %in% bfs_nodes)
75-
}
76-
7752
attr(bfs_result, "last_engine") <- engine
7853
return(bfs_result)
7954
}
@@ -83,8 +58,7 @@ direction_fetch_internal <- function(engine,
8358
g,
8459
direction = "out",
8560
predicates = NULL,
86-
categories = NULL,
87-
drop_unused_query_nodes = FALSE) {
61+
categories = NULL) {
8862

8963
engine_graph <- engine$graph
9064

@@ -129,15 +103,7 @@ direction_fetch_internal <- function(engine,
129103
filter(purrr::map_lgl(category, ~ any(.x %in% categories)) | id %in% node_ids)
130104
}
131105

132-
# the logic above drops unused query nodes, but we can keep them if desired
133-
# to do so we drop all the edges in the query graph, and join the result with new_edges
134-
if(!drop_unused_query_nodes) {
135-
query_no_edges <- g %>%
136-
activate(edges) %>%
137-
filter(FALSE)
138-
139-
suppressMessages(new_edges <- kg_join(query_no_edges, new_edges), classes = "message") # suppress joining info
140-
}
106+
suppressMessages(new_edges <- kg_join(g, new_edges), classes = "message") # suppress joining info
141107

142108
return(new_edges)
143109
}
@@ -152,8 +118,7 @@ expand_file_engine <- function(engine,
152118
direction = "both",
153119
predicates = NULL,
154120
categories = NULL,
155-
transitive = FALSE,
156-
drop_unused_query_nodes = FALSE) {
121+
transitive = FALSE) {
157122

158123
assert_that(is.tbl_graph(graph))
159124
assert_that(direction %in% c("in", "out", "both"))
@@ -167,14 +132,14 @@ expand_file_engine <- function(engine,
167132
stop("Transitive closure requires exactly one specified predicate.")
168133

169134
} else if(transitive) {
170-
new_edges <- transitive_query_internal(engine, graph, direction, predicates, categories, drop_unused_query_nodes)
135+
new_edges <- transitive_query_internal(engine, graph, direction, predicates, categories)
171136

172137
} else {
173138
if(direction == "out" || direction == "in") {
174-
new_edges <- direction_fetch_internal(engine, graph, direction, predicates, categories, drop_unused_query_nodes)
139+
new_edges <- direction_fetch_internal(engine, graph, direction, predicates, categories)
175140
} else if(direction == "both") {
176-
new_out_edges <- direction_fetch_internal(engine, graph, "out", predicates, categories, drop_unused_query_nodes)
177-
new_in_edges <- direction_fetch_internal(engine, graph, "in", predicates, categories, drop_unused_query_nodes)
141+
new_out_edges <- direction_fetch_internal(engine, graph, "out", predicates, categories)
142+
new_in_edges <- direction_fetch_internal(engine, graph, "in", predicates, categories)
178143
suppressMessages(new_edges <- kg_join(new_out_edges, new_in_edges), classes = "message") # suppress joining info
179144
}
180145
}

R/expand_n.R

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
#' @inheritParams expand
1212
#' @param transitive NULL (not used in this function).
1313
#'
14-
#' @return A `tbl_kgx()` graph
14+
#' @return
15+
#' A `tbl_kgx()` graph
1516
#' @export
1617
#' @examples
1718
#' ## Using example KGX file packaged with monarchr
@@ -30,7 +31,6 @@ expand_n <- function(graph,
3031
predicates = NULL,
3132
categories = NULL,
3233
transitive = NULL,
33-
drop_unused_query_nodes = FALSE,
3434
n=1,
3535
...) {
3636
## Check args
@@ -63,7 +63,6 @@ expand_n <- function(graph,
6363
predicates = check_len(predicates,n,i),
6464
categories = check_len(categories,n,i),
6565
transitive = FALSE,
66-
drop_unused_query_nodes = check_len(drop_unused_query_nodes,n,i),
6766
...)
6867
if(return_each) graph_list[[paste0("iteration",i)]] <- graph
6968
message(paste(

R/expand_neo4j_engine.R

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ expand_neo4j_engine <- function(engine,
77
predicates = NULL,
88
categories = NULL,
99
transitive = FALSE,
10-
drop_unused_query_nodes = FALSE,
1110
page_size = 1000,
1211
limit = NULL) {
1312
## Sanity checks
@@ -184,11 +183,7 @@ expand_neo4j_engine <- function(engine,
184183
tidygraph::activate(nodes) %>%
185184
mutate(pcategory = normalize_categories(category, prefs$category_priority))
186185

187-
# if drop_unused_query_nodes is FALSE, we'll keep them by
188-
# joining the result with the original graph
189-
if(!drop_unused_query_nodes) {
190-
suppressMessages(result_cumulative <- kg_join(graph, result_cumulative), classes = "message") # suppress joining info
191-
}
186+
suppressMessages(result_cumulative <- kg_join(graph, result_cumulative), classes = "message") # suppress joining info
192187

193188
attr(result_cumulative, "last_engine") <- engine
194189
return(result_cumulative)

R/fetch_nodes.neo4j_engine.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ fetch_nodes.neo4j_engine <- function(engine, ..., query_ids = NULL, page_size =
6363
query <- "MATCH (n) WHERE n.id IN $id"
6464
params <- list(id = query_ids)
6565
} else {
66-
query <- paste0("MATCH (n) WHERE ", generate_cypher_conditionals(...))
66+
query <- paste0("MATCH (n) WHERE (", generate_cypher_conditionals(...), ")")
6767
params <- list()
6868
}
6969

@@ -112,6 +112,7 @@ fetch_nodes.neo4j_engine <- function(engine, ..., query_ids = NULL, page_size =
112112
if(last_result_size > 0) {
113113
total_nodes_fetched <- total_nodes_fetched + last_result_size
114114
last_max_node_id <- max(nodes(result)$id)
115+
115116
suppressMessages(result_cumulative <- graph_join(result_cumulative, result), class = "message")
116117
message(paste("Fetching; fetched", total_nodes_fetched, "of", total_results))
117118
}

R/graph_centrality.R

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#' @inheritParams nodes
1111
#' @returns Graph object with centrality added as a new node attribute.
1212
#' @export
13+
#' @importFrom tidygraph active
14+
#' @importFrom tidygraph activate
1315
#' @examples
1416
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
1517
#' g <- file_engine(filename) |>
@@ -23,9 +25,11 @@ graph_centrality <- function(graph,
2325
fun=igraph::harmonic_centrality,
2426
col="centrality",
2527
...){
28+
active_tbl <- active(graph)
2629
message("Computing node centrality.")
2730
graph <- graph|>
2831
activate(nodes)|>
29-
dplyr::mutate(!!col:=fun(graph, ...))
32+
dplyr::mutate(!!col:=fun(graph, ...)) |>
33+
activate(!!rlang::sym(active_tbl))
3034
return(graph)
3135
}

0 commit comments

Comments
 (0)