Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Type: Package
Title: Monarch Knowledge Graph Queries
Description: R package for easy access, manipulation, and analysis of
Monarch KG data Resources.
Version: 1.7
Version: 2.0
URL: https://github.com/monarch-initiative/monarchr
BugReports: https://github.com/monarch-initiative/monarchr/issues
Authors@R:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ importFrom(stringr,str_replace_all)
importFrom(stringr,str_wrap)
importFrom(tibble,tibble)
importFrom(tidygraph,activate)
importFrom(tidygraph,active)
importFrom(tidygraph,as_tibble)
importFrom(tidygraph,graph_join)
importFrom(tidygraph,tbl_graph)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# monarchr 2.0

## Breaking changes

* This breaking release drops support for `drop_unused_query_nodes` in `expand()`, which was both brittle and violated the rule that expansion should return a supergraph of the query

## Bug fixes

* Fixes a bug in `expand()` for `neo4j_engine()` causing an infinite loop.

# monarchr 1.7

## New features
Expand Down
2 changes: 0 additions & 2 deletions R/expand.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#' @param predicates A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.
#' @param categories A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.
#' @param transitive If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like `biolink:subclass_of`.
#' @param drop_unused_query_nodes If TRUE, remove query nodes from the result, unless they are at the neighborhood boundary, i.e., required for connecting to the result nodes. Default is FALSE.
#' @param ... Other parameters passed to methods.
#'
#' @return A `tbl_kgx()` graph
Expand Down Expand Up @@ -59,7 +58,6 @@ expand <- function(graph,
predicates = NULL,
categories = NULL,
transitive = FALSE,
drop_unused_query_nodes = FALSE,

...) {
UseMethod("expand")
Expand Down
9 changes: 6 additions & 3 deletions R/expand.tbl_kgx.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
#' @importFrom assertthat assert_that
expand.tbl_kgx <- function(graph, ...) {
# check to see if g has a last_engine attribute
active_tbl <- active(graph)
if(!is.null(attr(graph, "last_engine"))) {
engine <- attr(graph, "last_engine")
if(any(c("monarch_engine", "neo4j_engine") %in% class(engine))) {
return(expand_neo4j_engine(engine, graph, ...))
res <- expand_neo4j_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
return(res)
} else if("file_engine" %in% class(engine)) {
return(expand_file_engine(engine, graph, ...))
res <- expand_file_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
return(res)
} else {
stop("Error: unknown or incompatible engine.")
}
return(expand(engine, graph, ...))
# return(expand(engine, graph, ...)) # this shouldn't be reachable
} else {
stop("Error: tbl_kgx object does not have a most recent engine.")
}
Expand Down
51 changes: 8 additions & 43 deletions R/expand_file_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,7 @@ transitive_query_internal <- function(engine,
g,
direction = "out",
predicates = NULL,
categories = NULL,
drop_unused_query_nodes = FALSE) {

# # TODO: this block will never trigger; the check is done in the main function, remove
# if(length(predicates) > 1) {
# # we call recusively on each predicate
# for(predicate in predicates) {
# g2 <- transitive_query_internal(engine,
# g,
# direction = direction,
# predicates = predicate,
# categories = categories,
# drop_unused_query_nodes = TRUE)
# suppressMessages(g <- tidygraph::graph_join(g, g2), classes = "message") # suppress joining info
# }
# }
categories = NULL) {

# assert that direction is "out" or "in"
assert_that(direction == "out" | direction == "in", msg = "Direction must be 'out' or 'in' when using transitive closure.")
Expand Down Expand Up @@ -64,16 +49,6 @@ transitive_query_internal <- function(engine,
filter(purrr::map_lgl(category, ~ any(.x %in% categories)) | id %in% query_ids)
}

# in this logic, unused query nodes (those without any connection) are kept by default
# so we need to remove them if drop_unused_query_nodes is TRUE
# we can identify them in the result as those with no connected edges
if(drop_unused_query_nodes) {
bfs_edges <- bfs_result %>% activate(edges) %>% as_tibble()
bfs_nodes <- c(bfs_edges$object, bfs_edges$subject)
bfs_result <- bfs_result %>%
filter(id %in% bfs_nodes)
}

attr(bfs_result, "last_engine") <- engine
return(bfs_result)
}
Expand All @@ -83,8 +58,7 @@ direction_fetch_internal <- function(engine,
g,
direction = "out",
predicates = NULL,
categories = NULL,
drop_unused_query_nodes = FALSE) {
categories = NULL) {

engine_graph <- engine$graph

Expand Down Expand Up @@ -129,15 +103,7 @@ direction_fetch_internal <- function(engine,
filter(purrr::map_lgl(category, ~ any(.x %in% categories)) | id %in% node_ids)
}

# the logic above drops unused query nodes, but we can keep them if desired
# to do so we drop all the edges in the query graph, and join the result with new_edges
if(!drop_unused_query_nodes) {
query_no_edges <- g %>%
activate(edges) %>%
filter(FALSE)

suppressMessages(new_edges <- kg_join(query_no_edges, new_edges), classes = "message") # suppress joining info
}
suppressMessages(new_edges <- kg_join(g, new_edges), classes = "message") # suppress joining info

return(new_edges)
}
Expand All @@ -152,8 +118,7 @@ expand_file_engine <- function(engine,
direction = "both",
predicates = NULL,
categories = NULL,
transitive = FALSE,
drop_unused_query_nodes = FALSE) {
transitive = FALSE) {

assert_that(is.tbl_graph(graph))
assert_that(direction %in% c("in", "out", "both"))
Expand All @@ -167,14 +132,14 @@ expand_file_engine <- function(engine,
stop("Transitive closure requires exactly one specified predicate.")

} else if(transitive) {
new_edges <- transitive_query_internal(engine, graph, direction, predicates, categories, drop_unused_query_nodes)
new_edges <- transitive_query_internal(engine, graph, direction, predicates, categories)

} else {
if(direction == "out" || direction == "in") {
new_edges <- direction_fetch_internal(engine, graph, direction, predicates, categories, drop_unused_query_nodes)
new_edges <- direction_fetch_internal(engine, graph, direction, predicates, categories)
} else if(direction == "both") {
new_out_edges <- direction_fetch_internal(engine, graph, "out", predicates, categories, drop_unused_query_nodes)
new_in_edges <- direction_fetch_internal(engine, graph, "in", predicates, categories, drop_unused_query_nodes)
new_out_edges <- direction_fetch_internal(engine, graph, "out", predicates, categories)
new_in_edges <- direction_fetch_internal(engine, graph, "in", predicates, categories)
suppressMessages(new_edges <- kg_join(new_out_edges, new_in_edges), classes = "message") # suppress joining info
}
}
Expand Down
5 changes: 2 additions & 3 deletions R/expand_n.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
#' @inheritParams expand
#' @param transitive NULL (not used in this function).
#'
#' @return A `tbl_kgx()` graph
#' @return
#' A `tbl_kgx()` graph
#' @export
#' @examples
#' ## Using example KGX file packaged with monarchr
Expand All @@ -30,7 +31,6 @@ expand_n <- function(graph,
predicates = NULL,
categories = NULL,
transitive = NULL,
drop_unused_query_nodes = FALSE,
n=1,
...) {
## Check args
Expand Down Expand Up @@ -63,7 +63,6 @@ expand_n <- function(graph,
predicates = check_len(predicates,n,i),
categories = check_len(categories,n,i),
transitive = FALSE,
drop_unused_query_nodes = check_len(drop_unused_query_nodes,n,i),
...)
if(return_each) graph_list[[paste0("iteration",i)]] <- graph
message(paste(
Expand Down
7 changes: 1 addition & 6 deletions R/expand_neo4j_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ expand_neo4j_engine <- function(engine,
predicates = NULL,
categories = NULL,
transitive = FALSE,
drop_unused_query_nodes = FALSE,
page_size = 1000,
limit = NULL) {
## Sanity checks
Expand Down Expand Up @@ -184,11 +183,7 @@ expand_neo4j_engine <- function(engine,
tidygraph::activate(nodes) %>%
mutate(pcategory = normalize_categories(category, prefs$category_priority))

# if drop_unused_query_nodes is FALSE, we'll keep them by
# joining the result with the original graph
if(!drop_unused_query_nodes) {
suppressMessages(result_cumulative <- kg_join(graph, result_cumulative), classes = "message") # suppress joining info
}
suppressMessages(result_cumulative <- kg_join(graph, result_cumulative), classes = "message") # suppress joining info

attr(result_cumulative, "last_engine") <- engine
return(result_cumulative)
Expand Down
3 changes: 2 additions & 1 deletion R/fetch_nodes.neo4j_engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ fetch_nodes.neo4j_engine <- function(engine, ..., query_ids = NULL, page_size =
query <- "MATCH (n) WHERE n.id IN $id"
params <- list(id = query_ids)
} else {
query <- paste0("MATCH (n) WHERE ", generate_cypher_conditionals(...))
query <- paste0("MATCH (n) WHERE (", generate_cypher_conditionals(...), ")")
params <- list()
}

Expand Down Expand Up @@ -112,6 +112,7 @@ fetch_nodes.neo4j_engine <- function(engine, ..., query_ids = NULL, page_size =
if(last_result_size > 0) {
total_nodes_fetched <- total_nodes_fetched + last_result_size
last_max_node_id <- max(nodes(result)$id)

suppressMessages(result_cumulative <- graph_join(result_cumulative, result), class = "message")
message(paste("Fetching; fetched", total_nodes_fetched, "of", total_results))
}
Expand Down
6 changes: 5 additions & 1 deletion R/graph_centrality.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @inheritParams nodes
#' @returns Graph object with centrality added as a new node attribute.
#' @export
#' @importFrom tidygraph active
#' @importFrom tidygraph activate
#' @examples
#' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
#' g <- file_engine(filename) |>
Expand All @@ -23,9 +25,11 @@ graph_centrality <- function(graph,
fun=igraph::harmonic_centrality,
col="centrality",
...){
active_tbl <- active(graph)
message("Computing node centrality.")
graph <- graph|>
activate(nodes)|>
dplyr::mutate(!!col:=fun(graph, ...))
dplyr::mutate(!!col:=fun(graph, ...)) |>
activate(!!rlang::sym(active_tbl))
return(graph)
}
7 changes: 6 additions & 1 deletion R/graph_semsim.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#' @param sparse Return a sparse matrix instead of a dense matrix.
#' @param ... Additional arguments passed to the similarity function
#' (\code{fun}).
#' @import tidygraph
#' @import dplyr
#' @inheritParams nodes
#' @returns Graph object with similarity added as a new edge attribute.
#' @export
Expand All @@ -29,6 +31,7 @@ graph_semsim <- function(graph,
sparse=TRUE,
return_matrix=FALSE,
...){
active_tbl <- active(graph)
from <- to <- NULL;
message("Computing pairwise node similarity.")
X <- fun(graph, ...)
Expand All @@ -41,6 +44,8 @@ graph_semsim <- function(graph,

graph <- graph|>
activate(edges)|>
dplyr::mutate(!!col:=purrr::map2_dbl(from, to, ~ X[.y, .x]))
dplyr::mutate(!!col:=purrr::map2_dbl(from, to, ~ X[.y, .x])) |>
activate(!!rlang::sym(active_tbl))

return(graph)
}
5 changes: 4 additions & 1 deletion R/kg_edge_weights.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param encodings A list of named lists of encoding values for
#' different edge attributes.
#' @inheritParams nodes
#' @import tidygraph
#' @import dplyr
#' @export
#' @examples
Expand All @@ -25,6 +26,7 @@ kg_edge_weights <- function(graph,
encodings=monarch_edge_weight_encodings(),
fun=function(x){rowSums(x, na.rm = TRUE)}
){
active_tbl <- active(graph)
encoded_cols <- c()
for(key in names(encodings)){
nm_encoded <- paste0(key,"_encoded")
Expand Down Expand Up @@ -65,7 +67,8 @@ kg_edge_weights <- function(graph,
mutate(
across(all_of(encoded_cols),
~(min(.x, na.rm = TRUE)) / (max(.x, na.rm = TRUE))
))
)) |>
activate(!!rlang::sym(active_tbl))
}
igraph::E(graph)$weight <- fun(edges(graph)[,unique(encoded_cols)])
return(graph)
Expand Down
8 changes: 7 additions & 1 deletion R/transitive_closure.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ transitive_closure <- function(g, predicate = "biolink:subclass_of") {
if(length(predicate) != 1) {
stop("Error: predicate parameter of transitive_closure() must be length 1.")
}
# if there are no edges to close, return the input
p <- predicate
if(nrow(edges(g) |> filter(predicate == p)) == 0) {return(g)}

active_tbl <- active(g)

with_downstream <- g |>
activate(nodes) |>
Expand Down Expand Up @@ -56,7 +61,8 @@ transitive_closure <- function(g, predicate = "biolink:subclass_of") {
res <- g |>
tidygraph::bind_edges(new_edges) |>
activate(edges) |>
select(-edge_key)
select(-edge_key) |>
activate(!!rlang::sym(active_tbl))

res
}
2 changes: 2 additions & 0 deletions R/transitive_reduction.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#' @export
transitive_reduction <- function(g, predicate = "biolink:subclass_of") {
# first we make a copy
active_tbl <- active(g)
g2 <- g

# in the original, remove the predicate edges
Expand All @@ -56,6 +57,7 @@ transitive_reduction <- function(g, predicate = "biolink:subclass_of") {

# merge the original w g_reduced, adding back just the reduction edges
suppressMessages(g <- kg_join(g, g_reduced), classes = "message") # suppress joining info
g <- g |> activate(!!rlang::sym(active_tbl))

return(g)
}
3 changes: 0 additions & 3 deletions man/expand.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions man/expand_n.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading