From 25f0193ea0d3105292183914596e433d6d344ba9 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sat, 5 Apr 2025 20:28:33 +0200 Subject: [PATCH 01/36] added support for netzschleuder --- NAMESPACE | 2 + R/netzschleuder.R | 80 +++++++++++++++++++++++++++++++++ man/graph_from_netzschleuder.Rd | 24 ++++++++++ man/read_from_netzschleuder.Rd | 23 ++++++++++ 4 files changed, 129 insertions(+) create mode 100644 R/netzschleuder.R create mode 100644 man/graph_from_netzschleuder.Rd create mode 100644 man/read_from_netzschleuder.Rd diff --git a/NAMESPACE b/NAMESPACE index 6c0dbb1..088d600 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,8 @@ # Generated by roxygen2: do not edit by hand +export(graph_from_netzschleuder) export(lesmis_gml) export(lesmis_graphml) export(lesmis_pajek) +export(read_from_netzschleuder) importFrom(igraph,vcount) diff --git a/R/netzschleuder.R b/R/netzschleuder.R new file mode 100644 index 0000000..8bd7090 --- /dev/null +++ b/R/netzschleuder.R @@ -0,0 +1,80 @@ +#' Download a graph from the Netzschleuder data catalogue +#' Netzschleuder () is a large online repository for +#' network datasets with the aim of aiding scientific research. +#' @param name character. name of the network dataset. +#' @param net character. If the dataset contains several networks this is the network name. 
+#' @return a named list containing an edge list and node attribute data frame and some metadata +#' @export +read_from_netzschleuder <- function(name, net = NULL) { + if (is.null(net)) { + net <- name + } + zip_url <- paste0( + "https://networks.skewed.de/net/", + name, + "/files/", + net, + ".csv.zip" + ) + + temp <- tempfile() + utils::download.file(zip_url, temp, quiet = TRUE) #TODO: add better error handling + zip_contents <- utils::unzip(temp, list = TRUE) + + edge_file_name <- zip_contents$Name[grepl("edge", zip_contents$Name)] + node_file_name <- zip_contents$Name[grepl("node", zip_contents$Name)] + meta_file_name <- zip_contents$Name[grepl("gprops", zip_contents$Name)] + + edges_df <- utils::read.csv(unz(temp, edge_file_name)) + 1 + names(edges_df)[c(1, 2)] <- c("from", "to") + + nodes_df <- utils::read.csv(unz(temp, node_file_name)) + names(nodes_df)[1] <- "id" + nodes_df$id <- nodes_df$id + 1 + if ("X_pos" %in% names(nodes_df)) { + pos_array <- gsub("array\\(\\[|\\]|\\)", "", nodes_df[["X_pos"]]) + split_coords <- strsplit(pos_array, ",") + + x_vals <- sapply(split_coords, function(x) as.numeric(trimws(x[1]))) + y_vals <- sapply(split_coords, function(x) as.numeric(trimws(x[2]))) + + nodes_df[["X_pos"]] <- NULL + nodes_df$x <- x_vals + nodes_df$y <- y_vals + } + + meta_df <- utils::read.csv(unz(temp, meta_file_name)) + on.exit(unlink(temp)) + + list(nodes = nodes_df, edges = edges_df, meta = meta_df) +} + +#' Create a graph from the Netzschleuder data catalogue +#' +#' Netzschleuder () is a large online repository for +#' network datasets with the aim of aiding scientific research. +#' @param name character. name of the network dataset. +#' @param net character. If the dataset contains several networks this is the network name. +#' @param directed logical. Whether a directed graph is constructed. +#' @param bipartite logical. Whether a bipartite graph is constructed. +#' @return a new graph object. 
+#' @export +graph_from_netzschleuder <- function( + name, + net = NULL, + directed = FALSE, + bipartite = FALSE +) { + graph_data <- read_from_netzschleuder(name, net) + g <- igraph::graph_from_data_frame( + graph_data$edges, + directed = directed, + vertices = graph_data$nodes + ) + if (bipartite) { + types <- rep(FALSE, igraph::vcount(g)) + types[graph_data$nodes$id %in% graph_data$edges[, 1]] <- TRUE + g <- igraph::set_vertex_attr(g, "type", value = types) + } + g +} diff --git a/man/graph_from_netzschleuder.Rd b/man/graph_from_netzschleuder.Rd new file mode 100644 index 0000000..2719ca3 --- /dev/null +++ b/man/graph_from_netzschleuder.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/netzschleuder.R +\name{graph_from_netzschleuder} +\alias{graph_from_netzschleuder} +\title{Create a graph from the Netzschleuder data catalogue} +\usage{ +graph_from_netzschleuder(name, net = NULL, directed = FALSE, bipartite = FALSE) +} +\arguments{ +\item{name}{character. name of the network dataset.} + +\item{net}{character. If the dataset contains several networks this is the network name.} + +\item{directed}{logical. Whether a directed graph is constructed.} + +\item{bipartite}{logical. Whether a bipartite graph is constructed.} +} +\value{ +a new graph object. +} +\description{ +Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for +network datasets with the aim of aiding scientific research. 
+} diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd new file mode 100644 index 0000000..df2ce08 --- /dev/null +++ b/man/read_from_netzschleuder.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/netzschleuder.R +\name{read_from_netzschleuder} +\alias{read_from_netzschleuder} +\title{Download a graph from the Netzschleuder data catalogue +Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for +network datasets with the aim of aiding scientific research.} +\usage{ +read_from_netzschleuder(name, net = NULL) +} +\arguments{ +\item{name}{character. name of the network dataset.} + +\item{net}{character. If the dataset contains several networks this is the network name.} +} +\value{ +a named list containing an edge list and node attribute data frame and some metadata +} +\description{ +Download a graph from the Netzschleuder data catalogue +Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for +network datasets with the aim of aiding scientific research. 
+} From 67ec488a779a71872cba6e53c6f7d04e34fa2713 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sat, 5 Apr 2025 18:32:02 +0000 Subject: [PATCH 02/36] chore: Auto-update from GitHub Actions Run: https://github.com/igraph/igraphdata/actions/runs/14284649633 --- .github/dep-suggests-matrix.json | 0 .github/versions-matrix.json | 1 - 2 files changed, 1 deletion(-) delete mode 100644 .github/dep-suggests-matrix.json delete mode 100644 .github/versions-matrix.json diff --git a/.github/dep-suggests-matrix.json b/.github/dep-suggests-matrix.json deleted file mode 100644 index e69de29..0000000 diff --git a/.github/versions-matrix.json b/.github/versions-matrix.json deleted file mode 100644 index 5af4434..0000000 --- a/.github/versions-matrix.json +++ /dev/null @@ -1 +0,0 @@ -{"include":[{"os":"macos-latest","r":"4.4"},{"os":"macos-latest","r":"4.3"},{"os":"windows-latest","r":"devel"},{"os":"windows-latest","r":"4.4"},{"os":"windows-latest","r":"4.3"},{"os":"ubuntu-22.04","r":"devel","http-user-agent":"release"},{"os":"ubuntu-22.04","r":"4.4"},{"os":"ubuntu-22.04","r":"4.3"},{"os":"ubuntu-22.04","r":"4.2"},{"os":"ubuntu-22.04","r":"4.1"},{"os":"ubuntu-22.04","r":"4.0"},{"os":"ubuntu-22.04","r":"4.4","covr":"true","desc":"with covr"}]} From 5c5895d1a630d55245ac3a541c1a6908901f3451 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sat, 5 Apr 2025 20:54:29 +0200 Subject: [PATCH 03/36] better source/target handling --- R/netzschleuder.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 8bd7090..f14dacb 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -25,8 +25,13 @@ read_from_netzschleuder <- function(name, net = NULL) { node_file_name <- zip_contents$Name[grepl("node", zip_contents$Name)] meta_file_name <- zip_contents$Name[grepl("gprops", zip_contents$Name)] - edges_df <- utils::read.csv(unz(temp, edge_file_name)) + 1 - names(edges_df)[c(1, 2)] <- c("from", "to") + edges_df <- 
utils::read.csv(unz(temp, edge_file_name)) + source_loc <- grep("source", names(edges_df)) + target_loc <- grep("target", names(edges_df)) + + edges_df[[source_loc]] <- edges_df[[source_loc]] + 1 + edges_df[[target_loc]] <- edges_df[[target_loc]] + 1 + names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") nodes_df <- utils::read.csv(unz(temp, node_file_name)) names(nodes_df)[1] <- "id" From 233b9e00d79fd88c91804e8f9a1c535701f7df97 Mon Sep 17 00:00:00 2001 From: schochastics Date: Sun, 6 Apr 2025 20:12:13 +0200 Subject: [PATCH 04/36] added token for restricted networks --- R/netzschleuder.R | 9 +++++++-- man/read_from_netzschleuder.Rd | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index f14dacb..6eea858 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -3,9 +3,10 @@ #' network datasets with the aim of aiding scientific research. #' @param name character. name of the network dataset. #' @param net character. If the dataset contains several networks this is the network name. +#' @param token character. Some networks have restricted access and need a toke. 
See <https://networks.skewed.de/restricted> #' @return a named list containing an edge list and node attribute data frame and some metadata #' @export -read_from_netzschleuder <- function(name, net = NULL) { +read_from_netzschleuder <- function(name, net = NULL, token = NULL) { if (is.null(net)) { net <- name } @@ -18,7 +19,11 @@ read_from_netzschleuder <- function(name, net = NULL) { ) temp <- tempfile() - utils::download.file(zip_url, temp, quiet = TRUE) #TODO: add better error handling + headers <- NULL + if (!is.null(token)) { + headers <- c("WWW-Authenticate" = token) + } + utils::download.file(zip_url, temp, headers = headers, quiet = TRUE) #TODO: add better error handling zip_contents <- utils::unzip(temp, list = TRUE) edge_file_name <- zip_contents$Name[grepl("edge", zip_contents$Name)] diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd index df2ce08..44cda19 100644 --- a/man/read_from_netzschleuder.Rd +++ b/man/read_from_netzschleuder.Rd @@ -6,12 +6,14 @@ Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for network datasets with the aim of aiding scientific research.} \usage{ -read_from_netzschleuder(name, net = NULL) +read_from_netzschleuder(name, net = NULL, token = NULL) } \arguments{ \item{name}{character. name of the network dataset.} \item{net}{character. If the dataset contains several networks this is the network name.} + +\item{token}{character. Some networks have restricted access and need a toke. 
See \url{https://networks.skewed.de/restricted}} } \value{ a named list containing an edge list and node attribute data frame and some metadata From e521a71619d8133d2514bab4d47e1dd93f47a704 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 7 Apr 2025 08:16:47 +0200 Subject: [PATCH 05/36] added minty for type checking --- DESCRIPTION | 3 ++- R/netzschleuder.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 73ea12d..d2ffb92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,8 @@ BugReports: https://github.com/igraph/igraphdata/issues Depends: R (>= 2.10) Imports: - igraph (>= 1.5.0) + igraph (>= 1.5.0), + minty Suggests: testthat (>= 3.0.0) Encoding: UTF-8 diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 6eea858..9759c0e 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -30,7 +30,7 @@ read_from_netzschleuder <- function(name, net = NULL, token = NULL) { node_file_name <- zip_contents$Name[grepl("node", zip_contents$Name)] meta_file_name <- zip_contents$Name[grepl("gprops", zip_contents$Name)] - edges_df <- utils::read.csv(unz(temp, edge_file_name)) + edges_df <- minty::type_convert(utils::read.csv(unz(temp, edge_file_name))) source_loc <- grep("source", names(edges_df)) target_loc <- grep("target", names(edges_df)) @@ -38,7 +38,7 @@ read_from_netzschleuder <- function(name, net = NULL, token = NULL) { edges_df[[target_loc]] <- edges_df[[target_loc]] + 1 names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") - nodes_df <- utils::read.csv(unz(temp, node_file_name)) + nodes_df <- minty::type_convert(utils::read.csv(unz(temp, node_file_name))) names(nodes_df)[1] <- "id" nodes_df$id <- nodes_df$id + 1 if ("X_pos" %in% names(nodes_df)) { From 4b07fc2df3e2a99746bac73129b0d6bd55587d33 Mon Sep 17 00:00:00 2001 From: David Schoch Date: Thu, 10 Apr 2025 13:19:50 +0200 Subject: [PATCH 06/36] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Co-authored-by: Maëlle Salmon --- R/netzschleuder.R | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 9759c0e..797fbda 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -6,7 +6,7 @@ #' @param token character. Some networks have restricted access and need a toke. See #' @return a named list containing an edge list and node attribute data frame and some metadata #' @export -read_from_netzschleuder <- function(name, net = NULL, token = NULL) { +read_from_netzschleuder <- function(name, ..., net = NULL, token = NULL) { if (is.null(net)) { net <- name } @@ -26,7 +26,7 @@ read_from_netzschleuder <- function(name, net = NULL, token = NULL) { utils::download.file(zip_url, temp, headers = headers, quiet = TRUE) #TODO: add better error handling zip_contents <- utils::unzip(temp, list = TRUE) - edge_file_name <- zip_contents$Name[grepl("edge", zip_contents$Name)] + edge_file_name <- grep("edge", zip_contents$Name, value = TRUE) node_file_name <- zip_contents$Name[grepl("node", zip_contents$Name)] meta_file_name <- zip_contents$Name[grepl("gprops", zip_contents$Name)] @@ -54,6 +54,7 @@ read_from_netzschleuder <- function(name, net = NULL, token = NULL) { } meta_df <- utils::read.csv(unz(temp, meta_file_name)) + on.exit(unlink(temp)) list(nodes = nodes_df, edges = edges_df, meta = meta_df) @@ -81,10 +82,12 @@ graph_from_netzschleuder <- function( directed = directed, vertices = graph_data$nodes ) + if (bipartite) { types <- rep(FALSE, igraph::vcount(g)) types[graph_data$nodes$id %in% graph_data$edges[, 1]] <- TRUE g <- igraph::set_vertex_attr(g, "type", value = types) } + g } From 33ae5edf99f2fee25a49fc566b764dd275d20f42 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 11:22:53 +0000 Subject: [PATCH 07/36] chore: Auto-update from GitHub Actions Run: https://github.com/igraph/igraphdata/actions/runs/14379148488 --- man/read_from_netzschleuder.Rd | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd index 44cda19..6592fd9 100644 --- a/man/read_from_netzschleuder.Rd +++ b/man/read_from_netzschleuder.Rd @@ -6,7 +6,7 @@ Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for network datasets with the aim of aiding scientific research.} \usage{ -read_from_netzschleuder(name, net = NULL, token = NULL) +read_from_netzschleuder(name, ..., net = NULL, token = NULL) } \arguments{ \item{name}{character. name of the network dataset.} From db8b0dbbc0eb3e274952f58d8628531474deda68 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 13:29:11 +0200 Subject: [PATCH 08/36] changes of review --- R/netzschleuder.R | 39 ++++++++++++++++++--------------- man/graph_from_netzschleuder.Rd | 10 +++++++-- man/read_from_netzschleuder.Rd | 4 ++-- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 797fbda..59a278a 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -2,20 +2,18 @@ #' Netzschleuder () is a large online repository for #' network datasets with the aim of aiding scientific research. #' @param name character. name of the network dataset. -#' @param net character. If the dataset contains several networks this is the network name. +#' @param network character. If the dataset contains several networks this is the network name. #' @param token character. Some networks have restricted access and need a toke. 
See #' @return a named list containing an edge list and node attribute data frame and some metadata #' @export -read_from_netzschleuder <- function(name, ..., net = NULL, token = NULL) { - if (is.null(net)) { - net <- name +read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { + if (is.null(network)) { + network <- name } - zip_url <- paste0( - "https://networks.skewed.de/net/", + zip_url <- sprintf( + "https://networks.skewed.de/net/%s/files/%s.csv.zip", name, - "/files/", - net, - ".csv.zip" + network ) temp <- tempfile() @@ -27,20 +25,25 @@ read_from_netzschleuder <- function(name, ..., net = NULL, token = NULL) { zip_contents <- utils::unzip(temp, list = TRUE) edge_file_name <- grep("edge", zip_contents$Name, value = TRUE) - node_file_name <- zip_contents$Name[grepl("node", zip_contents$Name)] - meta_file_name <- zip_contents$Name[grepl("gprops", zip_contents$Name)] + node_file_name <- grep("node", zip_contents$Name, value = TRUE) + meta_file_name <- grep("gprops", zip_contents$Name, value = TRUE) - edges_df <- minty::type_convert(utils::read.csv(unz(temp, edge_file_name))) + edges_df_raw <- utils::read.csv(unz(temp, edge_file_name)) + edges_df <- minty::type_convert(edges_df_raw) source_loc <- grep("source", names(edges_df)) target_loc <- grep("target", names(edges_df)) + # netzschleuder uses 0-indexing, igraph uses 1-indexing edges_df[[source_loc]] <- edges_df[[source_loc]] + 1 edges_df[[target_loc]] <- edges_df[[target_loc]] + 1 names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") - nodes_df <- minty::type_convert(utils::read.csv(unz(temp, node_file_name))) + nodes_df_raw <- utils::read.csv(unz(temp, node_file_name)) + nodes_df <- minty::type_convert(nodes_df_raw) names(nodes_df)[1] <- "id" - nodes_df$id <- nodes_df$id + 1 + + # netzschleuder uses 0-indexing, igraph uses 1-indexing + nodes_df[[id]] <- nodes_df[[id]] + 1 if ("X_pos" %in% names(nodes_df)) { pos_array <- gsub("array\\(\\[|\\]|\\)", "", nodes_df[["X_pos"]]) 
split_coords <- strsplit(pos_array, ",") @@ -64,19 +67,19 @@ read_from_netzschleuder <- function(name, ..., net = NULL, token = NULL) { #' #' Netzschleuder () is a large online repository for #' network datasets with the aim of aiding scientific research. -#' @param name character. name of the network dataset. -#' @param net character. If the dataset contains several networks this is the network name. +#' @inheritParams read_from_netzschleuder #' @param directed logical. Whether a directed graph is constructed. #' @param bipartite logical. Whether a bipartite graph is constructed. #' @return a new graph object. #' @export graph_from_netzschleuder <- function( name, - net = NULL, + ..., + network = NULL, directed = FALSE, bipartite = FALSE ) { - graph_data <- read_from_netzschleuder(name, net) + graph_data <- read_from_netzschleuder(name, network) g <- igraph::graph_from_data_frame( graph_data$edges, directed = directed, diff --git a/man/graph_from_netzschleuder.Rd b/man/graph_from_netzschleuder.Rd index 2719ca3..db0030c 100644 --- a/man/graph_from_netzschleuder.Rd +++ b/man/graph_from_netzschleuder.Rd @@ -4,12 +4,18 @@ \alias{graph_from_netzschleuder} \title{Create a graph from the Netzschleuder data catalogue} \usage{ -graph_from_netzschleuder(name, net = NULL, directed = FALSE, bipartite = FALSE) +graph_from_netzschleuder( + name, + ..., + network = NULL, + directed = FALSE, + bipartite = FALSE +) } \arguments{ \item{name}{character. name of the network dataset.} -\item{net}{character. If the dataset contains several networks this is the network name.} +\item{network}{character. If the dataset contains several networks this is the network name.} \item{directed}{logical. 
Whether a directed graph is constructed.} diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd index 6592fd9..db22bce 100644 --- a/man/read_from_netzschleuder.Rd +++ b/man/read_from_netzschleuder.Rd @@ -6,12 +6,12 @@ Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for network datasets with the aim of aiding scientific research.} \usage{ -read_from_netzschleuder(name, ..., net = NULL, token = NULL) +read_from_netzschleuder(name, ..., network = NULL, token = NULL) } \arguments{ \item{name}{character. name of the network dataset.} -\item{net}{character. If the dataset contains several networks this is the network name.} +\item{network}{character. If the dataset contains several networks this is the network name.} \item{token}{character. Some networks have restricted access and need a toke. See \url{https://networks.skewed.de/restricted}} } From 51046b97ddc26e602a47be1b8d1a093e078e4abe Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 13:54:03 +0200 Subject: [PATCH 09/36] added missing quotes --- R/netzschleuder.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 59a278a..10b322e 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -43,7 +43,7 @@ read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { names(nodes_df)[1] <- "id" # netzschleuder uses 0-indexing, igraph uses 1-indexing - nodes_df[[id]] <- nodes_df[[id]] + 1 + nodes_df[["id"]] <- nodes_df[["id"]] + 1 if ("X_pos" %in% names(nodes_df)) { pos_array <- gsub("array\\(\\[|\\]|\\)", "", nodes_df[["X_pos"]]) split_coords <- strsplit(pos_array, ",") From d8b1f774a5578a7f63b130cb26f377b08b22689f Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 14:17:11 +0200 Subject: [PATCH 10/36] added proper httr2 requests with custom ratelimits --- DESCRIPTION | 5 +++-- R/netzschleuder.R | 39 +++++++++++++++++++++++++++++++++------ 2 files changed, 36 
insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2ffb92..dbaf569 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,9 +20,10 @@ License: CC BY-SA 4.0 + file LICENSE URL: http://igraph.org BugReports: https://github.com/igraph/igraphdata/issues Depends: - R (>= 2.10) + R (>= 4.0) Imports: - igraph (>= 1.5.0), + httr2, + igraph (>= 2.0.0), minty Suggests: testthat (>= 3.0.0) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 10b322e..0308c84 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -1,3 +1,34 @@ +download_file <- function(zip_url, token = NULL, file) { + req <- httr2::request(zip_url) + req <- httr2::req_throttle(req, capacity = 20, fill_time_s = 60) + + if (!is.null(token)) { + req <- httr2::req_headers(req, `WWW-Authenticate` = token) + } + + req <- httr2::req_user_agent( + req, + "R package igraphdata (github.com/igraph/igraphdata)" + ) + + resp <- httr2::req_perform(req) + + if (httr2::resp_status(resp) != 200) { + stop("Failed to download file. Status: ", httr2::resp_status(resp)) + } + + if (!grepl("application/zip", httr2::resp_content_type(resp))) { + warning( + "Response does not look like a ZIP file (Content-Type: ", + httr2::resp_content_type(resp), + ")" + ) + } + + writeBin(httr2::resp_body_raw(resp), file) + invisible(NULL) +} + #' Download a graph from the Netzschleuder data catalogue #' Netzschleuder () is a large online repository for #' network datasets with the aim of aiding scientific research. 
@@ -15,13 +46,9 @@ read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { name, network ) + temp <- tempfile(fileext = "zip") + download_file(zip_url, token = token, file = temp) - temp <- tempfile() - headers <- NULL - if (!is.null(token)) { - headers <- c("WWW-Authenticate" = token) - } - utils::download.file(zip_url, temp, headers = headers, quiet = TRUE) #TODO: add better error handling zip_contents <- utils::unzip(temp, list = TRUE) edge_file_name <- grep("edge", zip_contents$Name, value = TRUE) From bc6a34a410ad3987c35fb52d63d59b83ba200465 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 22:12:41 +0200 Subject: [PATCH 11/36] added slash notation and directed/bipartite checking via metadata --- DESCRIPTION | 4 +- R/netzschleuder.R | 87 +++++++++++++++++++++++---------- man/graph_from_netzschleuder.Rd | 12 +---- man/read_from_netzschleuder.Rd | 6 +-- 4 files changed, 67 insertions(+), 42 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dbaf569..f762671 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,9 +22,11 @@ BugReports: https://github.com/igraph/igraphdata/issues Depends: R (>= 4.0) Imports: + cli, httr2, igraph (>= 2.0.0), - minty + minty, + rlang Suggests: testthat (>= 3.0.0) Encoding: UTF-8 diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 0308c84..1d71b44 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -1,5 +1,5 @@ -download_file <- function(zip_url, token = NULL, file) { - req <- httr2::request(zip_url) +make_request <- function(url, token = NULL) { + req <- httr2::request(url) req <- httr2::req_throttle(req, capacity = 20, fill_time_s = 60) if (!is.null(token)) { @@ -17,35 +17,71 @@ download_file <- function(zip_url, token = NULL, file) { stop("Failed to download file. 
Status: ", httr2::resp_status(resp)) } - if (!grepl("application/zip", httr2::resp_content_type(resp))) { - warning( - "Response does not look like a ZIP file (Content-Type: ", - httr2::resp_content_type(resp), - ")" - ) + resp +} + +resolve_name <- function(x) { + if (grepl("/", x)) { + return(strsplit(x, "/", fixed = TRUE)[[1]]) + } else { + c(x, x) } +} +download_file <- function(zip_url, token = NULL, file) { + resp <- make_request(zip_url, token) writeBin(httr2::resp_body_raw(resp), file) invisible(NULL) } +meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { + url <- sprintf("https://networks.skewed.de/api/net/%s", net_ident[1]) + resp <- make_request(url) + raw <- httr2::resp_body_json(resp) + if (net_ident[1] == net_ident[2] && length(unlist(raw$nets)) > 1) { + cli::cli_abort( + c( + "{net_ident[1]} is a collection and downloading a whole collection is not permitted.", + "i" = "see {.url https://networks.skewed.de/net/{net_ident[1]}}" + ), + call = call + ) + } else if (net_ident[1] == net_ident[2]) { + return(raw) + } else { + idx <- which(unlist(raw[["nets"]]) == net_ident[2]) + if (length(idx) == 0) { + cli::cli_abort( + c( + "{net_ident[2]} is not part of the collection {net_ident[1]}.", + "i" = "see {.url https://networks.skewed.de/net/{net_ident[1]}}" + ), + call = call + ) + } + raw[["analyses"]] <- raw[["analyses"]][[net_ident[2]]] + raw[["nets"]] <- raw[["nets"]][idx] + raw + } +} + #' Download a graph from the Netzschleuder data catalogue #' Netzschleuder () is a large online repository for #' network datasets with the aim of aiding scientific research. -#' @param name character. name of the network dataset. -#' @param network character. If the dataset contains several networks this is the network name. +#' @param name character. name of the network dataset. To get a network from a collection, use `/`. #' @param token character. Some networks have restricted access and need a toke. 
See #' @return a named list containing an edge list and node attribute data frame and some metadata #' @export -read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { - if (is.null(network)) { - network <- name - } +read_from_netzschleuder <- function(name, token = NULL) { + net_ident <- resolve_name(name) + meta <- meta_from_netzschleuder(net_ident) + zip_url <- sprintf( "https://networks.skewed.de/net/%s/files/%s.csv.zip", - name, - network + net_ident[1], + net_ident[2] ) + temp <- tempfile(fileext = "zip") download_file(zip_url, token = token, file = temp) @@ -53,7 +89,7 @@ read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { edge_file_name <- grep("edge", zip_contents$Name, value = TRUE) node_file_name <- grep("node", zip_contents$Name, value = TRUE) - meta_file_name <- grep("gprops", zip_contents$Name, value = TRUE) + gprops_file_name <- grep("gprops", zip_contents$Name, value = TRUE) edges_df_raw <- utils::read.csv(unz(temp, edge_file_name)) edges_df <- minty::type_convert(edges_df_raw) @@ -83,11 +119,11 @@ read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { nodes_df$y <- y_vals } - meta_df <- utils::read.csv(unz(temp, meta_file_name)) + gprops_df <- utils::read.csv(unz(temp, gprops_file_name)) on.exit(unlink(temp)) - list(nodes = nodes_df, edges = edges_df, meta = meta_df) + list(nodes = nodes_df, edges = edges_df, gprops = gprops_df, meta = meta) } #' Create a graph from the Netzschleuder data catalogue @@ -95,18 +131,15 @@ read_from_netzschleuder <- function(name, ..., network = NULL, token = NULL) { #' Netzschleuder () is a large online repository for #' network datasets with the aim of aiding scientific research. #' @inheritParams read_from_netzschleuder -#' @param directed logical. Whether a directed graph is constructed. -#' @param bipartite logical. Whether a bipartite graph is constructed. #' @return a new graph object. 
#' @export graph_from_netzschleuder <- function( - name, - ..., - network = NULL, - directed = FALSE, - bipartite = FALSE + name ) { - graph_data <- read_from_netzschleuder(name, network) + graph_data <- read_from_netzschleuder(name) + directed <- graph_data$meta[["analyses"]][["is_directed"]] + bipartite <- graph_data$meta[["analyses"]][["is_bipartite"]] + g <- igraph::graph_from_data_frame( graph_data$edges, directed = directed, diff --git a/man/graph_from_netzschleuder.Rd b/man/graph_from_netzschleuder.Rd index db0030c..123eb2e 100644 --- a/man/graph_from_netzschleuder.Rd +++ b/man/graph_from_netzschleuder.Rd @@ -4,18 +4,10 @@ \alias{graph_from_netzschleuder} \title{Create a graph from the Netzschleuder data catalogue} \usage{ -graph_from_netzschleuder( - name, - ..., - network = NULL, - directed = FALSE, - bipartite = FALSE -) +graph_from_netzschleuder(name, directed = FALSE, bipartite = FALSE) } \arguments{ -\item{name}{character. name of the network dataset.} - -\item{network}{character. If the dataset contains several networks this is the network name.} +\item{name}{character. name of the network dataset. To get a network from a collection, use \verb{/}.} \item{directed}{logical. Whether a directed graph is constructed.} diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd index db22bce..4ac268d 100644 --- a/man/read_from_netzschleuder.Rd +++ b/man/read_from_netzschleuder.Rd @@ -6,12 +6,10 @@ Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for network datasets with the aim of aiding scientific research.} \usage{ -read_from_netzschleuder(name, ..., network = NULL, token = NULL) +read_from_netzschleuder(name, token = NULL) } \arguments{ -\item{name}{character. name of the network dataset.} - -\item{network}{character. If the dataset contains several networks this is the network name.} +\item{name}{character. name of the network dataset. 
To get a network from a collection, use \verb{/}.} \item{token}{character. Some networks have restricted access and need a toke. See \url{https://networks.skewed.de/restricted}} } From d7a0b06854dbb58dc5d1fa81f52e4cba6a7c1542 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 22:15:40 +0200 Subject: [PATCH 12/36] suppress minty warnings --- R/netzschleuder.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 1d71b44..4b075ba 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -92,7 +92,7 @@ read_from_netzschleuder <- function(name, token = NULL) { gprops_file_name <- grep("gprops", zip_contents$Name, value = TRUE) edges_df_raw <- utils::read.csv(unz(temp, edge_file_name)) - edges_df <- minty::type_convert(edges_df_raw) + edges_df <- suppressWarnings(minty::type_convert(edges_df_raw)) source_loc <- grep("source", names(edges_df)) target_loc <- grep("target", names(edges_df)) @@ -102,7 +102,7 @@ read_from_netzschleuder <- function(name, token = NULL) { names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") nodes_df_raw <- utils::read.csv(unz(temp, node_file_name)) - nodes_df <- minty::type_convert(nodes_df_raw) + nodes_df <- suppressWarnings(minty::type_convert(nodes_df_raw)) names(nodes_df)[1] <- "id" # netzschleuder uses 0-indexing, igraph uses 1-indexing From ca892642479dcff35655e2e86d029daba7c9f40c Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 22:17:09 +0200 Subject: [PATCH 13/36] docs update --- man/graph_from_netzschleuder.Rd | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/man/graph_from_netzschleuder.Rd b/man/graph_from_netzschleuder.Rd index 123eb2e..64389c9 100644 --- a/man/graph_from_netzschleuder.Rd +++ b/man/graph_from_netzschleuder.Rd @@ -4,14 +4,10 @@ \alias{graph_from_netzschleuder} \title{Create a graph from the Netzschleuder data catalogue} \usage{ -graph_from_netzschleuder(name, directed = FALSE, bipartite = FALSE) 
+graph_from_netzschleuder(name) } \arguments{ \item{name}{character. name of the network dataset. To get a network from a collection, use \verb{/}.} - -\item{directed}{logical. Whether a directed graph is constructed.} - -\item{bipartite}{logical. Whether a bipartite graph is constructed.} } \value{ a new graph object. From a7c2ef192607dcfcfc3383b46afeeb32eccfabff Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 10 Apr 2025 22:28:08 +0200 Subject: [PATCH 14/36] correct collection url --- R/netzschleuder.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 4b075ba..40a16bf 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -36,13 +36,14 @@ download_file <- function(zip_url, token = NULL, file) { meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { url <- sprintf("https://networks.skewed.de/api/net/%s", net_ident[1]) + collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) resp <- make_request(url) raw <- httr2::resp_body_json(resp) if (net_ident[1] == net_ident[2] && length(unlist(raw$nets)) > 1) { cli::cli_abort( c( "{net_ident[1]} is a collection and downloading a whole collection is not permitted.", - "i" = "see {.url https://networks.skewed.de/net/{net_ident[1]}}" + "i" = "see {.url {collection_url}}" ), call = call ) @@ -54,7 +55,7 @@ meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { cli::cli_abort( c( "{net_ident[2]} is not part of the collection {net_ident[1]}.", - "i" = "see {.url https://networks.skewed.de/net/{net_ident[1]}}" + "i" = "see {.url {colelction_url}}" ), call = call ) From 4b473a17635b5526b328365cb061c53293baa6f4 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 14 Apr 2025 08:51:50 +0200 Subject: [PATCH 15/36] new netzschleuder namespace --- NAMESPACE | 5 ++- R/netzschleuder.R | 66 +++++++++++++++++++++++---------- man/graph_from_netzschleuder.Rd | 18 --------- man/netzschleuder.Rd | 54 
+++++++++++++++++++++++++++ man/read_from_netzschleuder.Rd | 23 ------------ 5 files changed, 103 insertions(+), 63 deletions(-) delete mode 100644 man/graph_from_netzschleuder.Rd create mode 100644 man/netzschleuder.Rd delete mode 100644 man/read_from_netzschleuder.Rd diff --git a/NAMESPACE b/NAMESPACE index 088d600..9c5c212 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,8 +1,9 @@ # Generated by roxygen2: do not edit by hand -export(graph_from_netzschleuder) export(lesmis_gml) export(lesmis_graphml) export(lesmis_pajek) -export(read_from_netzschleuder) +export(ns_df) +export(ns_graph) +export(ns_metadata) importFrom(igraph,vcount) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 40a16bf..21b4201 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -34,7 +34,45 @@ download_file <- function(zip_url, token = NULL, file) { invisible(NULL) } -meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { +#' Download and Convert Graph Data from Netzschleuder +#' +#' These functions provide tools to interact with the Netzschleuder network dataset archive. +#' Netzschleuder (<https://networks.skewed.de/>) is a large online repository for network datasets, +#' aimed at aiding scientific research. +#' \describe{ +#' \item{`ns_metadata()`}{ retrieves metadata about a network or network collection.} +#' \item{`ns_df()`}{downloads the graph data as data frames (nodes, edges, and graph properties).} +#' \item{`ns_graph()`}{creates an `igraph` object directly from Netzschleuder.} +#' } +#' +#' @param name Character. The name of the network dataset. To get a network from a collection, +#' use the format `<collection_name>/<network_name>`. +#' @param token Character. Some networks have restricted access and require a token. +#' See <https://networks.skewed.de/restricted>.
+#' +#' @return +#' \describe{ +#' \item{`ns_metadata()`}{A list containing metadata for the dataset.} +#' \item{`ns_df()`}{A named list with `nodes`, `edges`, `gprops`, and `meta`.} +#' \item{`ns_graph()`}{An `igraph` object.} +#' } +#' @examples +#' \dontrun{ +#' # Get metadata +#' ns_metadata("copenhagen/calls") +#' +#' # Download network as data frames +#' graph_data <- ns_df("copenhagen/calls") +#' +#' # Create an igraph object +#' g <- ns_graph("copenhagen/calls") +#' } +#' +#' @seealso <https://networks.skewed.de/> +#' @rdname netzschleuder +#' @export +ns_metadata <- function(name, call = rlang::caller_env()) { + net_ident <- resolve_name(name) url <- sprintf("https://networks.skewed.de/api/net/%s", net_ident[1]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) resp <- make_request(url) @@ -55,7 +93,7 @@ meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { cli::cli_abort( c( "{net_ident[2]} is not part of the collection {net_ident[1]}.", - "i" = "see {.url {colelction_url}}" + "i" = "see {.url {collection_url}}" ), call = call ) @@ -66,16 +104,11 @@ meta_from_netzschleuder <- function(net_ident, call = rlang::caller_env()) { } } -#' Download a graph from the Netzschleuder data catalogue -#' Netzschleuder (<https://networks.skewed.de/>) is a large online repository for -#' network datasets with the aim of aiding scientific research. -#' @param name character. name of the network dataset. To get a network from a collection, use `<collection_name>/<network_name>`. -#' @param token character. Some networks have restricted access and need a toke.
See -#' @return a named list containing an edge list and node attribute data frame and some metadata +#' @rdname netzschleuder #' @export -read_from_netzschleuder <- function(name, token = NULL) { +ns_df <- function(name, token = NULL) { + meta <- ns_metadata(name) net_ident <- resolve_name(name) - meta <- meta_from_netzschleuder(net_ident) zip_url <- sprintf( "https://networks.skewed.de/net/%s/files/%s.csv.zip", @@ -127,17 +160,10 @@ read_from_netzschleuder <- function(name, token = NULL) { list(nodes = nodes_df, edges = edges_df, gprops = gprops_df, meta = meta) } -#' Create a graph from the Netzschleuder data catalogue -#' -#' Netzschleuder () is a large online repository for -#' network datasets with the aim of aiding scientific research. -#' @inheritParams read_from_netzschleuder -#' @return a new graph object. +#' @rdname netzschleuder #' @export -graph_from_netzschleuder <- function( - name -) { - graph_data <- read_from_netzschleuder(name) +ns_graph <- function(name) { + graph_data <- ns_df(name) directed <- graph_data$meta[["analyses"]][["is_directed"]] bipartite <- graph_data$meta[["analyses"]][["is_bipartite"]] diff --git a/man/graph_from_netzschleuder.Rd b/man/graph_from_netzschleuder.Rd deleted file mode 100644 index 64389c9..0000000 --- a/man/graph_from_netzschleuder.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/netzschleuder.R -\name{graph_from_netzschleuder} -\alias{graph_from_netzschleuder} -\title{Create a graph from the Netzschleuder data catalogue} -\usage{ -graph_from_netzschleuder(name) -} -\arguments{ -\item{name}{character. name of the network dataset. To get a network from a collection, use \verb{/}.} -} -\value{ -a new graph object. -} -\description{ -Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for -network datasets with the aim of aiding scientific research. 
-} diff --git a/man/netzschleuder.Rd b/man/netzschleuder.Rd new file mode 100644 index 0000000..ee48d49 --- /dev/null +++ b/man/netzschleuder.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/netzschleuder.R +\name{ns_metadata} +\alias{ns_metadata} +\alias{ns_df} +\alias{ns_graph} +\title{Download and Convert Graph Data from Netzschleuder} +\usage{ +ns_metadata(name, call = rlang::caller_env()) + +ns_df(name, token = NULL) + +ns_graph(name) +} +\arguments{ +\item{name}{Character. The name of the network dataset. To get a network from a collection, +use the format \verb{/}.} + +\item{token}{Character. Some networks have restricted access and require a token. +See \url{https://networks.skewed.de/restricted}.} +} +\value{ +\describe{ +\item{\code{ns_metadata()}}{A list containing metadata for the dataset.} +\item{\code{ns_df()}}{A named list with \code{nodes}, \code{edges}, \code{gprops}, and \code{meta}.} +\item{\code{ns_graph()}}{An \code{igraph} object.} +} +} +\description{ +These functions provide tools to interact with the Netzschleuder network dataset archive. +Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for network datasets, +aimed at aiding scientific research. 
+\describe{ +\item{\code{ns_metadata()}}{ retrieves metadata about a network or network collection.} +\item{\code{ns_df()}}{downloads the graph data as data frames (nodes, edges, and graph properties).} +\item{\code{ns_graph()}}{creates an \code{igraph} object directly from Netzschleuder.} +} +} +\examples{ +\dontrun{ +# Get metadata +ns_metadata("copenhagen/calls") + +# Download network as data frames +graph_data <- ns_df("copenhagen/calls") + +# Create an igraph object +g <- ns_graph("copenhagen/calls") +} + +} +\seealso{ +\url{https://networks.skewed.de/} +} diff --git a/man/read_from_netzschleuder.Rd b/man/read_from_netzschleuder.Rd deleted file mode 100644 index 4ac268d..0000000 --- a/man/read_from_netzschleuder.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/netzschleuder.R -\name{read_from_netzschleuder} -\alias{read_from_netzschleuder} -\title{Download a graph from the Netzschleuder data catalogue -Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for -network datasets with the aim of aiding scientific research.} -\usage{ -read_from_netzschleuder(name, token = NULL) -} -\arguments{ -\item{name}{character. name of the network dataset. To get a network from a collection, use \verb{/}.} - -\item{token}{character. Some networks have restricted access and need a toke. See \url{https://networks.skewed.de/restricted}} -} -\value{ -a named list containing an edge list and node attribute data frame and some metadata -} -\description{ -Download a graph from the Netzschleuder data catalogue -Netzschleuder (\url{https://networks.skewed.de/}) is a large online repository for -network datasets with the aim of aiding scientific research. 
-} From e60885df1e97c4612f99823adbd0315fa1d365a1 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 14 Apr 2025 08:58:51 +0200 Subject: [PATCH 16/36] removed call argument --- R/netzschleuder.R | 5 ++--- man/netzschleuder.Rd | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 21b4201..f17075b 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -71,7 +71,7 @@ download_file <- function(zip_url, token = NULL, file) { #' @seealso #' @rdname netzschleuder #' @export -ns_metadata <- function(name, call = rlang::caller_env()) { +ns_metadata <- function(name) { net_ident <- resolve_name(name) url <- sprintf("https://networks.skewed.de/api/net/%s", net_ident[1]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) @@ -82,8 +82,7 @@ ns_metadata <- function(name, call = rlang::caller_env()) { c( "{net_ident[1]} is a collection and downloading a whole collection is not permitted.", "i" = "see {.url {collection_url}}" - ), - call = call + ) ) } else if (net_ident[1] == net_ident[2]) { return(raw) diff --git a/man/netzschleuder.Rd b/man/netzschleuder.Rd index ee48d49..d5765b2 100644 --- a/man/netzschleuder.Rd +++ b/man/netzschleuder.Rd @@ -6,7 +6,7 @@ \alias{ns_graph} \title{Download and Convert Graph Data from Netzschleuder} \usage{ -ns_metadata(name, call = rlang::caller_env()) +ns_metadata(name) ns_df(name, token = NULL) From 6e45a5d5fa0dda8f62ef1b47a0387776ae63e31b Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 14 Apr 2025 09:16:26 +0200 Subject: [PATCH 17/36] removed rlang --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f762671..8de90c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,8 +25,7 @@ Imports: cli, httr2, igraph (>= 2.0.0), - minty, - rlang + minty Suggests: testthat (>= 3.0.0) Encoding: UTF-8 From dd495a4fe2b2e549b02ab790f70bd0c8552cd0e9 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 
14 Apr 2025 20:51:31 +0200 Subject: [PATCH 18/36] added token to ns_graph and error handling for gprops file --- R/netzschleuder.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index f17075b..9501821 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -152,7 +152,10 @@ ns_df <- function(name, token = NULL) { nodes_df$y <- y_vals } - gprops_df <- utils::read.csv(unz(temp, gprops_file_name)) + gprops_df <- tryCatch( + utils::read.csv(unz(temp, gprops_file_name)), + error = function(e) readLines(unz(temp, gprops_file_name)) + ) on.exit(unlink(temp)) @@ -161,8 +164,8 @@ ns_df <- function(name, token = NULL) { #' @rdname netzschleuder #' @export -ns_graph <- function(name) { - graph_data <- ns_df(name) +ns_graph <- function(name, token = NULL) { + graph_data <- ns_df(name, token = token) directed <- graph_data$meta[["analyses"]][["is_directed"]] bipartite <- graph_data$meta[["analyses"]][["is_bipartite"]] From 7a8004ce2251baf274860694ef9c9629d7dec271 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 14 Apr 2025 20:53:54 +0200 Subject: [PATCH 19/36] docs --- man/netzschleuder.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/netzschleuder.Rd b/man/netzschleuder.Rd index d5765b2..eb58be5 100644 --- a/man/netzschleuder.Rd +++ b/man/netzschleuder.Rd @@ -10,7 +10,7 @@ ns_metadata(name) ns_df(name, token = NULL) -ns_graph(name) +ns_graph(name, token = NULL) } \arguments{ \item{name}{Character. The name of the network dataset. 
To get a network from a collection, From e29f90f14ccdfe781dada7d522a33028c8f96729 Mon Sep 17 00:00:00 2001 From: schochastics Date: Mon, 14 Apr 2025 21:11:35 +0200 Subject: [PATCH 20/36] rate limit works --- R/netzschleuder.R | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 9501821..049a1d4 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -1,16 +1,14 @@ -make_request <- function(url, token = NULL) { - req <- httr2::request(url) - req <- httr2::req_throttle(req, capacity = 20, fill_time_s = 60) +base_req <- httr2::request("https://networks.skewed.de") |> + httr2::req_throttle(capacity = 20, fill_time_s = 60) |> + httr2::req_user_agent("R package igraphdata (github.com/igraph/igraphdata)") + +make_request <- function(path, token = NULL) { + req <- httr2::req_url_path(base_req, path) if (!is.null(token)) { req <- httr2::req_headers(req, `WWW-Authenticate` = token) } - req <- httr2::req_user_agent( - req, - "R package igraphdata (github.com/igraph/igraphdata)" - ) - resp <- httr2::req_perform(req) if (httr2::resp_status(resp) != 200) { @@ -73,9 +71,9 @@ download_file <- function(zip_url, token = NULL, file) { #' @export ns_metadata <- function(name) { net_ident <- resolve_name(name) - url <- sprintf("https://networks.skewed.de/api/net/%s", net_ident[1]) + path <- sprintf("api/net/%s", net_ident[1]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) - resp <- make_request(url) + resp <- make_request(path) raw <- httr2::resp_body_json(resp) if (net_ident[1] == net_ident[2] && length(unlist(raw$nets)) > 1) { cli::cli_abort( @@ -110,7 +108,7 @@ ns_df <- function(name, token = NULL) { net_ident <- resolve_name(name) zip_url <- sprintf( - "https://networks.skewed.de/net/%s/files/%s.csv.zip", + "net/%s/files/%s.csv.zip", net_ident[1], net_ident[2] ) From bcd1b1a6e6d767d2cc95341d827bcc326da1e9ff Mon Sep 17 00:00:00 2001 From: schochastics Date: Tue, 15 Apr 2025 
14:06:14 +0200 Subject: [PATCH 21/36] put base_req into onload --- R/netzschleuder.R | 8 ++++---- R/zzz.R | 9 +++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 R/zzz.R diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 049a1d4..34a7934 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -1,9 +1,9 @@ -base_req <- httr2::request("https://networks.skewed.de") |> - httr2::req_throttle(capacity = 20, fill_time_s = 60) |> - httr2::req_user_agent("R package igraphdata (github.com/igraph/igraphdata)") +get_base_req <- function() { + .pkg_env$base_req +} make_request <- function(path, token = NULL) { - req <- httr2::req_url_path(base_req, path) + req <- httr2::req_url_path(get_base_req(), path) if (!is.null(token)) { req <- httr2::req_headers(req, `WWW-Authenticate` = token) diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..1656a51 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,9 @@ +.pkg_env <- new.env(parent = emptyenv()) + +.onLoad <- function(libname, pkgname) { + base_req <- httr2::request("https://networks.skewed.de") |> + httr2::req_throttle(capacity = 20, fill_time_s = 60) |> + httr2::req_user_agent("R package igraphdata (github.com/igraph/igraphdata)") + + .pkg_env$base_req <- base_req +} From 4431e4bae31c3a34cd53fa41676e8d5adf4a2c21 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 16 Apr 2025 16:38:44 +0200 Subject: [PATCH 22/36] added more comments for clarity --- R/netzschleuder.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 34a7934..2f7bcc6 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -133,6 +133,7 @@ ns_df <- function(name, token = NULL) { names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") nodes_df_raw <- utils::read.csv(unz(temp, node_file_name)) + #suppress warning if no character columns found nodes_df <- suppressWarnings(minty::type_convert(nodes_df_raw)) names(nodes_df)[1] <- "id" @@ -150,6 +151,7 @@ ns_df <- function(name, 
token = NULL) { nodes_df$y <- y_vals } + #gprops file is not always a valid csv file. In that case, simply read it as text gprops_df <- tryCatch( utils::read.csv(unz(temp, gprops_file_name)), error = function(e) readLines(unz(temp, gprops_file_name)) From a52dec8a7a1f85e3e5605e5132633684ac06af6a Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 16 Apr 2025 16:39:05 +0200 Subject: [PATCH 23/36] added David as ctb and a sentence about netzschleuder --- DESCRIPTION | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8de90c1..5489d8c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,13 +9,16 @@ Authors@R: c( person("Kirill", "Müller", , "kirill@cynkra.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1416-3412")), person("Maëlle", "Salmon", role = "ctb"), + person("David","Schoch", role = "ctb", + comment = c(ORCID="0000-0003-2952-4812")), person("Chan Zuckerberg Initiative", role = "fnd") ) Description: A small collection of various network data sets, to use with the 'igraph' package: the Enron email network, various food webs, interactions in the immunoglobulin protein, the karate club network, Koenigsberg's bridges, visuotactile brain areas of the macaque monkey, - UK faculty friendship network, domestic US flights network, etc. + UK faculty friendship network, domestic US flights network, etc. Also provides + access to the API of <https://networks.skewed.de/>.
License: CC BY-SA 4.0 + file LICENSE URL: http://igraph.org BugReports: https://github.com/igraph/igraphdata/issues From 10b25c4d06edd5b26628b746298b00eddae46858 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 16 Apr 2025 14:42:09 +0000 Subject: [PATCH 24/36] chore: Auto-update from GitHub Actions Run: https://github.com/igraph/igraphdata/actions/runs/14495485835 --- man/igraphdata-package.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/igraphdata-package.Rd b/man/igraphdata-package.Rd index 7902410..ab66ecf 100644 --- a/man/igraphdata-package.Rd +++ b/man/igraphdata-package.Rd @@ -6,7 +6,7 @@ \alias{igraphdata-package} \title{igraphdata: A Collection of Network Data Sets for the 'igraph' Package} \description{ -A small collection of various network data sets, to use with the 'igraph' package: the Enron email network, various food webs, interactions in the immunoglobulin protein, the karate club network, Koenigsberg's bridges, visuotactile brain areas of the macaque monkey, UK faculty friendship network, domestic US flights network, etc. +A small collection of various network data sets, to use with the 'igraph' package: the Enron email network, various food webs, interactions in the immunoglobulin protein, the karate club network, Koenigsberg's bridges, visuotactile brain areas of the macaque monkey, UK faculty friendship network, domestic US flights network, etc. Also provides access to the API of \url{https://networks.skewed.de/}. 
} \seealso{ Useful links: @@ -28,6 +28,7 @@ Other contributors: \itemize{ \item Szabolcs Horvát (\href{https://orcid.org/0000-0002-3100-523X}{ORCID}) [contributor] \item Maëlle Salmon [contributor] + \item David Schoch (\href{https://orcid.org/0000-0003-2952-4812}{ORCID}) [contributor] \item Chan Zuckerberg Initiative [funder] } From 44852806b82ecbc97cbb2be735b06063b60ba2dd Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 17 Apr 2025 10:03:02 +0200 Subject: [PATCH 25/36] review changes --- DESCRIPTION | 5 +++-- R/netzschleuder.R | 26 ++++++++++++++++++-------- man/igraphdata-package.Rd | 1 + 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5489d8c..e0dd565 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Authors@R: c( person("Kirill", "Müller", , "kirill@cynkra.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1416-3412")), person("Maëlle", "Salmon", role = "ctb"), - person("David","Schoch", role = "ctb", + person("David","Schoch", role = "aut", comment = c(ORCID="0000-0003-2952-4812")), person("Chan Zuckerberg Initiative", role = "fnd") ) @@ -28,8 +28,9 @@ Imports: cli, httr2, igraph (>= 2.0.0), - minty + withr Suggests: + minty, testthat (>= 3.0.0) Encoding: UTF-8 LazyData: true diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 2f7bcc6..d2ac3db 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -70,6 +70,11 @@ download_file <- function(zip_url, token = NULL, file) { #' @rdname netzschleuder #' @export ns_metadata <- function(name) { + if (!requireNamespace("minty", quietly = TRUE)) { + cli::cli_abort( + "The package `minty` is needed for this function. Please install it." 
+ ) + } net_ident <- resolve_name(name) path <- sprintf("api/net/%s", net_ident[1]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) @@ -143,19 +148,24 @@ ns_df <- function(name, token = NULL) { pos_array <- gsub("array\\(\\[|\\]|\\)", "", nodes_df[["X_pos"]]) split_coords <- strsplit(pos_array, ",") - x_vals <- sapply(split_coords, function(x) as.numeric(trimws(x[1]))) - y_vals <- sapply(split_coords, function(x) as.numeric(trimws(x[2]))) + x_vals <- vapply( + split_coords, + function(x) as.numeric(trimws(x[1])), + numeric(1) + ) + y_vals <- vapply( + split_coords, + function(x) as.numeric(trimws(x[2])), + numeric(1) + ) nodes_df[["X_pos"]] <- NULL - nodes_df$x <- x_vals - nodes_df$y <- y_vals + nodes_df[["x"]] <- x_vals + nodes_df[["y"]] <- y_vals } #gprops file is not always a valid csv file. In that case, simply read it as text - gprops_df <- tryCatch( - utils::read.csv(unz(temp, gprops_file_name)), - error = function(e) readLines(unz(temp, gprops_file_name)) - ) + gprops_df <- readLines(unz(temp, gprops_file_name)) on.exit(unlink(temp)) diff --git a/man/igraphdata-package.Rd b/man/igraphdata-package.Rd index ab66ecf..0103785 100644 --- a/man/igraphdata-package.Rd +++ b/man/igraphdata-package.Rd @@ -22,6 +22,7 @@ Useful links: Authors: \itemize{ \item Gábor Csárdi (\href{https://orcid.org/0000-0001-7098-9676}{ORCID}) + \item David Schoch (\href{https://orcid.org/0000-0003-2952-4812}{ORCID}) } Other contributors: From 02fb9db5a1699d1f647e3aee55102b0a5552bb21 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 17 Apr 2025 08:05:19 +0000 Subject: [PATCH 26/36] chore: Auto-update from GitHub Actions Run: https://github.com/igraph/igraphdata/actions/runs/14510993378 --- man/igraphdata-package.Rd | 1 - 1 file changed, 1 deletion(-) diff --git a/man/igraphdata-package.Rd b/man/igraphdata-package.Rd index 0103785..af34170 100644 --- a/man/igraphdata-package.Rd +++ b/man/igraphdata-package.Rd @@ -29,7 +29,6 @@ Other contributors: \itemize{ 
\item Szabolcs Horvát (\href{https://orcid.org/0000-0002-3100-523X}{ORCID}) [contributor] \item Maëlle Salmon [contributor] - \item David Schoch (\href{https://orcid.org/0000-0003-2952-4812}{ORCID}) [contributor] \item Chan Zuckerberg Initiative [funder] } From 0c717fcc3d0ba4b2042e0ddc1bdca32f88875d3d Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 17 Apr 2025 11:04:05 +0200 Subject: [PATCH 27/36] moved minty check to ns_df --- R/netzschleuder.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index d2ac3db..e2cb53f 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -70,11 +70,6 @@ download_file <- function(zip_url, token = NULL, file) { #' @rdname netzschleuder #' @export ns_metadata <- function(name) { - if (!requireNamespace("minty", quietly = TRUE)) { - cli::cli_abort( - "The package `minty` is needed for this function. Please install it." - ) - } net_ident <- resolve_name(name) path <- sprintf("api/net/%s", net_ident[1]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) @@ -109,6 +104,11 @@ ns_metadata <- function(name) { #' @rdname netzschleuder #' @export ns_df <- function(name, token = NULL) { + if (!requireNamespace("minty", quietly = TRUE)) { + cli::cli_abort( + "The package `minty` is needed for this function. Please install it." 
+ ) + } meta <- ns_metadata(name) net_ident <- resolve_name(name) From 1974d6a9189bf7169d6157280ed480bdb4b7f634 Mon Sep 17 00:00:00 2001 From: schochastics Date: Thu, 17 Apr 2025 15:25:53 +0200 Subject: [PATCH 28/36] better name error handling --- DESCRIPTION | 3 +-- R/netzschleuder.R | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e0dd565..390a767 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,8 +27,7 @@ Depends: Imports: cli, httr2, - igraph (>= 2.0.0), - withr + igraph (>= 2.0.0) Suggests: minty, testthat (>= 3.0.0) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index e2cb53f..c30b1e7 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -19,8 +19,19 @@ make_request <- function(path, token = NULL) { } resolve_name <- function(x) { + #remove trailing / + x <- sub("/$", "", x) + #remove double slash + x <- sub("//", "/", x) + if (grepl("/", x)) { - return(strsplit(x, "/", fixed = TRUE)[[1]]) + x_split <- strsplit(x, "/", fixed = TRUE)[[1]] + if (length(x_split) > 2) { + cli::cli_abort( + "{.arg name} has {length(x_split)} components instead of 2." 
+ ) + } + return(x_split) } else { c(x, x) } @@ -91,8 +102,7 @@ ns_metadata <- function(name) { c( "{net_ident[2]} is not part of the collection {net_ident[1]}.", "i" = "see {.url {collection_url}}" - ), - call = call + ) ) } raw[["analyses"]] <- raw[["analyses"]][[net_ident[2]]] From 29412cf42733392d51ad7e47a965cc5e187252cb Mon Sep 17 00:00:00 2001 From: schochastics Date: Fri, 18 Apr 2025 21:05:06 +0200 Subject: [PATCH 29/36] moved all pkgs to suggest and removed onLoad --- DESCRIPTION | 9 +++++---- R/netzschleuder.R | 19 ++++++++++++++----- R/zzz.R | 9 --------- 3 files changed, 19 insertions(+), 18 deletions(-) delete mode 100644 R/zzz.R diff --git a/DESCRIPTION b/DESCRIPTION index 390a767..e5b1c9f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,11 +25,12 @@ BugReports: https://github.com/igraph/igraphdata/issues Depends: R (>= 4.0) Imports: - cli, + igraph (>= 2.0.0), + rlang +Suggests: + cli, + minty, httr2, - igraph (>= 2.0.0) -Suggests: - minty, testthat (>= 3.0.0) Encoding: UTF-8 LazyData: true diff --git a/R/netzschleuder.R b/R/netzschleuder.R index c30b1e7..f282849 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -1,8 +1,20 @@ +#' @keywords internal +.pkg_env <- new.env(parent = emptyenv()) + get_base_req <- function() { + if (!exists("base_req", envir = .pkg_env, inherits = FALSE)) { + base_req <- httr2::request("https://networks.skewed.de") |> + httr2::req_throttle(capacity = 20, fill_time_s = 60) |> + httr2::req_user_agent( + "R package igraphdata (github.com/igraph/igraphdata)" + ) + .pkg_env$base_req <- base_req + } .pkg_env$base_req } make_request <- function(path, token = NULL) { + rlang::check_installed("httr2") req <- httr2::req_url_path(get_base_req(), path) if (!is.null(token)) { @@ -81,6 +93,7 @@ download_file <- function(zip_url, token = NULL, file) { #' @rdname netzschleuder #' @export ns_metadata <- function(name) { + rlang::check_installed("cli") net_ident <- resolve_name(name) path <- sprintf("api/net/%s", net_ident[1]) 
collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) @@ -114,11 +127,7 @@ ns_metadata <- function(name) { #' @rdname netzschleuder #' @export ns_df <- function(name, token = NULL) { - if (!requireNamespace("minty", quietly = TRUE)) { - cli::cli_abort( - "The package `minty` is needed for this function. Please install it." - ) - } + rlang::check_installed("minty") meta <- ns_metadata(name) net_ident <- resolve_name(name) diff --git a/R/zzz.R b/R/zzz.R deleted file mode 100644 index 1656a51..0000000 --- a/R/zzz.R +++ /dev/null @@ -1,9 +0,0 @@ -.pkg_env <- new.env(parent = emptyenv()) - -.onLoad <- function(libname, pkgname) { - base_req <- httr2::request("https://networks.skewed.de") |> - httr2::req_throttle(capacity = 20, fill_time_s = 60) |> - httr2::req_user_agent("R package igraphdata (github.com/igraph/igraphdata)") - - .pkg_env$base_req <- base_req -} From 0ad558d61df7ebe2c8e2e578c6e6cef20da9e145 Mon Sep 17 00:00:00 2001 From: schochastics Date: Fri, 18 Apr 2025 21:31:29 +0200 Subject: [PATCH 30/36] earlier renaming and better regex --- R/netzschleuder.R | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index f282849..5402e65 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -150,11 +150,11 @@ ns_df <- function(name, token = NULL) { edges_df <- suppressWarnings(minty::type_convert(edges_df_raw)) source_loc <- grep("source", names(edges_df)) target_loc <- grep("target", names(edges_df)) + names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") # netzschleuder uses 0-indexing, igraph uses 1-indexing - edges_df[[source_loc]] <- edges_df[[source_loc]] + 1 - edges_df[[target_loc]] <- edges_df[[target_loc]] + 1 - names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") + edges_df[["from"]] <- edges_df[["from"]] + 1 + edges_df[["to"]] <- edges_df[["to"]] + 1 nodes_df_raw <- utils::read.csv(unz(temp, node_file_name)) #suppress warning if no 
character columns found @@ -164,26 +164,16 @@ ns_df <- function(name, token = NULL) { # netzschleuder uses 0-indexing, igraph uses 1-indexing nodes_df[["id"]] <- nodes_df[["id"]] + 1 if ("X_pos" %in% names(nodes_df)) { - pos_array <- gsub("array\\(\\[|\\]|\\)", "", nodes_df[["X_pos"]]) - split_coords <- strsplit(pos_array, ",") + regex <- gregexpr("-?\\d+\\.\\d+", nodes_df[["X_pos"]]) + matches <- regmatches(nodes_df[["X_pos"]], regex) - x_vals <- vapply( - split_coords, - function(x) as.numeric(trimws(x[1])), - numeric(1) - ) - y_vals <- vapply( - split_coords, - function(x) as.numeric(trimws(x[2])), - numeric(1) - ) + mat <- vapply(matches, as.numeric, numeric(2)) nodes_df[["X_pos"]] <- NULL - nodes_df[["x"]] <- x_vals - nodes_df[["y"]] <- y_vals + nodes_df[["x"]] <- mat[1, ] + nodes_df[["y"]] <- mat[2, ] } - #gprops file is not always a valid csv file. In that case, simply read it as text gprops_df <- readLines(unz(temp, gprops_file_name)) on.exit(unlink(temp)) From 3cd0496ddacf96f8515d7c845b662da13e168c94 Mon Sep 17 00:00:00 2001 From: schochastics Date: Fri, 18 Apr 2025 21:38:25 +0200 Subject: [PATCH 31/36] double parenthesis --- R/netzschleuder.R | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 5402e65..2ab5c6f 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -95,30 +95,30 @@ download_file <- function(zip_url, token = NULL, file) { ns_metadata <- function(name) { rlang::check_installed("cli") net_ident <- resolve_name(name) - path <- sprintf("api/net/%s", net_ident[1]) - collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[1]) + path <- sprintf("api/net/%s", net_ident[[1]]) + collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[[1]]) resp <- make_request(path) raw <- httr2::resp_body_json(resp) - if (net_ident[1] == net_ident[2] && length(unlist(raw$nets)) > 1) { + if (net_ident[[1]] == net_ident[[2]] && 
length(unlist(raw$nets)) > 1) { cli::cli_abort( c( - "{net_ident[1]} is a collection and downloading a whole collection is not permitted.", + "{net_ident[[1]]} is a collection and downloading a whole collection is not permitted.", "i" = "see {.url {collection_url}}" ) ) - } else if (net_ident[1] == net_ident[2]) { + } else if (net_ident[[1]] == net_ident[[2]]) { return(raw) } else { - idx <- which(unlist(raw[["nets"]]) == net_ident[2]) + idx <- which(unlist(raw[["nets"]]) == net_ident[[2]]) if (length(idx) == 0) { cli::cli_abort( c( - "{net_ident[2]} is not part of the collection {net_ident[1]}.", + "{net_ident[[2]]} is not part of the collection {net_ident[[1]]}.", "i" = "see {.url {collection_url}}" ) ) } - raw[["analyses"]] <- raw[["analyses"]][[net_ident[2]]] + raw[["analyses"]] <- raw[["analyses"]][[net_ident[[2]]]] raw[["nets"]] <- raw[["nets"]][idx] raw } @@ -133,8 +133,8 @@ ns_df <- function(name, token = NULL) { zip_url <- sprintf( "net/%s/files/%s.csv.zip", - net_ident[1], - net_ident[2] + net_ident[[1]], + net_ident[[2]] ) temp <- tempfile(fileext = "zip") @@ -153,8 +153,8 @@ ns_df <- function(name, token = NULL) { names(edges_df)[c(source_loc, target_loc)] <- c("from", "to") # netzschleuder uses 0-indexing, igraph uses 1-indexing - edges_df[["from"]] <- edges_df[["from"]] + 1 - edges_df[["to"]] <- edges_df[["to"]] + 1 + edges_df[["from"]] <- edges_df[["from"]] + 1L + edges_df[["to"]] <- edges_df[["to"]] + 1L nodes_df_raw <- utils::read.csv(unz(temp, node_file_name)) #suppress warning if no character columns found @@ -162,7 +162,7 @@ ns_df <- function(name, token = NULL) { names(nodes_df)[1] <- "id" # netzschleuder uses 0-indexing, igraph uses 1-indexing - nodes_df[["id"]] <- nodes_df[["id"]] + 1 + nodes_df[["id"]] <- nodes_df[["id"]] + 1L if ("X_pos" %in% names(nodes_df)) { regex <- gregexpr("-?\\d+\\.\\d+", nodes_df[["X_pos"]]) matches <- regmatches(nodes_df[["X_pos"]], regex) @@ -196,7 +196,7 @@ ns_graph <- function(name, token = NULL) { if 
(bipartite) { types <- rep(FALSE, igraph::vcount(g)) - types[graph_data$nodes$id %in% graph_data$edges[, 1]] <- TRUE + types[graph_data$nodes$id %in% graph_data$edges[[1]]] <- TRUE g <- igraph::set_vertex_attr(g, "type", value = types) } From eb12a3a6747ad48cebd5d56812901f7a93e3db04 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 23 Apr 2025 20:36:13 +0200 Subject: [PATCH 32/36] allow metadata as input to ns_graph --- R/netzschleuder.R | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 2ab5c6f..33f98ba 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -68,6 +68,7 @@ download_file <- function(zip_url, token = NULL, file) { #' #' @param name Character. The name of the network dataset. To get a network from a collection, #' use the format `/`. +#' @param collection Logical. If TRUE, get the metadata of a whole collection of networks. #' @param token Character. Some networks have restricted access and require a token. #' See . 
#' @@ -92,14 +93,23 @@ download_file <- function(zip_url, token = NULL, file) { #' @seealso #' @rdname netzschleuder #' @export -ns_metadata <- function(name) { +ns_metadata <- function(name, collection = FALSE) { rlang::check_installed("cli") net_ident <- resolve_name(name) path <- sprintf("api/net/%s", net_ident[[1]]) collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[[1]]) resp <- make_request(path) raw <- httr2::resp_body_json(resp) - if (net_ident[[1]] == net_ident[[2]] && length(unlist(raw$nets)) > 1) { + class(raw) <- c("ns_meta", class(raw)) + raw[["is_collection"]] <- collection + raw[["collection_name"]] <- net_ident[[1]] + if (collection) { + return(raw) + } else if ( + net_ident[[1]] == net_ident[[2]] && + length(unlist(raw$nets)) > 1 && + !collection + ) { cli::cli_abort( c( "{net_ident[[1]]} is a collection and downloading a whole collection is not permitted.", @@ -128,8 +138,21 @@ ns_metadata <- function(name) { #' @export ns_df <- function(name, token = NULL) { rlang::check_installed("minty") - meta <- ns_metadata(name) - net_ident <- resolve_name(name) + if (is.character(name)) { + meta <- ns_metadata(name, collection = FALSE) + net_ident <- resolve_name(name) + } else if (inherits(name, "ns_meta")) { + if (name[["is_collection"]]) { + cli::cli_abort(c( + "{.arg name} contains the meta data of a whole collection and downloading a whole collection is not permitted.", + "i" = "set collection = FALSE in `ns_metadata()`" + )) + } + meta <- name + net_ident <- c(meta[["collection_name"]], meta[["nets"]]) + } else { + cli::cli_abort("{.arg name} must be a string or a `ns_meta` object.") + } zip_url <- sprintf( "net/%s/files/%s.csv.zip", From c9ef882777ce9bb7264bc0f9c2a596dec242e933 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 23 Apr 2025 18:38:33 +0000 Subject: [PATCH 33/36] chore: Auto-update from GitHub Actions Run: https://github.com/igraph/igraphdata/actions/runs/14625553900 --- man/netzschleuder.Rd | 4 +++- 1 file 
changed, 3 insertions(+), 1 deletion(-) diff --git a/man/netzschleuder.Rd b/man/netzschleuder.Rd index eb58be5..3e23b5a 100644 --- a/man/netzschleuder.Rd +++ b/man/netzschleuder.Rd @@ -6,7 +6,7 @@ \alias{ns_graph} \title{Download and Convert Graph Data from Netzschleuder} \usage{ -ns_metadata(name) +ns_metadata(name, collection = FALSE) ns_df(name, token = NULL) @@ -16,6 +16,8 @@ ns_graph(name, token = NULL) \item{name}{Character. The name of the network dataset. To get a network from a collection, use the format \verb{/}.} +\item{collection}{Logical. If TRUE, get the metadata of a whole collection of networks.} + \item{token}{Character. Some networks have restricted access and require a token. See \url{https://networks.skewed.de/restricted}.} } From 94bab6994a982812db65d63749e6ebd11cc67cad Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 23 Apr 2025 21:02:56 +0200 Subject: [PATCH 34/36] implement file size limit --- R/netzschleuder.R | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index 33f98ba..a8d1a28 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -13,10 +13,13 @@ get_base_req <- function() { .pkg_env$base_req } -make_request <- function(path, token = NULL) { +make_request <- function(path, token = NULL, method = "GET") { rlang::check_installed("httr2") req <- httr2::req_url_path(get_base_req(), path) - + req <- httr2::req_method(req, method) + if (method == "HEAD") { + req <- httr2::req_headers(req, `Accept-Encoding` = "identity") + } if (!is.null(token)) { req <- httr2::req_headers(req, `WWW-Authenticate` = token) } @@ -49,8 +52,17 @@ resolve_name <- function(x) { } } -download_file <- function(zip_url, token = NULL, file) { - resp <- make_request(zip_url, token) +download_file <- function(zip_url, token = NULL, file, size_limit) { + resp <- make_request(zip_url, token, method = "HEAD") + byte_size <- as.numeric(httr2::resp_headers(resp)[["content-length"]]) 
+ gb_size <- round(byte_size / 1024^3, 4) + if (gb_size > size_limit) { + cli::cli_abort(c( + "{zip_url} has a size of {gb_size} GB and exceeds the size limit of {size_limit} GB.", + "i" = "To download the file, set {.arg size_limit} to a value greater than {gb_size}" + )) + } + resp <- make_request(zip_url, token, method = "GET") writeBin(httr2::resp_body_raw(resp), file) invisible(NULL) } @@ -70,6 +82,7 @@ download_file <- function(zip_url, token = NULL, file) { #' use the format `/`. #' @param collection Logical. If TRUE, get the metadata of a whole collection of networks. #' @param token Character. Some networks have restricted access and require a token. +#' @param size_limit Numeric. Maximum allowed file size in GB. Larger files will be prevented from being downloaded. #' See . #' #' @return @@ -136,7 +149,7 @@ ns_metadata <- function(name, collection = FALSE) { #' @rdname netzschleuder #' @export -ns_df <- function(name, token = NULL) { +ns_df <- function(name, token = NULL, size_limit = 1) { rlang::check_installed("minty") if (is.character(name)) { meta <- ns_metadata(name, collection = FALSE) @@ -161,7 +174,7 @@ ns_df <- function(name, token = NULL) { ) temp <- tempfile(fileext = "zip") - download_file(zip_url, token = token, file = temp) + download_file(zip_url, token = token, file = temp, size_limit = size_limit) zip_contents <- utils::unzip(temp, list = TRUE) @@ -206,8 +219,8 @@ ns_df <- function(name, token = NULL) { #' @rdname netzschleuder #' @export -ns_graph <- function(name, token = NULL) { - graph_data <- ns_df(name, token = token) +ns_graph <- function(name, token = NULL, size_limit = 1) { + graph_data <- ns_df(name, token = token, size_limit = size_limit) directed <- graph_data$meta[["analyses"]][["is_directed"]] bipartite <- graph_data$meta[["analyses"]][["is_bipartite"]] From 18f87d1f795550e423d9a4b52a2db1c486d7924d Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 23 Apr 2025 21:03:19 +0200 Subject: [PATCH 35/36] docs --- 
man/netzschleuder.Rd | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/man/netzschleuder.Rd b/man/netzschleuder.Rd index 3e23b5a..c5a30a0 100644 --- a/man/netzschleuder.Rd +++ b/man/netzschleuder.Rd @@ -8,9 +8,9 @@ \usage{ ns_metadata(name, collection = FALSE) -ns_df(name, token = NULL) +ns_df(name, token = NULL, size_limit = 1) -ns_graph(name, token = NULL) +ns_graph(name, token = NULL, size_limit = 1) } \arguments{ \item{name}{Character. The name of the network dataset. To get a network from a collection, @@ -18,7 +18,9 @@ use the format \verb{/}.} \item{collection}{Logical. If TRUE, get the metadata of a whole collection of networks.} -\item{token}{Character. Some networks have restricted access and require a token. +\item{token}{Character. Some networks have restricted access and require a token.} + +\item{size_limit}{Numeric. Maximum allowed file size in GB. Larger files will be prevented from being downloaded. See \url{https://networks.skewed.de/restricted}.} } \value{ From 2a6d9f1775ce7bf3c44e3ab6e1788b515f463643 Mon Sep 17 00:00:00 2001 From: schochastics Date: Wed, 23 Apr 2025 21:14:43 +0200 Subject: [PATCH 36/36] added meta data print --- NAMESPACE | 1 + R/netzschleuder.R | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 9c5c212..e2c36a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(print,ns_meta) export(lesmis_gml) export(lesmis_graphml) export(lesmis_pajek) diff --git a/R/netzschleuder.R b/R/netzschleuder.R index a8d1a28..2033701 100644 --- a/R/netzschleuder.R +++ b/R/netzschleuder.R @@ -238,3 +238,27 @@ ns_graph <- function(name, token = NULL, size_limit = 1) { g } + +#' @export +print.ns_meta <- function(x, ...) 
{ + if (x[["is_collection"]]) { + cat("Netzschleuder Metadata for the collection:", x[["collection_name"]]) + cat("Number of Networks:", length(x[["nets"]])) + } else { + cat( + "Netzschleuder Metadata for: ", + x[["collection_name"]], + "/", + x[["nets"]][[1]], + sep = "" + ) + cat("\n") + cat("Number of vertices:", x$analyses$num_vertices) + cat("\n") + cat("Number of Edges:", x$analyses$num_edges) + cat("\n") + cat("Directed:", x$analyses$is_directed) + cat("\n") + cat("Bipartite:", x$analyses$is_bipartite) + } +}