Skip to content

Commit d80f4b6

Browse files
schochasticsmaelle
andauthored
feat: New ns_graph() and lower-level ns_df() and ns_metadata() to download from [netzschleuder](https://networks.skewed.de) (#23)
Co-authored-by: Maëlle Salmon <[email protected]> Co-authored-by: schochastics <[email protected]>
1 parent 81d5cec commit d80f4b6

File tree

5 files changed

+339
-5
lines changed

5 files changed

+339
-5
lines changed

DESCRIPTION

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,28 @@ Authors@R: c(
99
person("Kirill", "Müller", , "[email protected]", role = c("aut", "cre"),
1010
comment = c(ORCID = "0000-0002-1416-3412")),
1111
person("Maëlle", "Salmon", role = "ctb"),
12+
person("David","Schoch", role = "aut",
13+
comment = c(ORCID="0000-0003-2952-4812")),
1214
person("Chan Zuckerberg Initiative", role = "fnd")
1315
)
1416
Description: A small collection of various network data sets, to use with
1517
the 'igraph' package: the Enron email network, various food webs,
1618
interactions in the immunoglobulin protein, the karate club network,
1719
Koenigsberg's bridges, visuotactile brain areas of the macaque monkey,
18-
UK faculty friendship network, domestic US flights network, etc.
20+
UK faculty friendship network, domestic US flights network, etc. Also provides
21+
access to the API of <https://networks.skewed.de/>.
1922
License: CC BY-SA 4.0 + file LICENSE
2023
URL: http://igraph.org
2124
BugReports: https://github.com/igraph/igraphdata/issues
2225
Depends:
23-
R (>= 2.10)
26+
R (>= 4.0)
2427
Imports:
25-
igraph (>= 1.5.0)
26-
Suggests:
28+
igraph (>= 2.0.0),
29+
rlang
30+
Suggests:
31+
cli,
32+
minty,
33+
httr2,
2734
testthat (>= 3.0.0)
2835
Encoding: UTF-8
2936
LazyData: true

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(print,ns_meta)
34
export(lesmis_gml)
45
export(lesmis_graphml)
56
export(lesmis_pajek)
7+
export(ns_df)
8+
export(ns_graph)
9+
export(ns_metadata)
610
importFrom(igraph,vcount)

R/netzschleuder.R

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
#' @keywords internal
2+
.pkg_env <- new.env(parent = emptyenv())
3+
4+
get_base_req <- function() {
5+
if (!exists("base_req", envir = .pkg_env, inherits = FALSE)) {
6+
base_req <- httr2::request("https://networks.skewed.de") |>
7+
httr2::req_throttle(capacity = 20, fill_time_s = 60) |>
8+
httr2::req_user_agent(
9+
"R package igraphdata (github.com/igraph/igraphdata)"
10+
)
11+
.pkg_env$base_req <- base_req
12+
}
13+
.pkg_env$base_req
14+
}
15+
16+
make_request <- function(path, token = NULL, method = "GET") {
17+
rlang::check_installed("httr2")
18+
req <- httr2::req_url_path(get_base_req(), path)
19+
req <- httr2::req_method(req, method)
20+
if (method == "HEAD") {
21+
req <- httr2::req_headers(req, `Accept-Encoding` = "identity")
22+
}
23+
if (!is.null(token)) {
24+
req <- httr2::req_headers(req, `WWW-Authenticate` = token)
25+
}
26+
27+
resp <- httr2::req_perform(req)
28+
29+
if (httr2::resp_status(resp) != 200) {
30+
stop("Failed to download file. Status: ", httr2::resp_status(resp))
31+
}
32+
33+
resp
34+
}
35+
36+
resolve_name <- function(x) {
37+
#remove trailing /
38+
x <- sub("/$", "", x)
39+
#remove double slash
40+
x <- sub("//", "/", x)
41+
42+
if (grepl("/", x)) {
43+
x_split <- strsplit(x, "/", fixed = TRUE)[[1]]
44+
if (length(x_split) > 2) {
45+
cli::cli_abort(
46+
"{.arg name} has {length(x_split)} components instead of 2."
47+
)
48+
}
49+
return(x_split)
50+
} else {
51+
c(x, x)
52+
}
53+
}
54+
55+
download_file <- function(zip_url, token = NULL, file, size_limit) {
56+
resp <- make_request(zip_url, token, method = "HEAD")
57+
byte_size <- as.numeric(httr2::resp_headers(resp)[["content-length"]])
58+
gb_size <- round(byte_size / 1024^3, 4)
59+
if (gb_size > size_limit) {
60+
cli::cli_abort(c(
61+
"{zip_url} has a size of {gb_size} GB and exceeds the size limit of {size_limit} GB.",
62+
"i" = "To download the file, set {.arg size_limit} to a value greater than {gb_size}"
63+
))
64+
}
65+
resp <- make_request(zip_url, token, method = "GET")
66+
writeBin(httr2::resp_body_raw(resp), file)
67+
invisible(NULL)
68+
}
69+
70+
#' Download and Convert Graph Data from Netzschleuder
71+
#'
72+
#' These functions provide tools to interact with the Netzschleuder network dataset archive.
73+
#' Netzschleuder (<https://networks.skewed.de/>) is a large online repository for network datasets,
74+
#' aimed at aiding scientific research.
75+
#' \describe{
76+
#' \item{`ns_metadata()`}{ retrieves metadata about a network or network collection.}
77+
#' \item{`ns_df()`}{downloads the graph data as data frames (nodes, edges, and graph properties).}
78+
#' \item{`ns_graph()`}{creates an `igraph` object directly from Netzschleuder.}
79+
#' }
80+
#'
81+
#' @param name Character. The name of the network dataset. To get a network from a collection,
82+
#' use the format `<collection_name>/<network_name>`.
83+
#' @param collection Logical. If TRUE, get the metadata of a whole collection of networks.
84+
#' @param token Character. Some networks have restricted access and require a token.
85+
#' @param size_limit Numeric. Maximum allowed file size in GB. Larger files will be prevented from being downloaded.
86+
#' See <https://networks.skewed.de/restricted>.
87+
#'
88+
#' @return
89+
#' \describe{
90+
#' \item{`ns_metadata()`}{A list containing metadata for the dataset.}
91+
#' \item{`ns_df()`}{A named list with `nodes`, `edges`, `gprops`, and `meta`.}
92+
#' \item{`ns_graph()`}{An `igraph` object.}
93+
#' }
94+
#' @examples
95+
#' \dontrun{
96+
#' # Get metadata
97+
#' ns_metadata("copenhagen/calls")
98+
#'
99+
#' # Download network as data frames
100+
#' graph_data <- ns_df("copenhagen/calls")
101+
#'
102+
#' # Create an igraph object
103+
#' g <- ns_graph("copenhagen/calls")
104+
#' }
105+
#'
106+
#' @seealso <https://networks.skewed.de/>
107+
#' @rdname netzschleuder
108+
#' @export
109+
ns_metadata <- function(name, collection = FALSE) {
110+
rlang::check_installed("cli")
111+
net_ident <- resolve_name(name)
112+
path <- sprintf("api/net/%s", net_ident[[1]])
113+
collection_url <- sprintf("https://networks.skewed.de/net/%s", net_ident[[1]])
114+
resp <- make_request(path)
115+
raw <- httr2::resp_body_json(resp)
116+
class(raw) <- c("ns_meta", class(raw))
117+
raw[["is_collection"]] <- collection
118+
raw[["collection_name"]] <- net_ident[[1]]
119+
if (collection) {
120+
return(raw)
121+
} else if (
122+
net_ident[[1]] == net_ident[[2]] &&
123+
length(unlist(raw$nets)) > 1 &&
124+
!collection
125+
) {
126+
cli::cli_abort(
127+
c(
128+
"{net_ident[[1]]} is a collection and downloading a whole collection is not permitted.",
129+
"i" = "see {.url {collection_url}}"
130+
)
131+
)
132+
} else if (net_ident[[1]] == net_ident[[2]]) {
133+
return(raw)
134+
} else {
135+
idx <- which(unlist(raw[["nets"]]) == net_ident[[2]])
136+
if (length(idx) == 0) {
137+
cli::cli_abort(
138+
c(
139+
"{net_ident[[2]]} is not part of the collection {net_ident[[1]]}.",
140+
"i" = "see {.url {collection_url}}"
141+
)
142+
)
143+
}
144+
raw[["analyses"]] <- raw[["analyses"]][[net_ident[[2]]]]
145+
raw[["nets"]] <- raw[["nets"]][idx]
146+
raw
147+
}
148+
}
149+
150+
#' @rdname netzschleuder
151+
#' @export
152+
ns_df <- function(name, token = NULL, size_limit = 1) {
153+
rlang::check_installed("minty")
154+
if (is.character(name)) {
155+
meta <- ns_metadata(name, collection = FALSE)
156+
net_ident <- resolve_name(name)
157+
} else if (inherits(name, "ns_meta")) {
158+
if (name[["is_collection"]]) {
159+
cli::cli_abort(c(
160+
"{.arg name} contains the meta data of a whole collection and downloading a whole collection is not permitted.",
161+
"i" = "set collection = FALSE in `ns_metadata()`"
162+
))
163+
}
164+
meta <- name
165+
net_ident <- c(meta[["collection_name"]], meta[["nets"]])
166+
} else {
167+
cli::cli_abort("{.arg name} must be a string or a `ns_meta` object.")
168+
}
169+
170+
zip_url <- sprintf(
171+
"net/%s/files/%s.csv.zip",
172+
net_ident[[1]],
173+
net_ident[[2]]
174+
)
175+
176+
temp <- tempfile(fileext = "zip")
177+
download_file(zip_url, token = token, file = temp, size_limit = size_limit)
178+
179+
zip_contents <- utils::unzip(temp, list = TRUE)
180+
181+
edge_file_name <- grep("edge", zip_contents$Name, value = TRUE)
182+
node_file_name <- grep("node", zip_contents$Name, value = TRUE)
183+
gprops_file_name <- grep("gprops", zip_contents$Name, value = TRUE)
184+
185+
edges_df_raw <- utils::read.csv(unz(temp, edge_file_name))
186+
edges_df <- suppressWarnings(minty::type_convert(edges_df_raw))
187+
source_loc <- grep("source", names(edges_df))
188+
target_loc <- grep("target", names(edges_df))
189+
names(edges_df)[c(source_loc, target_loc)] <- c("from", "to")
190+
191+
# netzschleuder uses 0-indexing, igraph uses 1-indexing
192+
edges_df[["from"]] <- edges_df[["from"]] + 1L
193+
edges_df[["to"]] <- edges_df[["to"]] + 1L
194+
195+
nodes_df_raw <- utils::read.csv(unz(temp, node_file_name))
196+
#suppress warning if no character columns found
197+
nodes_df <- suppressWarnings(minty::type_convert(nodes_df_raw))
198+
names(nodes_df)[1] <- "id"
199+
200+
# netzschleuder uses 0-indexing, igraph uses 1-indexing
201+
nodes_df[["id"]] <- nodes_df[["id"]] + 1L
202+
if ("X_pos" %in% names(nodes_df)) {
203+
regex <- gregexpr("-?\\d+\\.\\d+", nodes_df[["X_pos"]])
204+
matches <- regmatches(nodes_df[["X_pos"]], regex)
205+
206+
mat <- vapply(matches, as.numeric, numeric(2))
207+
208+
nodes_df[["X_pos"]] <- NULL
209+
nodes_df[["x"]] <- mat[1, ]
210+
nodes_df[["y"]] <- mat[2, ]
211+
}
212+
213+
gprops_df <- readLines(unz(temp, gprops_file_name))
214+
215+
on.exit(unlink(temp))
216+
217+
list(nodes = nodes_df, edges = edges_df, gprops = gprops_df, meta = meta)
218+
}
219+
220+
#' @rdname netzschleuder
221+
#' @export
222+
ns_graph <- function(name, token = NULL, size_limit = 1) {
223+
graph_data <- ns_df(name, token = token, size_limit = size_limit)
224+
directed <- graph_data$meta[["analyses"]][["is_directed"]]
225+
bipartite <- graph_data$meta[["analyses"]][["is_bipartite"]]
226+
227+
g <- igraph::graph_from_data_frame(
228+
graph_data$edges,
229+
directed = directed,
230+
vertices = graph_data$nodes
231+
)
232+
233+
if (bipartite) {
234+
types <- rep(FALSE, igraph::vcount(g))
235+
types[graph_data$nodes$id %in% graph_data$edges[[1]]] <- TRUE
236+
g <- igraph::set_vertex_attr(g, "type", value = types)
237+
}
238+
239+
g
240+
}
241+
242+
#' @export
243+
print.ns_meta <- function(x, ...) {
244+
if (x[["is_collection"]]) {
245+
cat("Netzschleuder Metadata for the collection:", x[["collection_name"]])
246+
cat("Number of Networks:", length(x[["nets"]]))
247+
} else {
248+
cat(
249+
"Netzschleuder Metadata for: ",
250+
x[["collection_name"]],
251+
"/",
252+
x[["nets"]][[1]],
253+
sep = ""
254+
)
255+
cat("\n")
256+
cat("Number of vertices:", x$analyses$num_vertices)
257+
cat("\n")
258+
cat("Number of Edges:", x$analyses$num_edges)
259+
cat("\n")
260+
cat("Directed:", x$analyses$is_directed)
261+
cat("\n")
262+
cat("Bipartite:", x$analyses$is_bipartite)
263+
}
264+
}

man/igraphdata-package.Rd

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/netzschleuder.Rd

Lines changed: 58 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)