Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 20 additions & 27 deletions R/fig_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,17 @@ get_bar_chart_seq_count <- function(obs_data,
return(p)
}

#' Hospital admissions over time
#' Percent of ED visits due to covid over time
#'
#' @param location_to_plot Location to plot (abbreviation)
#' @param date_range Vector of date range to plot, will use min and max
#' @param temporal_granularity Temporal granularity to plot
#' @param location_data Data.frame of location metadata to translate codes to
#' abbreviations
#' @param plot_name Name of plot
#' @param output_fp File.path to save plot
#' @param url URL for hospital admissions
#' @param output_fp File path to save plot
#' @param data_fp File path to NSSP data at the state and national level.
#' Originally obtained from: https://data.cdc.gov/Public-Health-Surveillance/NSSP-Emergency-Department-Visit-Trajectories-by-St/rdmq-nq56/about_data #nolint
#'
#' @returns ggplot object
#' @autoglobal
Expand All @@ -220,37 +221,28 @@ get_plot_hosp_admissions <- function(location_to_plot,
"output", "figs",
"data_figs"
),
url = "https://raw.githubusercontent.com/CDCgov/covid19-forecast-hub/refs/heads/main/target-data/covid-hospital-admissions.csv") { # nolint
if (location_to_plot != "US minus CA") {
raw_data <- read_csv(url) |>
left_join(location_data, by = "location") |>
filter(
abbreviation == !!location_to_plot,
date >= min(date_range),
date <= max(date_range)
)
} else {
raw_data <- read_csv(url) |>
left_join(location_data, by = "location") |>
filter(
!abbreviation %in% c("US", "CA"),
date >= min(date_range),
date <= max(date_range)
) |>
group_by(date) |>
summarise(value = sum(value, na.rm = TRUE))
}

data_fp = file.path("input", "nssp_states_and_national.csv")) { # nolint

nssp_cdcgov <- read_csv(data_fp)
raw_data <- nssp_cdcgov |>
left_join(location_data,
by = c("geography" = "location_name") # nolint
) |>
rename(date = week_end) |>
filter(
abbreviation == !!location_to_plot,
date >= min(date_range),
date <= max(date_range)
)
Comment on lines +226 to +236
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify the NSSP CSV schema and check for a sample of join key values

if [ -f "input/nssp_states_and_national.csv" ]; then
  echo "=== CSV column names ==="
  head -1 input/nssp_states_and_national.csv
  
  echo ""
  echo "=== Sample geography values (first 10 unique) ==="
  tail -n +2 input/nssp_states_and_national.csv | cut -d',' -f1 | sort -u | head -10
else
  echo "NSSP CSV file not found at input/nssp_states_and_national.csv"
  echo ""
  echo "Searching for NSSP-related CSV files in the repository..."
  find . -name "*nssp*.csv" -o -name "*states*.csv" | head -20
fi

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 608


🏁 Script executed:

#!/bin/bash
# Find where location_data is created/loaded in the R file
grep -n "location_data" R/fig_data.R | head -20

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 248


🏁 Script executed:

#!/bin/bash
# View the function signature and parameter documentation
sed -n '200,240p' R/fig_data.R

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 1772


🏁 Script executed:

#!/bin/bash
# Find where get_plot_hosp_admissions is called
echo "=== Function calls to get_plot_hosp_admissions ==="
rg "get_plot_hosp_admissions" --type r -B 2 -A 5

echo ""
echo "=== Search for location_data creation/loading ==="
rg "location_data\s*<-|location_data\s*=" --type r -B 2 -A 2 | head -50

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 5289


🏁 Script executed:

#!/bin/bash
# Find the tar_target definition for location_data
rg "tar_target.*location_data|name\s*=\s*location_data" --type r -B 5 -A 10

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 928


🏁 Script executed:

#!/bin/bash
# Find location_fp definition
echo "=== Finding location_fp ==="
rg "location_fp\s*=" --type r -B 2 -A 2

echo ""
echo "=== Search for location CSV files ==="
find . -name "*location*" -type f | grep -E "\.(csv|tsv)$" | head -20

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 454


🏁 Script executed:

#!/bin/bash
# Search for location_fp assignment
echo "=== Finding location_fp assignment ==="
rg "location_fp.*<-|location_fp\s*=" --type r -B 1 -A 1

echo ""
echo "=== Check targets config file for location_fp ==="
grep -n "location_fp" targets/config_targets.R | head -20

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 430


🏁 Script executed:

#!/bin/bash
# View the tar_target definition for location_fp
sed -n '30,45p' targets/config_targets.R

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 849


🏁 Script executed:

#!/bin/bash
# Fetch and inspect the FluSight locations CSV
echo "=== Fetching FluSight locations CSV ==="
curl -s "https://raw.githubusercontent.com/cdcepi/FluSight-forecast-hub/refs/heads/main/auxiliary-data/locations.csv" | head -5 > /tmp/locations.csv

if [ -s /tmp/locations.csv ]; then
  echo "=== Column names ==="
  head -1 /tmp/locations.csv
  
  echo ""
  echo "=== First few rows ==="
  head -6 /tmp/locations.csv
else
  echo "Failed to fetch locations CSV"
fi

Repository: epiforecasts/evalvariantnowcasthub

Length of output: 742


Correct the join key: use location instead of location_name.

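The NSSP CSV's geography column contains numeric FIPS codes (e.g., "1", "10", "100", "1000"), which correspond to the FluSight locations CSV's location column (e.g., "01", "02", "04"), not to location_name (which contains place names such as "Alabama" and "Alaska"). The current join will therefore fail to match any rows: left_join leaves abbreviation as NA for every unmatched row, and the subsequent filter on abbreviation then drops them all. Note that the example values above differ in zero-padding ("1" vs. "01"), so confirm that both columns use the same type and format before relying on the corrected key.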

Change:

by = c("geography" = "location_name")

To:

by = c("geography" = "location")
🤖 Prompt for AI Agents
In @R/fig_data.R around lines 226-236, the left_join between nssp_cdcgov and
location_data uses the wrong key. In the pipeline that produces raw_data, change
that left_join so that geography is matched to the FluSight `location` column
instead of `location_name`, so that rows with FIPS codes align correctly (i.e.,
use by = c("geography" = "location") in the left_join call).


p <- ggplot(raw_data) +
geom_bar(aes(x = date, y = value),
geom_line(aes(x = date, y = percent_visits_covid),
fill = "black",
stat = "identity", position = "dodge"
) +
get_plot_theme() +
xlab("") +
ylab("Hospital admissions") +
ylab("Percent of ED visits\ndue to COVID") +
# scale_y_continuous(transform = "log10")+
scale_x_date(
limits = c(min(date_range), max(date_range)),
Expand All @@ -263,7 +255,8 @@ get_plot_hosp_admissions <- function(location_to_plot,
size = 10
),
legend.position = "bottom"
)
) +
ggtitle(glue::glue("{location_to_plot}"))
dir_create(output_fp, recurse = TRUE)
ggsave(file.path(output_fp, glue::glue("{plot_name}.png")),
plot = p,
Expand Down
26 changes: 17 additions & 9 deletions R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ utils::globalVariables(c(
"obs_freq", # <get_plot_obs_clade_freq>
"clades_modeled", # <get_bar_chart_seq_count>
"sequences", # <get_bar_chart_seq_count>
"week_end", # <get_plot_hosp_admissions>
"abbreviation", # <get_plot_hosp_admissions>
"value", # <get_plot_hosp_admissions>
"percent_visits_covid", # <get_plot_hosp_admissions>
"location", # <get_plot_seq_counts_by_loc>
"sequences", # <get_plot_seq_counts_by_loc>
"population", # <get_plot_seq_counts_by_loc>
Expand Down Expand Up @@ -105,6 +106,13 @@ utils::globalVariables(c(
"location", # <get_plot_by_location>
"model", # <get_plot_by_nowcast_date>
"nowcast_date", # <get_plot_by_nowcast_date>
"model_id", # <get_plot_coverage_overall>
"interval_range", # <get_plot_coverage_overall>
"interval_coverage", # <get_plot_coverage_overall>
"empirical_coverage", # <get_plot_coverage_overall>
"90", # <get_plot_coverage_overall>
"50", # <get_plot_coverage_overall>
"interval_label", # <get_plot_coverage_overall>
"model", # <get_plot_overall>
"target_date", # <get_plot_horizon>
"nowcast_date", # <get_plot_horizon>
Expand Down Expand Up @@ -138,13 +146,13 @@ utils::globalVariables(c(
"model", # <get_plot_bias_by_date>
"bias", # <get_plot_bias_by_date>
"avg_bias", # <get_plot_bias_by_date>
"model_id", # <get_plot_coverage_overall>
"location", # <get_plot_coverage_overall>
"interval_range", # <get_plot_coverage_overall>
"interval_coverage", # <get_plot_coverage_overall>
"empirical_coverage", # <get_plot_coverage_overall>
"95", # <get_plot_coverage_overall>
"50", # <get_plot_coverage_overall>
"interval_label", # <get_plot_coverage_overall>
"model_id", # <get_plot_coverage_by_loc>
"location", # <get_plot_coverage_by_loc>
"interval_range", # <get_plot_coverage_by_loc>
"interval_coverage", # <get_plot_coverage_by_loc>
"empirical_coverage", # <get_plot_coverage_by_loc>
"90", # <get_plot_coverage_by_loc>
"50", # <get_plot_coverage_by_loc>
"interval_label", # <get_plot_coverage_by_loc>
NULL
))
Loading