diff --git a/metadata/baseline_models/baseline_models.csv b/metadata/baseline_models/baseline_models.csv
new file mode 100644
index 0000000..f6d0d0e
--- /dev/null
+++ b/metadata/baseline_models/baseline_models.csv
@@ -0,0 +1,2 @@
+data_right_trunc
+FALSE
diff --git a/scratch/gam.R b/scratch/gam.R
new file mode 100644
index 0000000..946a36d
--- /dev/null
+++ b/scratch/gam.R
@@ -0,0 +1,61 @@
+library(mgcv)
+library(targets)
+library(dplyr)
+library(tidyr)
+library(lubridate)
+
+tar_load(score_hosp_quantiles)
+
+## Join ww site metadata ##
+
+tar_load(ww_data)
+
+ww_data_gam <- ww_data |>
+  group_by(location_abbr, location_name, site, site_pop, forecast_date) |>
+  summarise(
+    sampling_freq = mean(diff(date)), # This means doing mean twice
+    last_sample = max(date)
+  ) |>
+  group_by(location_abbr, location_name) |>
+  summarise(
+    n_sites = length(unique(site)), # Number of wastewater sites per location
+    pop_cov = sum(unique(site_pop)), # There will be an issue with for Berlin
+    # - need to find total popns instead.
+    latency_med = median(ymd(forecast_date) - last_sample),
+    latency_mean = mean(ymd(forecast_date) - last_sample),
+    latency_min = min(ymd(forecast_date) - last_sample),
+    sampling_freq = mean(sampling_freq)
+  )
+
+## Add scores
+
+score_gam <- score_hosp_quantiles |>
+  select(model, include_ww, location, forecast_date, wis, date) |>
+  filter(model == "wwinference") |>
+  rename(location_name = location)
+
+# Scoring at 28_day time horizon
+score_gam <- score_gam |>
+  group_by(location_name, forecast_date, include_ww) |>
+  filter(date == max(date))
+
+# Pivot_wider to get hosp only and with ww wis
+score_gam <- score_gam |>
+  mutate(include_ww = if_else(include_ww, "with_ww", "hosp_only")) |>
+  pivot_wider(
+    id_cols = c("location_name", "forecast_date"),
+    names_from = include_ww,
+    values_from = wis
+  )
+
+ww_data_gam <- right_join(ww_data_gam, score_gam, by = "location_name")
+
+gamm(
+  with_ww ~ s(hosp_only) +
+    s(n_sites) +
+    s(pop_cov) + # normalised
+    s(sampling_freq) + # make numeric
+    s(latency), # maybe bs=cc here if dependent on time of year??
+  data = ww_data_gam
+)
+# Need to add: variability in ww data (from upper and lower bounds)