1+ library(shinyBS )
12library(shiny )
23library(bslib )
3- library(DT )
4+ library(gt )
45library(connectapi )
56library(dplyr )
67library(purrr )
78library(lubridate )
89library(tidyr )
10+ library(shinyjs )
11+ library(shinycssloaders )
12+ library(shinybusy )
13+
14+ source(" ./ui.R" )
915
1016# cache data to disk with a refresh every 8h, table renders in ~7m when cache
1117# is expired, deleted, or on initial deploy
1218shinyOptions(
1319 cache = cachem :: cache_disk(" ./app_cache/cache/" , max_age = 60 * 60 * 8 )
1420)
1521
16- source(" get_usage.R" )
17-
1822# Hacky function to get a list of Content class objects without making a request
1923# for each item. These objects differ from the ones created by `content_item()`
2024# because they also include the full owner info as returned by `get_content()`.
@@ -27,79 +31,97 @@ as_content_list <- function(content_df, client) {
2731 })
2832}
2933
30- # checks to see if a content item has failed jobs within the last 30d, grabs
31- # usage data if it does, then compiles content, job, and usage data together
32- # into a tibble, returning it.
33- get_failed_job_data <- function (item , usage ) {
34- failed_jobs <- get_jobs(item ) | >
35- # filter out successful and running jobs
36- filter(exit_code != 0 & status != 0 & ! (is.na(end_time )))
37-
38- if (is.null(failed_jobs ) || nrow(failed_jobs ) == 0 ) {
34+ # user email is not on the content item owner object so we request it
35+ # TODO: store user_guid = user_email list so we don't lookup the same user's
36+ # email for each failing content item they own
37+ get_user_email <- function (client , guid ) {
38+ user_endpoint <- paste0(" v1/users/" , guid )
39+ user <- client $ GET(user_endpoint )
40+ user $ email
41+ }
42+
43+ # filters content jobs down to failures and sets content_recovered depending on
44+ # whether or not the latest job ended in error
45+ filter_to_failed_jobs <- function (jobs ) {
46+ failed_jobs <- if (nrow(jobs ) == 0 ) {
47+ data.frame ()
48+ } else {
49+ # grab the latest job and all failing jobs
50+ latest_job <- jobs %> %
51+ slice_max(start_time , with_ties = FALSE )
52+ failed_jobs <- jobs %> %
53+ # filter out successful and running jobs
54+ filter(exit_code != 0 & ! (is.na(exit_code )) & status != 0 & ! (is.na(end_time ))) | >
55+ # grab only the columns we use for cleaner dplyr pipeline
56+ select(end_time , exit_code , tag , key )
57+ # set content_recovered depending on if latest_job was in failed_jobs
58+ failed_jobs %> %
59+ mutate(
60+ content_recovered = ifelse(latest_job $ key %in% failed_jobs $ key , FALSE , TRUE )
61+ )
62+ }
63+ failed_jobs
64+ }
65+
66+ # checks to see if a content item has failed jobs within the last 30d, then
67+ # compiles content and job data into a tibble, returning it.
68+ get_failed_job_data <- function (item , client ) {
69+ jobs <- tryCatch(
70+ {
71+ get_jobs(item )
72+ }, error = function (e ) {
73+ print(paste(" Error encountered with item: " , item , e $ message ))
74+ data.frame ()
75+ })
76+ failed_jobs <- filter_to_failed_jobs(jobs )
77+ all_failed_jobs <- if (nrow(failed_jobs ) == 0 ) {
3978 # content item does not have failed jobs
40- return ( NULL )
79+ data.frame ()
4180 } else {
42- last_visit <- usage %> %
43- filter(content_guid == item $ content $ guid ) %> %
44- slice_max(timestamp ) %> %
45- select(timestamp )
46- if (nrow(last_visit ) == 0 ) { # display date 0 for content without visits
47- last_visit <- last_visit %> %
48- bind_rows(data.frame (timestamp = as.POSIXct(0 )))
49- }
50- # return required information from https://github.com/posit-dev/connect/issues/30288
51- all_failed_jobs <- map_dfr(seq_len(nrow(failed_jobs )), ~
52- tibble(
53- " content_title" = item $ content $ title ,
54- " content_guid" = item $ content $ guid ,
55- " content_owner" = item $ content $ owner [[1 ]]$ username ,
56- " job_failed_at" = failed_jobs $ end_time [.x ],
57- " failed_job_type" = failed_jobs $ tag [.x ],
58- " failure_reason" = failed_jobs $ exit_code [.x ],
59- " last_deployed_time" = item $ content $ last_deployed_time ,
60- " last_visited" = as.POSIXct(last_visit $ timestamp )
61- )
62- )
63- all_failed_jobs
81+ owner_email <- get_user_email(client , item $ content $ owner_guid )
82+ failed_jobs %> %
83+ mutate(
84+ content_title = item $ content $ title ,
85+ content_guid = item $ content $ guid ,
86+ content_owner = item $ content $ owner [[1 ]]$ username ,
87+ log_url = paste0(item $ content $ dashboard_url ,
88+ " /logs?logKey=" ,
89+ failed_jobs $ key ),
90+ owner_email = owner_email ,
91+ content_url = item $ content $ dashboard_url
92+ )
6493 }
94+ all_failed_jobs
6595}
6696
6797server <- function (input , output , session ) {
6898 # initialize Connect API client
6999 client <- connect()
70100
71- # TODO: use `v1/content/failed` when #30414 merges so we only list content we
72- # know has failed before, filter to deployed within last 60d for now
101+ # TODO: use `v1/content/failed` to get content items with failed last job
102+ # filter to deployed within last year for now
73103 content_list <- reactive({
74104 content <- get_content(client , limit = inf )
75105 content <- content %> %
76- filter(last_deployed_time > = (Sys.time() - days( 60 )))
106+ filter(last_deployed_time > = (Sys.time() - years( 1 )))
77107 as_content_list(content , client )
78108 }) | > bindCache(" static_key" )
79109
80- # cache last 30d of usage (Jobs.MaxCompleted is 30d), takes ~5m to build
81- usage <- reactive({
82- from = (Sys.time() - days(30 ))
83- to = Sys.time()
84- get_usage(client , from , to ) # ~100 pages of results
85- }) | > bindCache(" static_key" )
86-
87- # cache failed jobs data, takes ~2m to build with content filtered to items
88- # deployed within the last 60d
110+ # cache failed jobs data, takes ~5m to build with content filtered to items
111+ # deployed within the last year
89112 bad_content_df <- reactive({
90- req(content_list(), usage())
91- map_dfr(content_list(), ~ get_failed_job_data(.x , usage()))
92- }) | > bindCache(" static_key" )
93-
94- # output the datatable of failed jobs
95- output $ jobs <- renderDT({
96- datatable(bad_content_df() | >
97- # map job type to something more readable
98- mutate(failed_job_type = case_when(
99- failed_job_type %in% c(" build_report" , " build_site" , " build_jupyter" ) ~ " Building" ,
100- failed_job_type %in% c(" packrat_restore" , " python_restore" ) ~ " Restoring environment" ,
101- failed_job_type == " configure_report" ~ " Configuring report" ,
102- failed_job_type %in% c(" run_app" ,
113+ req(content_list())
114+ bad_content <- map_dfr(content_list(), ~ get_failed_job_data(.x , client ))
115+ bad_content %> %
116+ rename(job_failed_at = end_time ,
117+ failed_job_type = tag ,
118+ failure_reason = exit_code ) %> %
119+ # map job type to something more readable
120+ mutate(failed_job_type = case_when(
121+ failed_job_type %in% c(" build_report" , " build_site" , " build_jupyter" ) ~ " Building" ,
122+ failed_job_type %in% c(" packrat_restore" , " python_restore" ) ~ " Restoring environment" ,
123+ failed_job_type == " configure_report" ~ " Configuring report" ,
124+ failed_job_type %in% c(" run_app" ,
103125 " run_api" ,
104126 " run_tensorflow" ,
105127 " run_python_api" ,
@@ -110,45 +132,92 @@ server <- function(input, output, session) {
110132 " run_fastapi_app" ,
111133 " run_voila_app" ,
112134 " run_pyshiny_app" ) ~ " Running" ,
113- failed_job_type == " render_shiny" ~ " Rendering" ,
114- failed_job_type == " ctrl_extraction" ~ " Extracting parameters" ,
115- TRUE ~ failed_job_type )) | >
116- # map exit codes to something more readable
117- mutate(failure_reason = case_when(
118- failure_reason %in% c(1 , 2 , 134 ) ~ " failed to run / error during running" ,
119- failure_reason == 137 ~ " out of memory" ,
120- failure_reason %in% c(255 , 15 , 130 ) ~ " process terminated by server" ,
121- failure_reason %in% c(13 , 127 ) ~ " configuration / permissions error" ,
122- # treat any unmapped exit_code integers as characters
123- TRUE ~ as.character(failure_reason ))) | >
124- mutate(content_title = replace_na(content_title , " " )),
125- rownames = FALSE ,
126- escape = FALSE ,
127- options = list ( # non-interactive table for this prototype
128- paging = FALSE ,
129- searching = FALSE ,
130- ordering = FALSE ,
131- info = FALSE ,
132- dom = " t"
133- )
135+ failed_job_type == " render_shiny" ~ " Rendering" ,
136+ failed_job_type == " ctrl_extraction" ~ " Extracting parameters" ,
137+ TRUE ~ failed_job_type ),
138+ # map exit codes to something more readable
139+ failure_reason = case_when(
140+ failure_reason %in% c(1 , 2 , 134 ) ~ " failed to run / error during running" ,
141+ failure_reason == 137 ~ " out of memory" ,
142+ failure_reason %in% c(255 , 15 , 130 ) ~ " process terminated by server" ,
143+ failure_reason %in% c(13 , 127 ) ~ " configuration / permissions error" ,
144+ # treat any unmapped exit_code integers as characters
145+ TRUE ~ as.character(failure_reason ))) %> %
146+ group_by(content_guid ) %> %
147+ mutate(content_guid = paste0(' <a href="' ,
148+ first(content_url ),
149+ ' " target="_blank">' ,
150+ first(content_title ),
151+ ' </a>' )) %> %
152+ mutate(owner_email = paste0(' <span style="font-size: 32px;">' ,
153+ " <a href='mailto:" ,
154+ owner_email ,
155+ " ?subject=Problem%20with%20" ,
156+ gsub(" '" ,
157+ " %27" ,
158+ gsub(' "' ,
159+ " %22" ,
160+ content_title )),
161+ " &body=Please%20investigate:%0A" ,
162+ log_url ,
163+ " '>" ,
164+ " ✉" ,
165+ " </a></span>" )) %> %
166+ mutate(log_url = paste0(' <a href="' ,
167+ log_url ,
168+ ' " target="_blank">' ,
169+ ' <span style="font-size: 32px;">🗒' ,
170+ ' </a></span>' )) %> %
171+ mutate(content_guid = ifelse(! content_recovered ,
172+ paste(content_guid , " <span style='color: red;'>⚠️</span>" ),
173+ content_guid )) %> %
174+ select(- content_url , - content_title , - key )
175+ }) | > bindCache(" static_key" )
176+
177+ # show helpful information about what is and is not in failed jobs data
178+ # along with definitions of terms and descriptions of filter behavior
179+ observeEvent(input $ show_help , {
180+ showModal(modalDialog(
181+ title = " Helpful info about this app" ,
182+ easyClose = TRUE ,
183+ size = " m" ,
184+ help_information )
134185 )
135186 })
136- }
137-
138- ui <- fluidPage(
139- fluidRow(
140- column(12 ,
141- titlePanel(" Content With Issues (table view)" )
142- )
143- ),
187+
188+ # populate owners filter with username from compiled failed jobs data
189+ observe({
190+ updateSelectInput(session ,
191+ " owner_filter" ,
192+ choices = unique(bad_content_df()$ content_owner ))
193+ })
144194
145- fluidRow(
146- column(12 ,
147- titlePanel(tags $ h6(" All failed jobs on content deployed within 60d:" )),
148- DTOutput(" jobs" ),
149- )
150- )
151- )
152-
195+ # output the great table of failed jobs
196+ # TODO: better reflect current applied filters
197+ output $ jobs <- render_gt({
198+ bad_content_df() %> %
199+ filter(if (input $ currently_failing ) content_recovered == FALSE else TRUE ) %> %
200+ filter(if (input $ not_notified ) failed_job_type %in% c(" Running" ,
201+ " Configuring report" ,
202+ " Restoring environment" ,
203+ " Extracting parameters" ) else TRUE ) %> %
204+ filter(if (! is.null(input $ job_type )) failed_job_type %in% input $ job_type else TRUE ) %> %
205+ filter(if (! is.null(input $ owner_filter )) content_owner %in% input $ owner_filter else TRUE ) %> %
206+ filter(if (! is.null(input $ failure_reason )) failure_reason %in% input $ failure_reason else TRUE ) %> %
207+ gt() %> %
208+ fmt_markdown(columns = c(log_url , owner_email )) %> %
209+ sub_missing(columns = everything(), missing_text = " " ) %> %
210+ cols_label(job_failed_at = " Date of Failure" ,
211+ failure_reason = " Reason for Failure" ,
212+ failed_job_type = " Job Type" ,
213+ content_owner = " Owner" ,
214+ owner_email = " Email Owner" ,
215+ log_url = " Open Logs" ) %> %
216+ cols_hide(content_recovered ) %> %
217+ opt_interactive(use_page_size_select = TRUE ,
218+ use_sorting = TRUE ,
219+ use_search = TRUE )
220+ })
221+ }
153222
154- shinyApp(ui , server )
223+ shinyApp(ui , server )
0 commit comments