diff --git a/collector/src/bin/collector.rs b/collector/src/bin/collector.rs index c3937d213..8c59da293 100644 --- a/collector/src/bin/collector.rs +++ b/collector/src/bin/collector.rs @@ -137,6 +137,7 @@ struct SharedBenchmarkConfig { artifact_id: ArtifactId, toolchain: Toolchain, record_duration: bool, + job_id: Option, } fn check_measureme_installed() -> Result<(), String> { @@ -847,12 +848,14 @@ fn main_result() -> anyhow::Result { runtime.group, &toolchain, &artifact_id, + None, ))?; let shared = SharedBenchmarkConfig { artifact_id, toolchain, record_duration: true, + job_id: None, }; let config = RuntimeBenchmarkConfig::new( runtime_suite, @@ -996,6 +999,7 @@ fn main_result() -> anyhow::Result { toolchain, artifact_id, record_duration: true, + job_id: None, }; let config = CompileBenchmarkConfig { benchmarks, @@ -1048,7 +1052,12 @@ fn main_result() -> anyhow::Result { let toolchain = create_toolchain_from_published_version(&tag, &host_target_tuple)?; let conn = rt.block_on(pool.connection()); - rt.block_on(bench_published_artifact(conn, toolchain, &benchmark_dirs)) + rt.block_on(bench_published_artifact( + conn, + toolchain, + &benchmark_dirs, + None, + )) } NextArtifact::Commit { commit, @@ -1132,6 +1141,7 @@ fn main_result() -> anyhow::Result { None, &toolchain, &artifact_id, + None, ))?; let runtime_config = RuntimeBenchmarkConfig { @@ -1143,6 +1153,7 @@ fn main_result() -> anyhow::Result { artifact_id, toolchain, record_duration: true, + job_id: None, }; rt.block_on(run_benchmarks( @@ -1173,7 +1184,12 @@ fn main_result() -> anyhow::Result { let conn = rt.block_on(pool.connection()); let toolchain = create_toolchain_from_published_version(&toolchain, &host_target_tuple)?; - rt.block_on(bench_published_artifact(conn, toolchain, &benchmark_dirs))?; + rt.block_on(bench_published_artifact( + conn, + toolchain, + &benchmark_dirs, + None, + ))?; Ok(0) } @@ -1495,8 +1511,9 @@ async fn run_job_queue_benchmarks( // not with a benchmark. 
conn.record_error( artifact_row_id, - &format!("job:{}", benchmark_job.id()), + "Job failure", &format!("Error while benchmarking job {benchmark_job:?}: {error:?}"), + Some(benchmark_job.id()), ) .await; @@ -1656,6 +1673,7 @@ async fn run_benchmark_job( artifact_id, toolchain, record_duration: false, + job_id: Some(job.id()), }; // A failure here means that it was not possible to compile something, that likely won't resolve @@ -1723,6 +1741,7 @@ async fn create_benchmark_configs( None, toolchain, artifact_id, + Some(job.id()), ) .await?; Some(RuntimeBenchmarkConfig { @@ -2047,6 +2066,7 @@ async fn load_runtime_benchmarks( group: Option, toolchain: &Toolchain, artifact_id: &ArtifactId, + job_id: Option, ) -> anyhow::Result { let BenchmarkSuiteCompilation { suite, @@ -2059,7 +2079,7 @@ async fn load_runtime_benchmarks( RuntimeCompilationOpts::default(), )?; - record_runtime_compilation_errors(conn, artifact_id, failed_to_compile).await; + record_runtime_compilation_errors(conn, artifact_id, failed_to_compile, job_id).await; Ok(suite) } @@ -2067,11 +2087,12 @@ async fn record_runtime_compilation_errors( connection: &mut dyn Connection, artifact_id: &ArtifactId, errors: HashMap, + job_id: Option, ) { let artifact_row_number = connection.artifact_id(artifact_id).await; for (krate, error) in errors { connection - .record_error(artifact_row_number, &krate, &error) + .record_error(artifact_row_number, &krate, &error, job_id) .await; } } @@ -2153,6 +2174,7 @@ async fn run_benchmarks( &collector, runtime.filter, runtime.iterations, + shared.job_id, ) .await .context("Runtime benchmarks failed") @@ -2175,6 +2197,7 @@ async fn bench_published_artifact( mut connection: Box, toolchain: Toolchain, dirs: &BenchmarkDirs<'_>, + job_id: Option, ) -> anyhow::Result<()> { let artifact_id = ArtifactId::Tag(toolchain.id.clone()); @@ -2200,6 +2223,7 @@ async fn bench_published_artifact( None, &toolchain, &artifact_id, + job_id, ) .await?; @@ -2207,6 +2231,7 @@ async fn 
bench_published_artifact( artifact_id, toolchain, record_duration: true, + job_id: None, }; run_benchmarks( connection.as_mut(), @@ -2295,6 +2320,7 @@ async fn bench_compile( collector.artifact_row_id, &benchmark_name.0, &format!("{s:?}"), + shared.job_id, ) .await; }; diff --git a/collector/src/runtime/mod.rs b/collector/src/runtime/mod.rs index b165a0551..d08d3a8ac 100644 --- a/collector/src/runtime/mod.rs +++ b/collector/src/runtime/mod.rs @@ -35,6 +35,7 @@ pub async fn bench_runtime( collector: &CollectorCtx, filter: RuntimeBenchmarkFilter, iterations: u32, + job_id: Option, ) -> anyhow::Result<()> { let filtered = suite.filtered_benchmark_count(&filter); println!("Executing {filtered} benchmarks\n"); @@ -89,7 +90,12 @@ pub async fn bench_runtime( if let Err(error) = result { eprintln!("collector error: {error:#}"); tx.conn() - .record_error(collector.artifact_row_id, &step_name, &format!("{error:?}")) + .record_error( + collector.artifact_row_id, + &step_name, + &format!("{error:?}"), + job_id, + ) .await; }; diff --git a/database/schema.md b/database/schema.md index 1e8fce8ee..e7ec66503 100644 --- a/database/schema.md +++ b/database/schema.md @@ -250,14 +250,21 @@ bors_sha pr parent_sha complete requested include exclude runs commi ### error -Records a compilation or runtime error for an artifact and a benchmark. +Records an error within the application, namely one of: +- a compilation error +- a runtime error +- an error tied to a specific benchmark job -``` -sqlite> select * from error limit 1; -aid benchmark error ---------- --- ----- -1 syn-1.0.89 Failed to compile... -``` +Columns: + +* **id** (`BIGINT` / `SERIAL`): Primary key identifier for the error row; + auto-increments with each new error. +* **aid** (`INTEGER`): References the artifact id column. +* **context** (`TEXT NOT NULL`): A short message giving more context about + why or where the error occurred. +* **message** (`TEXT NOT NULL`): The error message. 
+* **job_id** (`INTEGER`): A nullable reference to the benchmark job that + produced this error, if any. ## New benchmarking design We are currently implementing a new design for dispatching benchmarks to collector(s) and storing diff --git a/database/src/bin/postgres-to-sqlite.rs b/database/src/bin/postgres-to-sqlite.rs index c6299544e..20615fd34 100644 --- a/database/src/bin/postgres-to-sqlite.rs +++ b/database/src/bin/postgres-to-sqlite.rs @@ -191,12 +191,12 @@ impl Table for Error { } fn postgres_select_statement(&self, since_weeks_ago: Option) -> String { - let s = "select benchmark, aid, error from ".to_string() + self.name(); + let s = "select context, aid, message from ".to_string() + self.name(); with_filter_clause_maybe(s, ARTIFACT_JOIN_AND_WHERE, since_weeks_ago) } fn sqlite_insert_statement(&self) -> &'static str { - "insert into error (benchmark, aid, error) VALUES (?, ?, ?)" + "insert into error (context, aid, message) VALUES (?, ?, ?)" } fn sqlite_execute_insert(&self, statement: &mut rusqlite::Statement, row: tokio_postgres::Row) { diff --git a/database/src/bin/sqlite-to-postgres.rs b/database/src/bin/sqlite-to-postgres.rs index 138c3fe35..a290dea95 100644 --- a/database/src/bin/sqlite-to-postgres.rs +++ b/database/src/bin/sqlite-to-postgres.rs @@ -241,9 +241,10 @@ struct Error; #[derive(Serialize)] struct ErrorRow<'a> { + id: i32, aid: i32, - benchmark: &'a str, - error: Nullable<&'a str>, + context: &'a str, + message: Nullable<&'a str>, } impl Table for Error { @@ -252,11 +253,11 @@ } fn sqlite_attributes() -> &'static str { - "aid, benchmark, error" + "id, aid, context, message" } fn postgres_attributes() -> &'static str { - "aid, benchmark, error" + "id, aid, context, message" } fn postgres_generated_id_attribute() -> Option<&'static str> { @@ -266,9 +267,10 @@ fn write_postgres_csv_row(writer: &mut csv::Writer, row: &rusqlite::Row) { writer .serialize(ErrorRow { 
aid: row.get(0).unwrap(), - benchmark: row.get_ref(1).unwrap().as_str().unwrap(), - error: row.get_ref(2).unwrap().try_into().unwrap(), + id: row.get(0).unwrap(), + aid: row.get(1).unwrap(), + context: row.get_ref(2).unwrap().as_str().unwrap(), + message: row.get_ref(3).unwrap().try_into().unwrap(), }) .unwrap(); } diff --git a/database/src/pool.rs b/database/src/pool.rs index 571b4510e..9ffc3b89a 100644 --- a/database/src/pool.rs +++ b/database/src/pool.rs @@ -74,7 +74,13 @@ pub trait Connection: Send + Sync { profile: Profile, scenario: Scenario, ); - async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str); + async fn record_error( + &self, + artifact: ArtifactIdNumber, + context: &str, + message: &str, + job_id: Option, + ); async fn record_rustc_crate( &self, collection: CollectionId, @@ -1094,8 +1100,8 @@ mod tests { // Request 1 will have artifact with errors let aid1 = ctx.upsert_master_artifact("sha1").await; - db.record_error(aid1, "crate1", "error1").await; - db.record_error(aid1, "crate2", "error2").await; + db.record_error(aid1, "crate1", "error1", None).await; + db.record_error(aid1, "crate2", "error2", None).await; // Request 2 will have artifact without errors let _aid2 = ctx.upsert_master_artifact("sha2").await; diff --git a/database/src/pool/postgres.rs b/database/src/pool/postgres.rs index 0be955e88..ce764a617 100644 --- a/database/src/pool/postgres.rs +++ b/database/src/pool/postgres.rs @@ -387,6 +387,29 @@ static MIGRATIONS: &[&str] = &[ r#" ALTER TABLE collector_config ADD COLUMN commit_sha TEXT NULL; "#, + r#" + CREATE TABLE error_new ( + id SERIAL PRIMARY KEY, + aid INTEGER NOT NULL REFERENCES artifact(id) ON DELETE CASCADE ON UPDATE CASCADE, + message TEXT NOT NULL, + context TEXT NOT NULL, + job_id INTEGER + ); + + INSERT INTO + error_new (aid, message, context) + SELECT + aid, + error, + benchmark + FROM + error; + + DROP TABLE error; + ALTER TABLE error_new RENAME TO error; + + CREATE INDEX error_artifact_idx ON 
error(aid); + "#, ]; #[async_trait::async_trait] @@ -577,7 +600,7 @@ impl PostgresConnection { .prepare("insert into rustc_compilation (aid, cid, crate, duration) VALUES ($1, $2, $3, $4)") .await .unwrap(), - get_error: conn.prepare("select benchmark, error from error where aid = $1").await.unwrap(), + get_error: conn.prepare("select context, message from error where aid = $1").await.unwrap(), insert_pstat_series: conn.prepare("insert into pstat_series (crate, profile, scenario, backend, target, metric) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT DO NOTHING RETURNING id").await.unwrap(), select_pstat_series: conn.prepare("select id from pstat_series where crate = $1 and profile = $2 and scenario = $3 and backend = $4 and target = $5 and metric = $6").await.unwrap(), collection_id: conn.prepare("insert into collection (perf_commit) VALUES ($1) returning id").await.unwrap(), @@ -684,8 +707,8 @@ impl PostgresConnection { ), errors AS ( SELECT artifacts.name AS tag, - error.benchmark, - error.error + error.context, + error.message FROM error -- Use right join to only return errors for selected artifacts RIGHT JOIN artifacts ON error.aid = artifacts.id @@ -693,12 +716,12 @@ impl PostgresConnection { -- Select request duplicated for each pair of (benchmark, error) SELECT completed.*, - errors.benchmark, - errors.error + errors.context, + errors.message FROM completed LEFT JOIN errors ON errors.tag = completed.tag -- Re-sort the requests, because the original order may be lost - ORDER BY completed.completed_at DESC; + ORDER BY completed.completed_at DESC ")).await.unwrap(), get_jobs_of_in_progress_benchmark_requests: conn.prepare(&format!(" -- Get in progress requests @@ -1180,11 +1203,22 @@ where ArtifactIdNumber(aid) } - async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str) { + async fn record_error( + &self, + artifact: ArtifactIdNumber, + context: &str, + message: &str, + job_id: Option, + ) { self.conn() .execute( - "insert into error 
(benchmark, aid, error) VALUES ($1, $2, $3)", - &[&krate, &(artifact.0 as i32), &error], + "insert into error (context, aid, message, job_id) VALUES ($1, $2, $3, $4)", + &[ + &context, + &(artifact.0 as i32), + &message, + &job_id.map(|id| id as i32), + ], ) .await .unwrap(); diff --git a/database/src/pool/sqlite.rs b/database/src/pool/sqlite.rs index 1d69e133d..f36c77f52 100644 --- a/database/src/pool/sqlite.rs +++ b/database/src/pool/sqlite.rs @@ -406,6 +406,31 @@ static MIGRATIONS: &[Migration] = &[ alter table pstat_series_with_target rename to pstat_series; "#, ), + Migration::new( + r#" + CREATE TABLE error_new ( + id INTEGER PRIMARY KEY, + aid INTEGER NOT NULL REFERENCES artifact(id) ON DELETE CASCADE ON UPDATE CASCADE, + message TEXT NOT NULL, + context TEXT NOT NULL, + job_id INTEGER + ); + + INSERT INTO + error_new (aid, message, context) + SELECT + aid, + error, + benchmark + FROM + error; + + DROP TABLE error; + ALTER TABLE error_new RENAME TO error; + + CREATE INDEX error_artifact_idx ON error(aid); + "#, + ), ]; #[async_trait::async_trait] @@ -765,11 +790,20 @@ impl Connection for SqliteConnection { unimplemented!("recording raw self profile files is not implemented for sqlite") } - async fn record_error(&self, artifact: ArtifactIdNumber, krate: &str, error: &str) { + async fn record_error( + &self, + artifact: ArtifactIdNumber, + context: &str, + message: &str, + job_id: Option, + ) { + if job_id.is_some() { + no_queue_implementation_abort!() + } self.raw_ref() .execute( - "insert into error (benchmark, aid, error) VALUES (?, ?, ?)", - params![krate, &artifact.0, &error], + "insert into error (context, aid, message) VALUES (?, ?, ?)", + params![context, &artifact.0, &message], ) .unwrap(); } @@ -935,7 +969,7 @@ impl Connection for SqliteConnection { } async fn get_error(&self, aid: crate::ArtifactIdNumber) -> HashMap { self.raw_ref() - .prepare_cached("select benchmark, error from error where aid = ?") + .prepare_cached("select context, message from 
error where aid = ?") .unwrap() .query_map(params![&aid.0], |row| Ok((row.get(0)?, row.get(1)?))) .unwrap()