Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 156 additions & 4 deletions site/src/job_queue.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
use std::{str::FromStr, sync::Arc};
use std::{
path::Path,
str::FromStr,
sync::{Arc, LazyLock},
};

use crate::load::{partition_in_place, SiteCtxt};
use chrono::Utc;
use chrono::{DateTime, NaiveDate, Utc};
use database::{BenchmarkRequest, BenchmarkRequestStatus, BenchmarkRequestType};
use hashbrown::HashSet;
use parking_lot::RwLock;
use regex::Regex;
use tokio::time::{self, Duration};

/// Store the latest master commits or do nothing if all of them are
Expand Down Expand Up @@ -36,6 +41,89 @@ async fn create_benchmark_request_master_commits(
Ok(())
}

/// Parses strings in the following formats extracting the Date & release tag
/// `static.rust-lang.org/dist/2016-05-24/channel-rust-1.9.0.toml`
/// `static.rust-lang.org/dist/2016-05-31/channel-rust-nightly.toml`
/// `static.rust-lang.org/dist/2016-06-01/channel-rust-beta.toml`
/// `static.rust-lang.org/dist/2025-06-26/channel-rust-1.89-beta.toml`
/// `static.rust-lang.org/dist/2025-06-26/channel-rust-1.89.0-beta.toml`
/// `static.rust-lang.org/dist/2025-06-26/channel-rust-1.89.0-beta.2.toml`
fn parse_release_string(url: &str) -> Option<(String, DateTime<Utc>)> {
static VERSION_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(\d+\.\d+\.\d+)").unwrap());

// Grab ".../YYYY-MM-DD/FILE.toml" components with Path helpers.
let file = Path::new(url).file_name().and_then(|n| n.to_str())?;

let date_str = Path::new(url)
.parent()
.and_then(Path::file_name)
.and_then(|n| n.to_str())?;

// No other beta releases are recognized as toolchains.
//
// We also have names like this:
//
// * channel-rust-1.75-beta.toml
// * channel-rust-1.75.0-beta.toml
// * channel-rust-1.75.0-beta.1.toml
//
// Which should get ignored for now, they're not consumable via rustup yet.
if file.contains("beta") && file != "channel-rust-beta.toml" {
return None;
}

// Parse the YYYY-MM-DD segment and stamp it with *current* UTC time.
if let Ok(naive) = NaiveDate::parse_from_str(date_str, "%Y-%m-%d") {
let published = naive
.and_hms_opt(0, 0, 0)
.expect("valid HMS")
.and_local_timezone(Utc)
.single()
.unwrap();

// Special-case the rolling beta channel.
if file == "channel-rust-beta.toml" {
return Some((format!("beta-{date_str}"), published));
}

// Otherwise pull out a semver like "1.70.0" and return it.
if let Some(cap) = VERSION_RE.captures(file).and_then(|m| m.get(1)) {
return Some((cap.as_str().to_owned(), published));
}
}

None
}

/// Store the latest release commits or do nothing if all of them are
/// already in the database
async fn create_benchmark_request_releases(
conn: &dyn database::pool::Connection,
) -> anyhow::Result<()> {
let releases: String = reqwest::get("https://static.rust-lang.org/manifests.txt")
.await?
.text()
.await?;
// TODO; delete at some point in the future
let cutoff: chrono::DateTime<Utc> = chrono::DateTime::from_str("2025-06-01T00:00:00.000Z")?;

for release_string in releases.lines().rev().take(20) {
Copy link
Member

@Kobzol Kobzol Jul 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit different than we need - here you load the last 20 lines, but we need the last 20 release artifacts (imagine e.g. that we had 20 lines with mostly nightly releases, then we'd take only a few actual release artifacts). Check how the parse_published... function was used, we can just copy paste it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My previous method, I think, was implementing what is there currently. The current function parses all of the list then makes a decision if I understand correctly;

// site/src/load.rs L:236

        let artifacts: Vec<_> = artifact_list
            .lines()
            .rev()
            .filter_map(parse_published_artifact_tag) // Will parse every item in the list
            .take(20)
            .filter(|artifact| {
                !tested_artifacts.contains(artifact.as_str())
                    && !in_progress_tagged_artifacts.contains(artifact.as_str())
            })
            .collect();

So I think what I was doing previously was fine?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

069f182 I've changed it 👍

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's say that we take the last 3 (instead of 20) releases, to simplify this. If we have this input:

nightly-A
1.10.0
nightly-B
1.11.0
1.12.0
nightly-C
nightly-D

then
for release_string in releases.lines().rev().take(3) will iterate over nightly-D, nightly-C and 1.12.0, and extract only 1.12.0.

What we want instead is to parse 1.12.0, 1.11.0 and 1.10.0.

artifact_list
            .lines()
            .rev()
            .filter_map(parse_published_artifact_tag) // Will parse every item in the list
            .take(3)

goes through the lines in reverse order, keeps only the release artifact lines in the iterator chain, and then takes the first three. That's exactly what we want.

The .collect() is not needed, but otherwise the new logic is fine 👍

if let Some((name, date_time)) = parse_release_string(release_string) {
if date_time >= cutoff {
let release_request = BenchmarkRequest::create_release(
&name,
date_time,
BenchmarkRequestStatus::ArtifactsReady,
"",
"",
);
conn.insert_benchmark_request(&release_request).await;
}
}
}
Ok(())
}

/// Sorts try and master requests that are in the `ArtifactsReady` status.
/// Doesn't consider in-progress requests or release artifacts.
fn sort_benchmark_requests(done: &HashSet<String>, request_queue: &mut [BenchmarkRequest]) {
Expand Down Expand Up @@ -186,6 +274,8 @@ async fn cron_enqueue_jobs(site_ctxt: &Arc<SiteCtxt>) -> anyhow::Result<()> {
let mut conn = site_ctxt.conn().await;
// Put the master commits into the `benchmark_requests` queue
create_benchmark_request_master_commits(site_ctxt, &*conn).await?;
// Put the releases into the `benchmark_requests` queue
create_benchmark_request_releases(&*conn).await?;
enqueue_next_job(&mut *conn).await?;
Ok(())
}
Expand Down Expand Up @@ -213,10 +303,21 @@ pub async fn cron_main(site_ctxt: Arc<RwLock<Option<Arc<SiteCtxt>>>>, seconds: u
#[cfg(test)]
mod tests {
use super::*;
use chrono::{Datelike, Duration, TimeZone, Utc};

use chrono::{Datelike, Duration, NaiveDate, TimeZone, Utc};
use database::tests::run_postgres_test;

/// Helper: unwrap the Option, panic otherwise.
fn tag(url: &str) -> String {
parse_release_string(url)
.expect("Some") // Option<_>
.0 // take the tag
}

/// Helper: unwrap the DateTime and keep only the YYYY-MM-DD part
fn day(url: &str) -> NaiveDate {
parse_release_string(url).expect("Some").1.date_naive()
}

fn days_ago(day_str: &str) -> chrono::DateTime<Utc> {
// Walk backwards until the first non-digit, then slice
let days = day_str
Expand Down Expand Up @@ -602,4 +703,55 @@ mod tests {
})
.await;
}

#[test]
fn parses_stable_versions() {
assert_eq!(
tag("static.rust-lang.org/dist/2016-05-24/channel-rust-1.9.0.toml"),
"1.9.0"
);
assert_eq!(
day("static.rust-lang.org/dist/2016-05-24/channel-rust-1.9.0.toml"),
NaiveDate::from_ymd_opt(2016, 5, 24).unwrap()
);

assert_eq!(
tag("static.rust-lang.org/dist/2025-06-26/channel-rust-1.88.0.toml"),
"1.88.0"
);
assert_eq!(
day("static.rust-lang.org/dist/2025-06-26/channel-rust-1.88.0.toml"),
NaiveDate::from_ymd_opt(2025, 6, 26).unwrap()
);
}

#[test]
fn parses_plain_beta_channel() {
let want = "beta-2016-06-01";
let url = "static.rust-lang.org/dist/2016-06-01/channel-rust-beta.toml";

assert_eq!(tag(url), want);
assert_eq!(day(url), NaiveDate::from_ymd_opt(2016, 6, 1).unwrap());
}

#[test]
fn skips_unconsumable_channels() {
// nightly never returns Anything
assert!(parse_release_string(
"static.rust-lang.org/dist/2016-05-31/channel-rust-nightly.toml"
)
.is_none());

// versioned-beta artefacts are skipped too
for should_ignore in [
"static.rust-lang.org/dist/2025-06-26/channel-rust-1.89-beta.toml",
"static.rust-lang.org/dist/2025-06-26/channel-rust-1.89.0-beta.toml",
"static.rust-lang.org/dist/2025-06-26/channel-rust-1.89.0-beta.2.toml",
] {
assert!(
parse_release_string(should_ignore).is_none(),
"{should_ignore} should be ignored"
);
}
}
}
Loading