Skip to content

Commit 7a11278

Browse files
authored
Merge pull request #10763 from Turbo87/semver-ord
Add `semver_ord(num)` SQL function and `version.semver_ord` column
2 parents 4d818bd + 2f6d121 commit 7a11278

File tree

4 files changed

+232
-0
lines changed

4 files changed

+232
-0
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
use crates_io_test_db::TestDatabase;
2+
use diesel::prelude::*;
3+
use diesel::sql_types::{Nullable, Text};
4+
use diesel_async::RunQueryDsl;
5+
use std::fmt::Debug;
6+
7+
#[tokio::test]
8+
async fn test_jsonb_output() {
9+
let test_db = TestDatabase::new();
10+
let mut conn = test_db.async_connect().await;
11+
12+
let mut check = async |num| {
13+
let query = format!("select semver_ord('{num}') as output");
14+
15+
#[derive(QueryableByName)]
16+
struct Row {
17+
#[diesel(sql_type = Nullable<Text>)]
18+
output: Option<String>,
19+
}
20+
21+
diesel::sql_query(query)
22+
.get_result::<Row>(&mut conn)
23+
.await
24+
.unwrap()
25+
.output
26+
.unwrap_or_default()
27+
};
28+
29+
insta::assert_snapshot!(check("0.0.0").await, @r#"[0, 0, 0, {}]"#);
30+
insta::assert_snapshot!(check("1.0.0-alpha.1").await, @r#"[1, 0, 0, [true, "alpha", false, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, ""]]"#);
31+
32+
// see https://crates.io/crates/cursed-trying-to-break-cargo/1.0.0-0.HDTV-BluRay.1020p.YTSUB.L33TRip.mkv – thanks @Gankra!
33+
insta::assert_snapshot!(check("1.0.0-0.HDTV-BluRay.1020p.YTSUB.L33TRip.mkv").await, @r#"[1, 0, 0, [false, 0, true, "HDTV-BluRay", true, "1020p", true, "YTSUB", true, "L33TRip", true, "mkv", null, null, null, null, null, null, null, null, ""]]"#);
34+
35+
// Invalid version string
36+
insta::assert_snapshot!(check("foo").await, @"");
37+
38+
// Version string with a lot of prerelease identifiers
39+
insta::assert_snapshot!(check("1.2.3-1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.end").await, @r#"[1, 2, 3, [false, 1, false, 2, false, 3, false, 4, false, 5, false, 6, false, 7, false, 8, false, 9, false, 10, "11.12.13.14.15.16.17.end"]]"#);
40+
}
41+
42+
/// This test checks that the `semver_ord` function orders versions correctly.
43+
///
44+
/// The test data is a list of versions in a random order. The versions are then
45+
/// ordered by the `semver_ord` function and the result is compared to the
46+
/// expected order (see <https://semver.org/#spec-item-11>).
47+
///
48+
/// The test data was imported from <https://github.com/dtolnay/semver/blob/1.0.26/tests/test_version.rs#L223-L242>.
49+
#[tokio::test]
50+
async fn test_spec_order() {
51+
let test_db = TestDatabase::new();
52+
let mut conn = test_db.async_connect().await;
53+
54+
#[derive(QueryableByName)]
55+
struct Row {
56+
#[diesel(sql_type = Text)]
57+
num: String,
58+
}
59+
60+
impl Debug for Row {
61+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62+
f.write_str(&self.num)
63+
}
64+
}
65+
66+
let mut check = async |order| {
67+
let query = format!(
68+
r#"
69+
with nums as (
70+
select unnest(array[
71+
'1.0.0-beta',
72+
'1.0.0-alpha',
73+
'1.0.0-rc.1',
74+
'1.0.0',
75+
'1.0.0-beta.2',
76+
'1.0.0-alpha.1',
77+
'1.0.0-alpha.beta',
78+
'1.0.0-beta.11'
79+
]) as num
80+
)
81+
select num
82+
from nums
83+
order by semver_ord(num) {order};
84+
"#
85+
);
86+
87+
diesel::sql_query(query)
88+
.load::<Row>(&mut conn)
89+
.await
90+
.unwrap()
91+
};
92+
93+
insta::assert_debug_snapshot!(check("asc").await, @r"
94+
[
95+
1.0.0-alpha,
96+
1.0.0-alpha.1,
97+
1.0.0-alpha.beta,
98+
1.0.0-beta,
99+
1.0.0-beta.2,
100+
1.0.0-beta.11,
101+
1.0.0-rc.1,
102+
1.0.0,
103+
]
104+
");
105+
106+
insta::assert_debug_snapshot!(check("desc").await, @r"
107+
[
108+
1.0.0,
109+
1.0.0-rc.1,
110+
1.0.0-beta.11,
111+
1.0.0-beta.2,
112+
1.0.0-beta,
113+
1.0.0-alpha.beta,
114+
1.0.0-alpha.1,
115+
1.0.0-alpha,
116+
]
117+
");
118+
}

crates/crates_io_database_dump/src/dump-db.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ id = "public"
233233
crate_id = "public"
234234
num = "public"
235235
num_no_build = "public"
236+
semver_ord = "private"
236237
updated_at = "public"
237238
created_at = "public"
238239
downloads = "public"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- Undo the `semver_ord` migration. Objects are dropped in the reverse order
-- of their creation so that nothing is removed while something else still
-- depends on it: first the trigger, then its trigger function, then the
-- `versions.semver_ord` column, and finally the `semver_ord` function itself.
drop trigger trigger_set_semver_ord on versions;
2+
drop function set_semver_ord();
3+
alter table versions drop column semver_ord;
4+
drop function semver_ord;
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
-- Add `semver_ord(num)` function to convert a semver string into a JSONB array for version comparison purposes.
2+
3+
-- semver_ord(num): convert a semver string into a JSONB value whose natural
-- JSONB ordering follows semver precedence.
--
-- Returns `[major, minor, patch, prerelease]`, or SQL NULL when `num` does
-- not start with a `major.minor.patch` triple. `prerelease` is an empty
-- object for release versions and a fixed-length array otherwise.
create or replace function semver_ord(num varchar) returns jsonb as $$
declare
    -- We need to ensure that the prerelease array has the same length for all
    -- versions since shorter arrays have lower precedence in JSONB. We store
    -- the first 10 parts of the prerelease string as pairs of booleans and
    -- numbers or text values, and then a final text item for the remaining
    -- parts.
    max_prerelease_parts constant int := 10;

    -- The separators between major, minor and patch are escaped so that only
    -- a literal `.` is accepted; an unescaped `.` would match any character
    -- and let strings such as `1x2y3` pass as versions.
    --
    -- The pattern is deliberately not anchored at the end: the "build
    -- metadata" part of the semver string is ignored, since it has no impact
    -- on the version ordering.
    match_result text[] := regexp_match(num, '^(\d+)\.(\d+)\.(\d+)(?:-([0-9A-Za-z\-.]+))?');

    prerelease jsonb;
    prerelease_parts text[];
    prerelease_part text;

    -- Index of the next (flag, value) pair inside the `prerelease` array.
    i int := 0;
begin
    if match_result is null then
        return null;
    end if;

    if match_result[4] is null then
        -- A JSONB object has higher precedence than an array, and versions with
        -- prerelease specifiers should have lower precedence than those without.
        prerelease := jsonb_build_object();
    else
        -- Start with an array of JSON nulls: one (flag, value) pair per
        -- prerelease part plus one trailing slot for the overflow text.
        prerelease := to_jsonb(array_fill(NULL::bool, ARRAY[max_prerelease_parts * 2 + 1]));

        -- Split prerelease string by `.` and write the first
        -- `max_prerelease_parts` items into the `prerelease` array.
        prerelease_parts := string_to_array(match_result[4], '.');

        foreach prerelease_part in array prerelease_parts[1:max_prerelease_parts]
        loop
            -- Parse parts as numbers if they consist of only digits.
            if regexp_like(prerelease_part, '^\d+$') then
                -- In JSONB a number has higher precedence than a string but in
                -- semver it is the other way around, so we use true/false to
                -- work around this.
                prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(false));
                prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part::numeric));
            else
                prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(true));
                prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part));
            end if;

            i := i + 2;
        end loop;

        -- Everything beyond the first `max_prerelease_parts` parts is joined
        -- back together and stored as a single text item in the last slot
        -- (an empty string when there is no overflow).
        prerelease := jsonb_set(prerelease, array[(max_prerelease_parts * 2)::text], to_jsonb(array_to_string(prerelease_parts[max_prerelease_parts + 1:], '.')));
    end if;

    -- Return an array with the major, minor, patch, and prerelease parts.
    return jsonb_build_array(
        match_result[1]::numeric,
        match_result[2]::numeric,
        match_result[3]::numeric,
        prerelease
    );
end;
$$ language plpgsql immutable;

comment on function semver_ord is 'Converts a semver string into a JSONB array for version comparison purposes. The array has the following format: [major, minor, patch, prerelease] and when used for sorting follow the precedence rules defined in the semver specification (https://semver.org/#spec-item-11).';
69+
70+
71+
-- Add the column on `versions` that holds the `semver_ord` value of `num`.

alter table versions add column semver_ord jsonb;

comment on column versions.semver_ord is 'JSONB representation of the version number for sorting purposes.';
77+
78+
79+
-- Fill in `semver_ord` for every newly inserted version via a trigger.
--
-- A generated column would be the natural fit, but adding one forces a full
-- rewrite of the table, which is not feasible for a table of this size.

create or replace function set_semver_ord() returns trigger as $$
begin
    -- Derive the sort key from the version number of the row being inserted.
    select semver_ord(new.num) into new.semver_ord;
    return new;
end;
$$ language plpgsql;

create or replace trigger trigger_set_semver_ord
    before insert on versions
    for each row
    execute function set_semver_ord();
95+
96+
97+
-- Populate the `semver_ord` column for existing versions.
98+
-- This query should be run manually in small batches to avoid locking the
99+
-- table for too long.
100+
101+
-- with versions_to_update as (
102+
-- select id, num
103+
-- from versions
104+
--   where semver_ord is null
105+
-- limit 1000
106+
-- )
107+
-- update versions
108+
-- set semver_ord = semver_ord(num)
109+
-- where id in (select id from versions_to_update);

0 commit comments

Comments
 (0)