Skip to content

Commit 6d901dc

Browse files
committed
Add semver_ord(num) SQL function and version.semver_ord column
We currently rely on the Rust `semver` crate to implement our "sort by semantic versioning" functionality, which is used by the web interface, but also to determine the "default version". This has the downside that we need to load the full list of version numbers for a crate from the database to the API server, sort it and then throw away the ones that we don't need. This commit implements a `semver_ord(num)` pgSQL function that returns a JSONB array, which has the same ordering precedence as the Semantic Versioning spec (https://semver.org/#spec-item-11), with the small caveat that it only supports up to 15 prerelease parts. The maximum number of prerelease parts in our current dataset is 7, so 15 should be plenty. The database migration in this commit also adds a new `semver_ord` column to the `versions` table, and an on-insert trigger function that automatically derives the `semver_ord` column from the `num` column value. Once this migration has run, the existing versions can be backfilled by running the following SQL script, until all versions are processed: ```sql with versions_to_update as ( select id, num from versions where semver_ord = 'null'::jsonb limit 1000 ) update versions set semver_ord = semver_ord(num) where id in (select id from versions_to_update); ```
1 parent 27829c7 commit 6d901dc

File tree

3 files changed

+170
-0
lines changed

3 files changed

+170
-0
lines changed
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use crates_io_test_db::TestDatabase;
2+
use diesel::prelude::*;
3+
use diesel::sql_types::Text;
4+
use diesel_async::RunQueryDsl;
5+
use std::fmt::Debug;
6+
7+
/// This test checks that the `semver_ord` function orders versions correctly.
8+
///
9+
/// The test data is a list of versions in a random order. The versions are then
10+
/// ordered by the `semver_ord` function and the result is compared to the
11+
/// expected order (see <https://semver.org/#spec-item-11>).
12+
///
13+
/// The test data was imported from <https://github.com/dtolnay/semver/blob/1.0.26/tests/test_version.rs#L223-L242>.
14+
#[tokio::test]
15+
async fn test_spec_order() {
16+
let test_db = TestDatabase::new();
17+
let mut conn = test_db.async_connect().await;
18+
19+
let query = r#"
20+
with nums as (
21+
select unnest(array[
22+
'1.0.0-beta',
23+
'1.0.0-alpha',
24+
'1.0.0-rc.1',
25+
'1.0.0',
26+
'1.0.0-beta.2',
27+
'1.0.0-alpha.1',
28+
'1.0.0-alpha.beta',
29+
'1.0.0-beta.11'
30+
]) as num
31+
)
32+
select num
33+
from nums
34+
order by semver_ord(num);
35+
"#;
36+
37+
#[derive(QueryableByName)]
38+
struct Row {
39+
#[diesel(sql_type = Text)]
40+
num: String,
41+
}
42+
43+
impl Debug for Row {
44+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45+
f.write_str(&self.num)
46+
}
47+
}
48+
49+
let nums = diesel::sql_query(query)
50+
.load::<Row>(&mut conn)
51+
.await
52+
.unwrap();
53+
54+
insta::assert_debug_snapshot!(nums, @r"
55+
[
56+
1.0.0-alpha,
57+
1.0.0-alpha.1,
58+
1.0.0-alpha.beta,
59+
1.0.0-beta,
60+
1.0.0-beta.2,
61+
1.0.0-beta.11,
62+
1.0.0-rc.1,
63+
1.0.0,
64+
]
65+
");
66+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- Revert the `semver_ord` migration by removing its objects in the reverse
-- order of their creation.

-- The trigger executes `set_semver_ord()`, so it is dropped before that
-- function.
drop trigger trigger_set_semver_ord
    on versions;

drop function set_semver_ord();

alter table versions
    drop column semver_ord;

drop function semver_ord;
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
-- Add `semver_ord(num)` function to convert a semver string into a JSONB array for version comparison purposes.
--
-- The result has the shape `[major, minor, patch, prerelease]`, where
-- `prerelease` is either an empty JSONB object (no prerelease specifier) or a
-- fixed-length JSONB array of `(is_text, value)` pairs padded with `null`.
-- If `num` does not start with `major.minor.patch`, the three numeric parts
-- are `null` and `prerelease` is the empty object.

create or replace function semver_ord(num varchar) returns jsonb as $$
declare
    -- We need to ensure that the array has the same length for all versions
    -- since shorter arrays have lower precedence in JSONB. Since we also need
    -- to add a boolean value for each part of the prerelease string, this
    -- results in us supporting up to 15 parts in the prerelease string.
    -- Everything beyond that will be ignored.
    prerelease_array_length constant int := 30;

    -- We ignore the "build metadata" part of the semver string, since it has
    -- no impact on the version ordering. The separators between major, minor
    -- and patch are escaped so that they only match a literal `.` and not
    -- any arbitrary character.
    match_result text[] := regexp_match(num, '^(\d+)\.(\d+)\.(\d+)(?:-([0-9A-Za-z\-.]+))?');

    prerelease jsonb;
    prerelease_part text;
    i int := 0;
begin
    if match_result[4] is null then
        -- A JSONB object has higher precedence than an array, and versions with
        -- prerelease specifiers should have lower precedence than those without.
        prerelease := '{}'::jsonb;
    else
        -- Start with an array of `null` values, which sort below every
        -- boolean, so that a shorter prerelease string has lower precedence
        -- than a longer one sharing the same prefix.
        prerelease := to_jsonb(array_fill(NULL::bool, ARRAY[prerelease_array_length]));

        -- Split prerelease string by `.` and "append" items to
        -- the `prerelease` array.
        foreach prerelease_part in array string_to_array(match_result[4], '.')
        loop
            -- Parse parts as numbers if they consist of only digits.
            if regexp_like(prerelease_part, '^\d+$') then
                -- In JSONB a number has higher precedence than a string but in
                -- semver it is the other way around, so we use true/false to
                -- work around this.
                prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(false));
                prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part::numeric));
            else
                prerelease := jsonb_set(prerelease, array[i::text], to_jsonb(true));
                prerelease := jsonb_set(prerelease, array[(i + 1)::text], to_jsonb(prerelease_part));
            end if;

            -- Exit the loop if we have reached the maximum number of parts.
            -- The comparison must be `>=`: the valid indexes are
            -- `0 .. prerelease_array_length - 1`, and `jsonb_set` appends
            -- to the array when given an out-of-range index, which would
            -- make the array longer than `prerelease_array_length` and
            -- break the "same length for all versions" invariant.
            i := i + 2;
            exit when i >= prerelease_array_length;
        end loop;
    end if;

    -- Return an array with the major, minor, patch, and prerelease parts.
    return jsonb_build_array(
        match_result[1]::numeric,
        match_result[2]::numeric,
        match_result[3]::numeric,
        prerelease
    );
end;
$$ language plpgsql immutable;

comment on function semver_ord is 'Converts a semver string into a JSONB array for version comparison purposes. The array has the following format: [major, minor, patch, prerelease] and when used for sorting follow the precedence rules defined in the semver specification (https://semver.org/#spec-item-11).';
60+
61+
62+
-- Add the matching `semver_ord` column to the `versions` table. The column
-- defaults to a JSONB `null` (not an SQL NULL), which marks rows whose value
-- has not been derived from `num` yet.

alter table versions
    add column semver_ord jsonb not null default 'null'::jsonb;

comment on column versions.semver_ord is 'JSONB representation of the version number for sorting purposes.';
68+
69+
70+
-- Trigger function that fills in the `semver_ord` column from the `num`
-- column of the row being written.
--
-- A generated column would be the ideal solution, but adding one would force
-- a full table rewrite, which is not feasible for large tables.

create or replace function set_semver_ord() returns trigger as $$
begin
    select semver_ord(new.num) into new.semver_ord;
    return new;
end
$$ language plpgsql;
81+
82+
-- Run `set_semver_ord()` for every row inserted into `versions`, before the
-- row is written.
create or replace trigger trigger_set_semver_ord
    before insert
    on versions
    for each row
execute function set_semver_ord();
86+
87+
88+
-- Populate the `semver_ord` column for existing versions.
89+
-- This query should be run manually in small batches to avoid locking the
90+
-- table for too long.
91+
92+
-- with versions_to_update as (
93+
-- select id, num
94+
-- from versions
95+
-- where semver_ord = 'null'::jsonb
96+
-- limit 1000
97+
-- )
98+
-- update versions
99+
-- set semver_ord = semver_ord(num)
100+
-- where id in (select id from versions_to_update);

0 commit comments

Comments
 (0)