Skip to content

Commit 4e0afcc

Browse files
authored
Implement a zero copy tzdb provider (#662)
This PR implements the bulk of the logic from the initial PR to create a zero copy tzdb provider. A couple of notes: - Some of the logic is duplicated from the `CompiledTzdbProvider` and `FsTzdbProvider` (although there is hopefully room to consolidate this in future PRs). - This has been run locally on Boa, and there were no major regressions, with maybe one or two additional failures in the intl402 suite.
1 parent 6ae79e5 commit 4e0afcc

File tree

14 files changed

+1364
-283
lines changed

14 files changed

+1364
-283
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ web-time = { workspace = true, optional = true }
9797
iana-time-zone = { workspace = true, optional = true }
9898

9999
[dev-dependencies]
100-
timezone_provider = { workspace = true, features = ["zoneinfo64"] }
100+
timezone_provider = { workspace = true, features = ["zoneinfo64", "experimental_tzif"] }
101101
zoneinfo64 = { workspace = true }
102102
resb = "0.1.0"
103103

provider/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ datagen = [
3737
]
3838
std = []
3939
# Experimental tzif/tzdb compiled data
40-
experimental_tzif = []
40+
experimental_tzif = ["dep:zerofrom", "zerofrom/derive"]
4141

4242
# Performing timezone resolution with the `tzif` crate
4343
tzif = ["dep:tzif",
@@ -63,6 +63,7 @@ zoneinfo_rs = { workspace = true, features = ["std"], optional = true }
6363
tzif = { workspace = true, optional = true }
6464
jiff-tzdb = { workspace = true, optional = true }
6565
combine = { workspace = true, optional = true }
66+
zerofrom = { version = "0.1.6", optional = true }
6667

6768
# zoneinfo64 dependency
6869
zoneinfo64 = { workspace = true, optional = true }

provider/README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ Below is a list of currently available time zone providers.
2222

2323
- `ZoneInfo64TzdbProvider`: a provider using ICU4C's zoneinfo64 resource bundle (enable with the `zoneinfo64` feature flag)
2424
- `FsTzdbProvider`: a provider that reads and parses tzdata at runtime from the host file system's
25-
TZif files (enable with `tzif` feature flag)
26-
- `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZif's compiled
27-
into the application (enable with `tzif` feature flag)
28-
29-
Coming soon (hopefully), a zero copy compiled tzdb provider (see `experimental_tzif` for more).
25+
TZif files (enable with `tzif` feature flag)
26+
- `CompiledTzdbProvider`: a provider that reads and parses tzdata at runtime from TZifs compiled
27+
into the application (enable with `tzif` feature flag)
28+
- `ZeroCompiledTzdbProvider`: a provider that deserializes time zone data from TZifs compiled
29+
into the application (enable with `experimental_tzif` feature flag)
3030

3131
### Time zone provider traits
3232

provider/src/common.rs

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
//! Common logic shared across TZif providers
2+
3+
use crate::{
4+
provider::{GapEntryOffsets, UtcOffsetSeconds},
5+
utils,
6+
};
7+
8+
use core::ops::Range;
9+
10+
#[cfg(feature = "tzif")]
11+
use tzif::data::{
12+
posix::{DstTransitionInfo, PosixTzString, TransitionDate, TransitionDay},
13+
tzif::LocalTimeTypeRecord,
14+
};
15+
16+
// TODO: Workshop record name?
17+
/// `LocalTimeRecordResult` is the outcome of searching a time zone's transitions
18+
/// for a local time, i.e. epoch seconds that have not had the offset seconds
19+
/// applied to them.
20+
///
21+
/// The search can produce one offset (`Single`), no valid offset (`Empty`), or two
22+
/// offsets (`Ambiguous`). `Empty` arises when the provided time is invalid because it
23+
/// falls in the span skipped by a forward shift; `Ambiguous` arises when the time falls
/// in the span that is repeated by a backward shift.
24+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25+
pub enum LocalTimeRecordResult {
26+
/// The local time does not exist.
/// NOTE(review): the payload presumably carries the offsets on either side of the gap; confirm against `GapEntryOffsets`.
Empty(GapEntryOffsets),
27+
/// Exactly one offset applies to the local time.
Single(UtcOffsetSeconds),
28+
/// Two offsets apply to the local time.
Ambiguous {
29+
/// The first candidate offset.
first: UtcOffsetSeconds,
30+
/// The second candidate offset.
second: UtcOffsetSeconds,
31+
},
32+
}
33+
34+
/// `TimeZoneTransitionInfo` pairs a time zone offset with the (optional) epoch time of
/// the transition it is associated with.
35+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36+
pub struct TimeZoneTransitionInfo {
37+
/// The transition time epoch at which the offset needs to be applied.
/// NOTE(review): presumably expressed in epoch seconds, with `None` meaning that no
/// transition was found; confirm against the providers that build this value.
38+
pub transition_epoch: Option<i64>,
39+
/// The time zone offset in seconds.
40+
pub offset: UtcOffsetSeconds,
41+
}
42+
43+
impl From<UtcOffsetSeconds> for LocalTimeRecordResult {
44+
fn from(value: UtcOffsetSeconds) -> Self {
45+
Self::Single(value)
46+
}
47+
}
48+
49+
#[cfg(feature = "tzif")]
impl From<LocalTimeTypeRecord> for LocalTimeRecordResult {
    /// Converts a single TZif local time type record into a
    /// [`LocalTimeRecordResult::Single`] carrying that record's offset.
    fn from(value: LocalTimeTypeRecord) -> Self {
        let offset: UtcOffsetSeconds = value.into();
        LocalTimeRecordResult::Single(offset)
    }
}
55+
56+
#[cfg(feature = "tzif")]
impl From<(LocalTimeTypeRecord, LocalTimeTypeRecord)> for LocalTimeRecordResult {
    /// Converts a pair of TZif local time type records into a
    /// [`LocalTimeRecordResult::Ambiguous`] result, preserving the pair's order.
    fn from(value: (LocalTimeTypeRecord, LocalTimeTypeRecord)) -> Self {
        let (first_record, second_record) = value;
        LocalTimeRecordResult::Ambiguous {
            first: first_record.into(),
            second: second_record.into(),
        }
    }
}
65+
66+
/// Stores the information about DST transitions for a given year
67+
pub(crate) struct DstTransitionInfoForYear {
68+
pub(crate) dst_start_seconds: i64,
69+
pub(crate) dst_end_seconds: i64,
70+
pub(crate) std_offset: UtcOffsetSeconds,
71+
pub(crate) dst_offset: UtcOffsetSeconds,
72+
}
73+
74+
impl DstTransitionInfoForYear {
75+
#[cfg(feature = "tzif")]
76+
pub(crate) fn compute(
77+
posix_tz_string: &PosixTzString,
78+
dst_variant: &DstTransitionInfo,
79+
year: i32,
80+
) -> Self {
81+
let std_offset = UtcOffsetSeconds::from(&posix_tz_string.std_info);
82+
let dst_offset = UtcOffsetSeconds::from(&dst_variant.variant_info);
83+
let dst_start_seconds =
84+
calculate_transition_seconds_for_year(year, dst_variant.start_date, std_offset);
85+
let dst_end_seconds =
86+
calculate_transition_seconds_for_year(year, dst_variant.end_date, dst_offset);
87+
Self {
88+
dst_start_seconds,
89+
dst_end_seconds,
90+
std_offset,
91+
dst_offset,
92+
}
93+
}
94+
95+
// Returns the range between offsets in this year
96+
// This may cover DST or standard time, whichever starts first
97+
pub(crate) fn transition_range(&self) -> Range<i64> {
98+
if self.dst_start_seconds > self.dst_end_seconds {
99+
self.dst_end_seconds..self.dst_start_seconds
100+
} else {
101+
self.dst_start_seconds..self.dst_end_seconds
102+
}
103+
}
104+
}
105+
106+
/// Computes the UTC epoch seconds at which the POSIX transition rule `transition_date`
/// takes place in `year`.
///
/// `offset` is subtracted from the rule's local time to obtain the UTC timestamp, so
/// callers pass the offset that is in effect when the transition occurs.
#[cfg(feature = "tzif")]
107+
pub(crate) fn calculate_transition_seconds_for_year(
108+
year: i32,
109+
transition_date: TransitionDate,
110+
offset: UtcOffsetSeconds,
111+
) -> i64 {
112+
// Epoch seconds at the start of `year`, plus whether `year` is a leap year.
113+
let year_epoch_seconds = i64::from(utils::epoch_days_for_year(year)) * 86400;
114+
let is_leap = utils::is_leap(year);
115+

116+
// Calculate the days in the year for the TransitionDate
117+
// This value is zero-indexed so it can be added to the year's epoch seconds
118+
let days = match transition_date.day {
119+
// One-indexed day that never counts February 29: past day 59 (February 28) a leap
// day has to be added back in leap years.
TransitionDay::NoLeap(day) if day > 59 => day - 1 + is_leap as u16,
120+
TransitionDay::NoLeap(day) => day - 1,
121+
// Used as-is: this variant is already a zero-indexed day of the year.
TransitionDay::WithLeap(day) => day,
122+
TransitionDay::Mwd(month, week, day) => {
123+
let days_to_month = utils::month_to_day((month - 1) as u8, is_leap);
124+
let days_in_month = u16::from(utils::iso_days_in_month(year, month as u8));
125+

126+
// Day of the week on which the month starts (0 = Sunday in the example below).
127+
let day_offset = (u16::from(utils::epoch_seconds_to_day_of_week(year_epoch_seconds))
128+
+ days_to_month)
129+
.rem_euclid(7);
130+

131+
// EXAMPLE:
132+
//
133+
// 0 1 2 3 4 5 6
134+
// sun mon tue wed thu fri sat
135+
// - - - 0 1 2 3
136+
// 4 5 6 7 8 9 10
137+
// 11 12 13 14 15 16 17
138+
// 18 19 20 21 22 23 24
139+
// 25 26 27 28 29 30 -
140+
//
141+
// The day_offset = 3, since the month starts on a wednesday.
142+
//
143+
// We're looking for the second friday of the month. Since the month started on or before
144+
// a friday, the first friday is in the first row, so we need to start counting from week 0:
145+
//
146+
// day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (2 - 1) * 7 + 5 - 3 = 9
147+
//
148+
// This works if the month started on or before the day we want (day_offset <= day). However, if that's not the
149+
// case, we need to start counting on week 1. For example, calculate the day of the month for the third monday
150+
// of the month:
151+
//
152+
// day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset = (3 - 0) * 7 + 1 - 3 = 19
153+

154+
// Note: this day_of_month is zero-indexed!
155+
let mut day_of_month = (week - u16::from(day_offset <= day)) * 7 + day - day_offset;
156+

157+
// Week 5 actually means "last <dayofweek> of month". The day_of_month calculation
158+
// above uses `week` directly; so we might end up spilling into the next month. In that
159+
// case, we normalize to the fourth week of the month.
160+
//
161+
// Note that this only needs to be done once: every month has at least four of each
162+
// day of the week, since all months have 28 days or more.
163+
//
164+
// We add one because day_of_month is zero-indexed
165+
if day_of_month + 1 > days_in_month {
166+
day_of_month -= 7
167+
}
168+

169+
days_to_month + day_of_month
170+
}
171+
};
172+

173+
// The transition time is expressed in local time, so we need to subtract the UTC offset to
174+
// get the correct UTC timestamp for the transition.
175+
year_epoch_seconds + i64::from(days) * 86400 + transition_date.time.0 - offset.0
176+
}
177+
178+
/// Marks a transition as being of the daylight-saving (`Dst`) or standard-time
/// (`Std`) kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TransitionType {
    Dst,
    Std,
}

impl TransitionType {
    /// Flips the value in place: `Dst` becomes `Std` and `Std` becomes `Dst`.
    pub(crate) fn invert(&mut self) {
        let flipped = match self {
            Self::Std => Self::Dst,
            Self::Dst => Self::Std,
        };
        *self = flipped;
    }
}
192+
193+
/// A month / week-of-month / day-of-week triple, as found in a POSIX tz string.
///
/// Values order by month, then week, then day (the derived ordering follows field order).
///
/// For more information, see the [POSIX tz string docs](https://sourceware.org/glibc/manual/2.40/html_node/Proleptic-TZ.html)
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Mwd {
    month: u8,
    week: u8,
    day: u8,
}

impl Mwd {
    /// Builds an `Mwd` from `u16` components; a component that does not fit in a `u8`
    /// becomes `0`.
    #[cfg(feature = "tzif")]
    pub(crate) fn from_u16(month: u16, week: u16, day: u16) -> Self {
        let narrow = |component: u16| u8::try_from(component).unwrap_or(0);
        Self::from_u8(narrow(month), narrow(week), narrow(day))
    }

    /// Builds an `Mwd` directly from its three components.
    pub(crate) fn from_u8(month: u8, week: u8, day: u8) -> Self {
        Mwd { month, week, day }
    }

    /// Converts `week` from a weekday ordinal (1 = first friday, ...) into a week number
    /// (1 = first week, ...), given the day of the week of the month's 0th day.
    ///
    /// Weekdays at or before the 0th day's weekday first occur in the week after the one
    /// the month starts in, so their week is bumped by one.
    pub(crate) fn normalize_to_week_number(&mut self, day_of_week_zeroth_day: u8) {
        let first_occurrence_in_next_week = self.day <= day_of_week_zeroth_day;
        self.week += u8::from(first_occurrence_in_next_week);
    }
}
226+
227+
/// Represents an MWD for a given time
228+
#[derive(Debug)]
229+
pub(crate) struct MwdForTime {
230+
/// This will never have day = 5
231+
pub(crate) mwd: Mwd,
232+
/// The day of the week of the 0th day (the day before the month starts)
233+
pub(crate) day_of_week_zeroth_day: u8,
234+
/// This is the day of week of the 29th and the last day of the month,
235+
/// if the month has more than 28 days.
236+
/// Basically, this is the start and end of the "fifth $weekday of the month" period
237+
pub(crate) extra_days: Option<(u8, u8)>,
238+
}
239+
240+
impl MwdForTime {
241+
#[cfg(any(feature = "tzif", feature = "experimental_tzif"))]
242+
pub(crate) fn from_seconds(seconds: i64) -> Self {
243+
let (year, month, day_of_month) = utils::ymd_from_epoch_milliseconds(seconds * 1_000);
244+
let week_of_month = day_of_month / 7 + 1;
245+
let day_of_week = utils::epoch_seconds_to_day_of_week(seconds);
246+
let mut mwd = Mwd::from_u8(month, week_of_month, day_of_week);
247+
let days_in_month = utils::iso_days_in_month(year, month);
248+
let day_of_week_zeroth_day =
249+
(i16::from(day_of_week) - i16::from(day_of_month)).rem_euclid(7) as u8;
250+
mwd.normalize_to_week_number(day_of_week_zeroth_day);
251+
if day_of_month > 28 {
252+
let day_of_week_day_29 = (day_of_week_zeroth_day + 29).rem_euclid(7);
253+
let day_of_week_last_day = (day_of_week_zeroth_day + days_in_month).rem_euclid(7);
254+
Self {
255+
mwd,
256+
day_of_week_zeroth_day,
257+
extra_days: Some((day_of_week_day_29, day_of_week_last_day)),
258+
}
259+
} else {
260+
// No day 5
261+
Self {
262+
mwd,
263+
day_of_week_zeroth_day,
264+
extra_days: None,
265+
}
266+
}
267+
}
268+
269+
/// MWDs from Posix data can contain `w=5`, which means the *last* $weekday of the month,
270+
/// not the 5th. For MWDs in the same month, this normalizes the 5 to the actual number of the
271+
/// last weekday of the month (5 or 4)
272+
///
273+
/// Furthermore, this turns the week number into a true week number: the "second friday in March"
274+
/// will be turned into "the friday in the first week of March" or "the Friday in the second week of March"
275+
/// depending on when March starts.
276+
///
277+
/// This normalization *only* applies to MWDs in the same month. For other MWDs, such normalization is irrelevant.
278+
pub(crate) fn normalize_mwd(&self, other: &mut Mwd) {
279+
// If we're in the same month, normalization will actually have a useful effect
280+
if self.mwd.month == other.month {
281+
// First normalize MWDs that are like "the last $weekday in the month"
282+
// the last $weekday in the month, we need special handling
283+
if other.week == 5 {
284+
if let Some((day_29, last_day)) = self.extra_days {
285+
if day_29 < last_day {
286+
if other.day < day_29 || other.day > last_day {
287+
// This day isn't found in the last week. Subtract one.
288+
other.week = 4;
289+
}
290+
} else {
291+
// The extra part of the month crosses Sunday
292+
if other.day < day_29 && other.day > last_day {
293+
// This day isn't found in the last week. Subtract one.
294+
other.week = 4;
295+
}
296+
}
297+
} else {
298+
// There is no week 5 in this month, normalize to 4
299+
other.week = 4;
300+
}
301+
}
302+
303+
other.normalize_to_week_number(self.day_of_week_zeroth_day);
304+
}
305+
}
306+
}
307+
308+
/// Returns the half-open range between the two values, with the smaller one as the
/// start. Equal inputs produce an empty range.
pub(crate) fn offset_range(offset_one: i64, offset_two: i64) -> core::ops::Range<i64> {
    let low = offset_one.min(offset_two);
    let high = offset_one.max(offset_two);
    low..high
}
314+
315+
/// Classifies a transition by how the offsets change across it.
#[derive(Debug)]
316+
pub(crate) enum TransitionKind {
317+
/// The offsets didn't change (happens when abbreviations/savings values change)
318+
Smooth,
319+
/// The offsets changed in a way that leaves a gap
320+
Gap,
321+
/// The offsets changed in a way that produces overlapping time.
322+
Overlap,
323+
}

provider/src/epoch_nanoseconds.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ impl From<tzif::data::time::Seconds> for EpochNanoseconds {
5757
}
5858

5959
#[inline]
60-
#[cfg(any(feature = "tzif", feature = "zoneinfo64"))]
60+
#[cfg(any(
61+
feature = "tzif",
62+
feature = "zoneinfo64",
63+
feature = "experimental_tzif"
64+
))]
6165
pub(crate) fn seconds_to_nanoseconds(seconds: i64) -> i128 {
6266
seconds as i128 * NS_IN_S
6367
}

0 commit comments

Comments
 (0)