@@ -60,38 +60,62 @@ const NO_DISTANCE: u32 = 0;
60
60
// MM ≅ M (09 ≅ 9)
61
61
const WIDTH_MISMATCH_DISTANCE : u32 = 1 ;
62
62
63
+ // If a glue pattern is required, give a small penalty.
64
+ const GLUE_DISTANCE : u32 = 10 ;
65
+
63
66
// C. Numeric and text fields are given a larger distance from each other.
64
67
// - MMM ≈ MM (Sep ≈ 09)
65
68
// MMM
66
- const TEXT_VS_NUMERIC_DISTANCE : u32 = 10 ;
69
+ const TEXT_VS_NUMERIC_DISTANCE : u32 = 100 ;
67
70
68
71
// D. Symbols representing substantial differences (week of year vs week of month) are given much
69
72
//    larger distances from each other.
70
73
// - d ≋ D; (12 ≋ 345) Day of month vs Day of year
71
- const SUBSTANTIAL_DIFFERENCES_DISTANCE : u32 = 100 ;
74
+ const SUBSTANTIAL_DIFFERENCES_DISTANCE : u32 = 1000 ;
72
75
73
76
// A skeleton had more symbols than what was requested.
74
- const SKELETON_EXTRA_SYMBOL : u32 = 1000 ;
77
+ const SKELETON_EXTRA_SYMBOL : u32 = 10000 ;
75
78
76
79
// A requested symbol is missing in the skeleton. Note that this final value can be more than
77
80
// MAX_SKELETON_FIELDS, as it's counting the missing requested fields, which can be longer than
78
81
// the stored skeletons. There cannot be any cases higher than this one.
79
- const REQUESTED_SYMBOL_MISSING : u32 = 10000 ;
82
+ const REQUESTED_SYMBOL_MISSING : u32 = 100000 ;
80
83
81
84
/// The best skeleton found, alongside information on how well it matches.
82
85
///
83
86
/// According to the [UTS 35 skeleton matching algorithm](https://unicode.org/reports/tr35/tr35-dates.html#Matching_Skeletons)
84
87
/// there will be a guaranteed match for a skeleton. However, with this initial implementation,
85
88
/// there is no attempt to add on missing fields. This enum encodes the variants for the current
86
89
/// search for a best skeleton.
90
+ ///
91
+ /// The patterns are paired with a measure of their quality.
87
92
#[ derive( Debug , PartialEq , Clone ) ]
88
93
#[ allow( missing_docs) ]
89
94
pub enum BestSkeleton < T > {
90
- AllFieldsMatch ( T ) ,
91
- MissingOrExtraFields ( T ) ,
95
+ AllFieldsMatch ( T , SkeletonQuality ) ,
96
+ MissingOrExtraFields ( T , SkeletonQuality ) ,
92
97
NoMatch ,
93
98
}
94
99
100
/// A measure of the quality of a skeleton.
///
/// Internally, this is a u32, a "distance" value. This value is highly
/// unstable and should not be compared across versions. It should be used
/// only for comparing against other distances in the same version of ICU4X.
///
/// A smaller internal distance means a better match. The derived ordering
/// compares that distance directly, so `SkeletonQuality::best()` sorts
/// *before* `SkeletonQuality::worst()`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct SkeletonQuality(u32);

impl SkeletonQuality {
    /// Returns the worst possible quality measure (the largest distance).
    pub const fn worst() -> Self {
        Self(u32::MAX)
    }

    /// Returns the best possible quality measure (a distance of zero).
    pub const fn best() -> Self {
        Self(0)
    }
}
118
+
95
119
/// This function swaps out the time zone name field for the appropriate one. Skeleton matching
96
120
/// only needs to find a single "v" field, and then the time zone name can expand from there.
97
121
fn naively_apply_time_zone_name (
@@ -140,51 +164,57 @@ pub fn create_best_pattern_for_fields<'data>(
140
164
get_best_available_format_pattern ( skeletons, fields, prefer_matched_pattern) ;
141
165
142
166
// Try to match a skeleton to all of the fields.
143
- if let BestSkeleton :: AllFieldsMatch ( mut pattern_plurals) = first_pattern_match {
167
+ if let BestSkeleton :: AllFieldsMatch ( mut pattern_plurals, d ) = first_pattern_match {
144
168
pattern_plurals. for_each_mut ( |pattern| {
145
169
naively_apply_preferences ( pattern, components. hour_cycle ) ;
146
170
naively_apply_time_zone_name ( pattern, components. time_zone_name ) ;
147
171
apply_subseconds ( pattern, components. subsecond ) ;
148
172
} ) ;
149
- return BestSkeleton :: AllFieldsMatch ( pattern_plurals) ;
173
+ return BestSkeleton :: AllFieldsMatch ( pattern_plurals, d ) ;
150
174
}
151
175
152
176
let FieldsByType { date, time } = group_fields_by_type ( fields) ;
153
177
154
178
if date. is_empty ( ) || time. is_empty ( ) {
155
179
return match first_pattern_match {
156
- BestSkeleton :: AllFieldsMatch ( _) => {
180
+ BestSkeleton :: AllFieldsMatch ( _, _ ) => {
157
181
unreachable ! ( "Logic error in implementation. AllFieldsMatch handled above." )
158
182
}
159
- BestSkeleton :: MissingOrExtraFields ( mut pattern_plurals) => {
183
+ BestSkeleton :: MissingOrExtraFields ( mut pattern_plurals, d ) => {
160
184
if date. is_empty ( ) {
161
185
pattern_plurals. for_each_mut ( |pattern| {
162
186
naively_apply_preferences ( pattern, components. hour_cycle ) ;
163
187
naively_apply_time_zone_name ( pattern, components. time_zone_name ) ;
164
188
apply_subseconds ( pattern, components. subsecond ) ;
165
189
} ) ;
166
190
}
167
- BestSkeleton :: MissingOrExtraFields ( pattern_plurals)
191
+ BestSkeleton :: MissingOrExtraFields ( pattern_plurals, d )
168
192
}
169
193
BestSkeleton :: NoMatch => BestSkeleton :: NoMatch ,
170
194
} ;
171
195
}
172
196
173
197
// Match the date and time, and then simplify the combinatorial logic of the results into
174
198
    // optional values of the results, and a boolean value.
175
- let ( date_patterns, date_missing_or_extra) : ( Option < PatternPlurals < ' data > > , bool ) =
176
- match get_best_available_format_pattern ( skeletons, & date, prefer_matched_pattern) {
177
- BestSkeleton :: MissingOrExtraFields ( fields) => ( Some ( fields) , true ) ,
178
- BestSkeleton :: AllFieldsMatch ( fields) => ( Some ( fields) , false ) ,
179
- BestSkeleton :: NoMatch => ( None , true ) ,
180
- } ;
199
+ let ( date_patterns, date_missing_or_extra, date_distance) : (
200
+ Option < PatternPlurals < ' data > > ,
201
+ bool ,
202
+ SkeletonQuality ,
203
+ ) = match get_best_available_format_pattern ( skeletons, & date, prefer_matched_pattern) {
204
+ BestSkeleton :: MissingOrExtraFields ( fields, d) => ( Some ( fields) , true , d) ,
205
+ BestSkeleton :: AllFieldsMatch ( fields, d) => ( Some ( fields) , false , d) ,
206
+ BestSkeleton :: NoMatch => ( None , true , SkeletonQuality ( REQUESTED_SYMBOL_MISSING ) ) ,
207
+ } ;
181
208
182
- let ( time_patterns, time_missing_or_extra) : ( Option < PatternPlurals < ' data > > , bool ) =
183
- match get_best_available_format_pattern ( skeletons, & time, prefer_matched_pattern) {
184
- BestSkeleton :: MissingOrExtraFields ( fields) => ( Some ( fields) , true ) ,
185
- BestSkeleton :: AllFieldsMatch ( fields) => ( Some ( fields) , false ) ,
186
- BestSkeleton :: NoMatch => ( None , true ) ,
187
- } ;
209
+ let ( time_patterns, time_missing_or_extra, time_distance) : (
210
+ Option < PatternPlurals < ' data > > ,
211
+ bool ,
212
+ SkeletonQuality ,
213
+ ) = match get_best_available_format_pattern ( skeletons, & time, prefer_matched_pattern) {
214
+ BestSkeleton :: MissingOrExtraFields ( fields, d) => ( Some ( fields) , true , d) ,
215
+ BestSkeleton :: AllFieldsMatch ( fields, d) => ( Some ( fields) , false , d) ,
216
+ BestSkeleton :: NoMatch => ( None , true , SkeletonQuality ( REQUESTED_SYMBOL_MISSING ) ) ,
217
+ } ;
188
218
let time_pattern: Option < runtime:: Pattern < ' data > > = time_patterns. map ( |pattern_plurals| {
189
219
let mut pattern =
190
220
pattern_plurals. expect_pattern ( "Only date patterns can contain plural variants" ) ;
@@ -254,12 +284,18 @@ pub fn create_best_pattern_for_fields<'data>(
254
284
( None , None ) => None ,
255
285
} ;
256
286
287
+ let distance = SkeletonQuality (
288
+ date_distance
289
+ . 0
290
+ . saturating_add ( time_distance. 0 )
291
+ . saturating_add ( GLUE_DISTANCE ) ,
292
+ ) ;
257
293
match patterns {
258
294
Some ( patterns) => {
259
295
if date_missing_or_extra || time_missing_or_extra {
260
- BestSkeleton :: MissingOrExtraFields ( patterns)
296
+ BestSkeleton :: MissingOrExtraFields ( patterns, distance )
261
297
} else {
262
- BestSkeleton :: AllFieldsMatch ( patterns)
298
+ BestSkeleton :: AllFieldsMatch ( patterns, distance )
263
299
}
264
300
}
265
301
None => BestSkeleton :: NoMatch ,
@@ -481,6 +517,7 @@ pub fn get_best_available_format_pattern<'data>(
481
517
// (e.g. text vs numeric). We return the field instead of the matched pattern.
482
518
return BestSkeleton :: AllFieldsMatch (
483
519
runtime:: Pattern :: from ( vec ! [ PatternItem :: Field ( * field) ] ) . into ( ) ,
520
+ SkeletonQuality ( closest_distance) ,
484
521
) ;
485
522
}
486
523
}
@@ -496,7 +533,10 @@ pub fn get_best_available_format_pattern<'data>(
496
533
}
497
534
498
535
if closest_distance == NO_DISTANCE {
499
- return BestSkeleton :: AllFieldsMatch ( closest_format_pattern) ;
536
+ return BestSkeleton :: AllFieldsMatch (
537
+ closest_format_pattern,
538
+ SkeletonQuality ( closest_distance) ,
539
+ ) ;
500
540
}
501
541
502
542
// Modify the resulting pattern to have fields of the same length.
@@ -511,8 +551,11 @@ pub fn get_best_available_format_pattern<'data>(
511
551
}
512
552
513
553
if closest_distance >= SKELETON_EXTRA_SYMBOL {
514
- return BestSkeleton :: MissingOrExtraFields ( closest_format_pattern) ;
554
+ return BestSkeleton :: MissingOrExtraFields (
555
+ closest_format_pattern,
556
+ SkeletonQuality ( closest_distance) ,
557
+ ) ;
515
558
}
516
559
517
- BestSkeleton :: AllFieldsMatch ( closest_format_pattern)
560
+ BestSkeleton :: AllFieldsMatch ( closest_format_pattern, SkeletonQuality ( closest_distance ) )
518
561
}
0 commit comments