@@ -317,54 +317,41 @@ impl Hir {
317
317
}
318
318
}
319
319
320
- /// Build an HIR expression for `.`.
321
- ///
322
- /// A `.` expression matches any character except for a newline terminator.
323
- /// To build an expression that matches any character, including newline
324
- /// terminators, use the `any_char` method.
325
- pub fn dot_char ( ) -> Hir {
326
- let mut cls = ClassUnicode :: empty ( ) ;
327
- cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
328
- cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
329
- Hir :: class ( Class :: Unicode ( cls) )
330
- }
331
-
332
- /// Build an HIR expression for `(?-u:.)`.
333
- ///
334
- /// A non-Unicode `.` expression matches any byte except for a newline
335
- /// terminator. To build an expression that matches any byte, including
336
- /// newline terminators, use the `any_byte` method.
337
- pub fn dot_byte ( ) -> Hir {
338
- let mut cls = ClassBytes :: empty ( ) ;
339
- cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
340
- cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
341
- Hir :: class ( Class :: Bytes ( cls) )
342
- }
343
-
344
- /// Build an HIR expression for `(?s:.)`.
345
- ///
346
- /// A `(?s:.)` expression matches any character, including newline
347
- /// terminators. To build an expression that matches any character except
348
- /// for newline terminators, use the `dot_char` method.
349
- ///
350
- /// Note that `(?s:)` is equivalent to `\p{any}`.
351
- pub fn any_char ( ) -> Hir {
352
- let mut cls = ClassUnicode :: empty ( ) ;
353
- cls. push ( ClassUnicodeRange :: new ( '\0' , '\u{10FFFF}' ) ) ;
354
- Hir :: class ( Class :: Unicode ( cls) )
355
- }
356
-
357
- /// Build an HIR expression for `(?s-u:.)`.
358
- ///
359
- /// A `(?s-u:.)` expression matches any byte, including newline terminators.
360
- /// To build an expression that matches any byte except for newline
361
- /// terminators, use the `dot_byte` method.
362
- ///
363
- /// Note that `(?s-u:.)` is equivalent to `(?-u:[\x00-\xFF])`.
364
- pub fn any_byte ( ) -> Hir {
365
- let mut cls = ClassBytes :: empty ( ) ;
366
- cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
367
- Hir :: class ( Class :: Bytes ( cls) )
320
+ /// Returns an HIR expression for `.`.
321
+ ///
322
+ /// * [`Dot::AnyChar`] maps to `(?su:.)`.
323
+ /// * [`Dot::AnyByte`] maps to `(?s-u:.)`.
324
+ /// * [`Dot::AnyCharExceptNL`] maps to `(?u-s:.)`.
325
+ /// * [`Dot::AnyByteExceptNL`] maps to `(?-su:.)`.
326
+ ///
327
+ /// Note that this is a convenience routine for constructing the correct
328
+ /// character class based on the value of `Dot`. There is no explicit "dot"
329
+ /// HIR value. It is just an abbreviation for a common character class.
330
+ pub fn dot ( dot : Dot ) -> Hir {
331
+ match dot {
332
+ Dot :: AnyChar => {
333
+ let mut cls = ClassUnicode :: empty ( ) ;
334
+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\u{10FFFF}' ) ) ;
335
+ Hir :: class ( Class :: Unicode ( cls) )
336
+ }
337
+ Dot :: AnyByte => {
338
+ let mut cls = ClassBytes :: empty ( ) ;
339
+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\xFF' ) ) ;
340
+ Hir :: class ( Class :: Bytes ( cls) )
341
+ }
342
+ Dot :: AnyCharExceptNL => {
343
+ let mut cls = ClassUnicode :: empty ( ) ;
344
+ cls. push ( ClassUnicodeRange :: new ( '\0' , '\x09' ) ) ;
345
+ cls. push ( ClassUnicodeRange :: new ( '\x0B' , '\u{10FFFF}' ) ) ;
346
+ Hir :: class ( Class :: Unicode ( cls) )
347
+ }
348
+ Dot :: AnyByteExceptNL => {
349
+ let mut cls = ClassBytes :: empty ( ) ;
350
+ cls. push ( ClassBytesRange :: new ( b'\0' , b'\x09' ) ) ;
351
+ cls. push ( ClassBytesRange :: new ( b'\x0B' , b'\xFF' ) ) ;
352
+ Hir :: class ( Class :: Bytes ( cls) )
353
+ }
354
+ }
368
355
}
369
356
}
370
357
@@ -1233,6 +1220,31 @@ impl Repetition {
1233
1220
}
1234
1221
}
1235
1222
1223
/// Enumerates the flavors of the `.` regex understood by [`Hir::dot`].
///
/// Each variant selects which character class that convenience constructor
/// builds: Unicode scalar values or raw bytes, with or without `\n`.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Dot {
    /// Any Unicode scalar value, matched through its UTF-8 encoding.
    ///
    /// Same as `(?su:.)`, which is also the same as `\p{any}`.
    AnyChar,
    /// Any single byte value.
    ///
    /// Same as `(?s-u:.)`, which is also the same as `(?-u:[\x00-\xFF])`.
    AnyByte,
    /// Any Unicode scalar value other than `\n`, matched through its UTF-8
    /// encoding.
    ///
    /// Same as `(?u-s:.)`, which is also the same as `[\p{any}--\n]`.
    AnyCharExceptNL,
    /// Any single byte value other than `\n`.
    ///
    /// Same as `(?-su:.)`, which is also the same as
    /// `(?-u:[[\x00-\xFF]--\n])`.
    AnyByteExceptNL,
}
1247
+
1236
1248
/// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
1237
1249
/// space but heap space proportional to the depth of the total `Hir`.
1238
1250
impl Drop for Hir {
0 commit comments