Skip to content

Commit 55976dc

Browse files
committed
syntax: rejigger Hir::{dot,any}
Instead of using a boolean parameter, we just split them into dot_char, dot_byte, any_char, any_byte. Another path would be to use an enum, but this appeals to me a little more.
1 parent 82e0a46 commit 55976dc

File tree

3 files changed

+60
-48
lines changed

3 files changed

+60
-48
lines changed

regex-syntax/src/hir/mod.rs

Lines changed: 42 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -310,44 +310,52 @@ impl Hir {
310310

311311
/// Build an HIR expression for `.`.
312312
///
313-
/// A `.` expression matches any character except for `\n`. To build an
314-
/// expression that matches any character, including `\n`, use the `any`
315-
/// method.
316-
///
317-
/// If `bytes` is `true`, then this assumes characters are limited to a
318-
/// single byte.
319-
pub fn dot(bytes: bool) -> Hir {
320-
if bytes {
321-
let mut cls = ClassBytes::empty();
322-
cls.push(ClassBytesRange::new(b'\0', b'\x09'));
323-
cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
324-
Hir::class(Class::Bytes(cls))
325-
} else {
326-
let mut cls = ClassUnicode::empty();
327-
cls.push(ClassUnicodeRange::new('\0', '\x09'));
328-
cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
329-
Hir::class(Class::Unicode(cls))
330-
}
313+
/// A `.` expression matches any character except for a newline terminator.
314+
/// To build an expression that matches any character, including newline
315+
/// terminators, use the `any_char` method.
316+
pub fn dot_char() -> Hir {
317+
let mut cls = ClassUnicode::empty();
318+
cls.push(ClassUnicodeRange::new('\0', '\x09'));
319+
cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
320+
Hir::class(Class::Unicode(cls))
331321
}
332322

333-
/// Build an HIR expression for `(?s).`.
323+
/// Build an HIR expression for `(?-u:.)`.
334324
///
335-
/// A `(?s).` expression matches any character, including `\n`. To build an
336-
/// expression that matches any character except for `\n`, then use the
337-
/// `dot` method.
325+
/// A non-Unicode `.` expression matches any byte except for a newline
326+
/// terminator. To build an expression that matches any byte, including
327+
/// newline terminators, use the `any_byte` method.
328+
pub fn dot_byte() -> Hir {
329+
let mut cls = ClassBytes::empty();
330+
cls.push(ClassBytesRange::new(b'\0', b'\x09'));
331+
cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
332+
Hir::class(Class::Bytes(cls))
333+
}
334+
335+
/// Build an HIR expression for `(?s:.)`.
338336
///
339-
/// If `bytes` is `true`, then this assumes characters are limited to a
340-
/// single byte.
341-
pub fn any(bytes: bool) -> Hir {
342-
if bytes {
343-
let mut cls = ClassBytes::empty();
344-
cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
345-
Hir::class(Class::Bytes(cls))
346-
} else {
347-
let mut cls = ClassUnicode::empty();
348-
cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
349-
Hir::class(Class::Unicode(cls))
350-
}
337+
/// A `(?s:.)` expression matches any character, including newline
338+
/// terminators. To build an expression that matches any character except
339+
/// for newline terminators, use the `dot_char` method.
340+
///
341+
/// Note that `(?s:.)` is equivalent to `\p{any}`.
342+
pub fn any_char() -> Hir {
343+
let mut cls = ClassUnicode::empty();
344+
cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
345+
Hir::class(Class::Unicode(cls))
346+
}
347+
348+
/// Build an HIR expression for `(?s-u:.)`.
349+
///
350+
/// A `(?s-u:.)` expression matches any byte, including newline terminators.
351+
/// To build an expression that matches any byte except for newline
352+
/// terminators, use the `dot_byte` method.
353+
///
354+
/// Note that `(?s-u:.)` is equivalent to `(?-u:[\x00-\xFF])`.
355+
pub fn any_byte() -> Hir {
356+
let mut cls = ClassBytes::empty();
357+
cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
358+
Hir::class(Class::Bytes(cls))
351359
}
352360
}
353361

regex-syntax/src/hir/translate.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -861,9 +861,17 @@ impl<'t, 'p> TranslatorI<'t, 'p> {
861861
return Err(self.error(span, ErrorKind::InvalidUtf8));
862862
}
863863
Ok(if self.flags().dot_matches_new_line() {
864-
Hir::any(!unicode)
864+
if unicode {
865+
Hir::any_char()
866+
} else {
867+
Hir::any_byte()
868+
}
865869
} else {
866-
Hir::dot(!unicode)
870+
if unicode {
871+
Hir::dot_char()
872+
} else {
873+
Hir::dot_byte()
874+
}
867875
})
868876
}
869877

src/compile.rs

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -424,23 +424,19 @@ impl Compiler {
424424
}
425425

426426
fn c_dotstar(&mut self) -> Result {
427-
Ok(if !self.compiled.only_utf8() {
428-
self.c(&Hir::repetition(hir::Repetition {
429-
min: 0,
430-
max: None,
431-
greedy: false,
432-
hir: Box::new(Hir::any(true)),
433-
}))?
434-
.unwrap()
427+
let hir = if self.compiled.only_utf8() {
428+
Hir::any_char()
435429
} else {
436-
self.c(&Hir::repetition(hir::Repetition {
430+
Hir::any_byte()
431+
};
432+
Ok(self
433+
.c(&Hir::repetition(hir::Repetition {
437434
min: 0,
438435
max: None,
439436
greedy: false,
440-
hir: Box::new(Hir::any(false)),
437+
hir: Box::new(hir),
441438
}))?
442-
.unwrap()
443-
})
439+
.unwrap())
444440
}
445441

446442
fn c_char(&mut self, c: char) -> ResultOrEmpty {

0 commit comments

Comments
 (0)