Skip to content

Commit 3b05152

Browse files
authored
Merge pull request #427 from rustcoreutils/updates
Updates
2 parents 75f9d99 + ebddc29 commit 3b05152

File tree

28 files changed

+1270
-623
lines changed

28 files changed

+1270
-623
lines changed

awk/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ libc.workspace = true
1313
pest = "2.7"
1414
pest_derive = "2.7"
1515
lexical = { version = "6.1", features = ["format"] }
16+
plib = { path = "../plib" }
1617
rand = { version = "0.8", default-features = false, features = ["small_rng"] }
1718

1819
[dev-dependencies]

awk/regex.rs

Lines changed: 48 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -7,36 +7,14 @@
77
// SPDX-License-Identifier: MIT
88
//
99

10+
use plib::regex::{Match, Regex as PlibRegex, RegexFlags};
1011
use std::ffi::CString;
11-
use std::ptr;
12-
13-
fn regex_compilation_result(
14-
status_integer: libc::c_int,
15-
regex: &libc::regex_t,
16-
) -> Result<(), String> {
17-
if status_integer != 0 {
18-
let mut error_buffer = vec![b'\0'; 128];
19-
unsafe {
20-
libc::regerror(
21-
status_integer,
22-
ptr::from_ref(regex),
23-
error_buffer.as_mut_ptr() as *mut libc::c_char,
24-
128,
25-
)
26-
};
27-
let error = CString::from_vec_with_nul(error_buffer)
28-
.expect("error message returned from `libc::regerror` is an invalid CString");
29-
Err(error
30-
.into_string()
31-
.expect("error message from `libc::regerror' contains invalid utf-8"))
32-
} else {
33-
Ok(())
34-
}
35-
}
3612

13+
/// A regex wrapper that provides CString-compatible API for AWK.
14+
/// Internally uses plib::regex for POSIX ERE support.
3715
pub struct Regex {
38-
raw_regex: libc::regex_t,
39-
regex_string: CString,
16+
inner: PlibRegex,
17+
pattern_string: String,
4018
}
4119

4220
#[cfg_attr(test, derive(Debug))]
@@ -46,95 +24,95 @@ pub struct RegexMatch {
4624
pub end: usize,
4725
}
4826

27+
impl From<Match> for RegexMatch {
28+
fn from(m: Match) -> Self {
29+
RegexMatch {
30+
start: m.start,
31+
end: m.end,
32+
}
33+
}
34+
}
35+
36+
/// Iterator over regex matches in a string.
37+
/// Owns the input text as a `String` (converted from the caller's CString), so the iterator holds no borrow of the input.
4938
pub struct MatchIter<'re> {
50-
string: CString,
39+
// Store the string as owned String to avoid lifetime issues
40+
string: String,
5141
next_start: usize,
5242
regex: &'re Regex,
5343
}
5444

5545
impl Iterator for MatchIter<'_> {
5646
type Item = RegexMatch;
5747
fn next(&mut self) -> Option<Self::Item> {
58-
if self.next_start >= self.string.as_bytes().len() {
59-
return None;
60-
}
61-
let mut match_range = libc::regmatch_t {
62-
rm_so: -1,
63-
rm_eo: -1,
64-
};
65-
let exec_status = unsafe {
66-
libc::regexec(
67-
ptr::from_ref(&self.regex.raw_regex),
68-
self.string.as_ptr().add(self.next_start),
69-
1,
70-
ptr::from_mut(&mut match_range),
71-
0,
72-
)
73-
};
74-
if exec_status == libc::REG_NOMATCH {
48+
if self.next_start >= self.string.len() {
7549
return None;
7650
}
51+
52+
// Find match starting from current offset
53+
let substring = &self.string[self.next_start..];
54+
let m = self.regex.inner.find(substring)?;
55+
7756
let result = RegexMatch {
78-
start: self.next_start + match_range.rm_so as usize,
79-
end: self.next_start + match_range.rm_eo as usize,
57+
start: self.next_start + m.start,
58+
end: self.next_start + m.end,
8059
};
81-
self.next_start += match_range.rm_eo as usize;
60+
61+
// Move past this match for next iteration
62+
// Ensure we make progress even on zero-width matches
63+
self.next_start = if m.end > 0 {
64+
self.next_start + m.end
65+
} else {
66+
self.next_start + 1
67+
};
68+
8269
Some(result)
8370
}
8471
}
8572

8673
impl Regex {
8774
pub fn new(regex: CString) -> Result<Self, String> {
88-
let mut raw = unsafe { std::mem::zeroed::<libc::regex_t>() };
89-
let compilation_status =
90-
unsafe { libc::regcomp(ptr::from_mut(&mut raw), regex.as_ptr(), libc::REG_EXTENDED) };
91-
regex_compilation_result(compilation_status, &raw)?;
75+
let pattern = regex.to_str().map_err(|e| e.to_string())?;
76+
let inner = PlibRegex::new(pattern, RegexFlags::ere()).map_err(|e| e.to_string())?;
9277
Ok(Self {
93-
raw_regex: raw,
94-
regex_string: regex,
78+
inner,
79+
pattern_string: pattern.to_string(),
9580
})
9681
}
9782

83+
/// Returns an iterator over all match locations in the string.
84+
/// Takes ownership of the CString.
9885
pub fn match_locations(&self, string: CString) -> MatchIter {
86+
let s = string.into_string().unwrap_or_default();
9987
MatchIter {
10088
next_start: 0,
10189
regex: self,
102-
string,
90+
string: s,
10391
}
10492
}
10593

10694
pub fn matches(&self, string: &CString) -> bool {
107-
let exec_status = unsafe {
108-
libc::regexec(
109-
ptr::from_ref(&self.raw_regex),
110-
string.as_ptr(),
111-
0,
112-
ptr::null_mut(),
113-
0,
114-
)
115-
};
116-
exec_status != libc::REG_NOMATCH
95+
let s = string.to_str().unwrap_or("");
96+
self.inner.is_match(s)
11797
}
11898
}
11999

120100
impl Drop for Regex {
121101
fn drop(&mut self) {
122-
unsafe {
123-
libc::regfree(ptr::from_mut(&mut self.raw_regex));
124-
}
102+
// plib::regex handles cleanup internally
125103
}
126104
}
127105

128106
#[cfg(test)]
129107
impl core::fmt::Debug for Regex {
130108
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
131-
writeln!(f, "/{}/", self.regex_string.to_str().unwrap())
109+
writeln!(f, "/{}/", self.pattern_string)
132110
}
133111
}
134112

135113
impl PartialEq for Regex {
136114
fn eq(&self, other: &Self) -> bool {
137-
self.regex_string == other.regex_string
115+
self.pattern_string == other.pattern_string
138116
}
139117
}
140118

display/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ clap.workspace = true
1212
clap.features = ["env"]
1313
gettext-rs.workspace = true
1414
libc.workspace = true
15+
plib = { path = "../plib" }
1516
termion = "4.0"
1617
thiserror = "1.0"
1718

display/more.rs

Lines changed: 18 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,15 @@
99

1010
use clap::Parser;
1111
use gettextrs::{bind_textdomain_codeset, gettext, setlocale, textdomain, LocaleCategory};
12-
use libc::{
13-
getegid, getgid, getuid, regcomp, regex_t, regexec, setgid, setuid, REG_ICASE, REG_NOMATCH,
14-
};
12+
use libc::{getegid, getgid, getuid, setgid, setuid};
13+
use plib::regex::{Regex, RegexFlags};
1514
use std::collections::HashMap;
16-
use std::ffi::CString;
1715
use std::fs::File;
1816
use std::io::{stdout, BufRead, BufReader, Cursor, Read, Seek, SeekFrom, Write};
1917
use std::ops::{Not, Range};
2018
use std::os::fd::AsRawFd;
2119
use std::path::PathBuf;
2220
use std::process::{exit, ExitStatus};
23-
use std::ptr;
2421
use std::str::FromStr;
2522
use std::sync::mpsc::{channel, Receiver, TryRecvError};
2623
use std::sync::Mutex;
@@ -942,8 +939,8 @@ struct SourceContext {
942939
/// Current search pattern
943940
current_pattern: String,
944941
/// Last search settings
945-
last_search: Option<(regex_t, bool, Direction)>,
946-
/// Storage for marks that were set durring current [`Source`] processing
942+
last_search: Option<(Regex, bool, Direction)>,
943+
/// Storage for marks that were set during current [`Source`] processing
947944
marked_positions: HashMap<char, usize>,
948945
/// Flag that is [`true`] if the input file count is more than 1
949946
is_many_files: bool,
@@ -1206,7 +1203,7 @@ impl SourceContext {
12061203
pub fn search(
12071204
&mut self,
12081205
count: Option<usize>,
1209-
pattern: regex_t,
1206+
pattern: Regex,
12101207
is_not: bool,
12111208
direction: Direction,
12121209
) -> Result<(), MoreError> {
@@ -1222,21 +1219,10 @@ impl SourceContext {
12221219
Direction::Backward => haystack + &last_string,
12231220
};
12241221
}
1225-
let c_input = CString::new(haystack)
1226-
.map_err(|_| MoreError::StringParse(self.current_source.name()))?;
1227-
let has_match = unsafe {
1228-
regexec(
1229-
&pattern as *const regex_t,
1230-
c_input.as_ptr(),
1231-
0,
1232-
ptr::null_mut(),
1233-
0,
1234-
)
1235-
};
12361222
let has_match = if is_not {
1237-
has_match == REG_NOMATCH
1223+
!pattern.is_match(&haystack)
12381224
} else {
1239-
has_match != REG_NOMATCH
1225+
pattern.is_match(&haystack)
12401226
};
12411227
if has_match {
12421228
let Some((rows, _)) = self.terminal_size else {
@@ -1291,7 +1277,7 @@ impl SourceContext {
12911277
} else {
12921278
direction.clone()
12931279
};
1294-
self.search(count, *pattern, *is_not, direction)
1280+
self.search(count, pattern.clone(), *is_not, direction)
12951281
} else {
12961282
Err(MoreError::SourceContext(
12971283
SourceContextError::MissingLastSearch,
@@ -1647,39 +1633,19 @@ impl Prompt {
16471633
}
16481634
}
16491635

1650-
/// Compiles [`pattern`] as [`regex_t`]
1651-
fn compile_regex(pattern: String, ignore_case: bool) -> Result<regex_t, MoreError> {
1652-
#[cfg(target_os = "macos")]
1653-
let mut pattern = pattern.replace("\\\\", "\\");
1654-
#[cfg(all(unix, not(target_os = "macos")))]
1636+
/// Compiles [`pattern`] as a POSIX BRE regex
1637+
fn compile_regex(pattern: String, ignore_case: bool) -> Result<Regex, MoreError> {
1638+
// Normalize backslash escapes: collapse each doubled backslash into a single backslash
16551639
let pattern = pattern.replace("\\\\", "\\");
1656-
let mut cflags = 0;
1657-
if ignore_case {
1658-
cflags |= REG_ICASE;
1659-
}
16601640

1661-
// macOS version of [regcomp](regcomp) from `libc` provides additional check
1662-
// for empty regex. In this case, an error
1663-
// [REG_EMPTY](https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/regcomp.3.html)
1664-
// will be returned. Therefore, an empty pattern is replaced with ".*".
1665-
#[cfg(target_os = "macos")]
1666-
{
1667-
pattern = if pattern == "" {
1668-
String::from(".*")
1669-
} else {
1670-
pattern
1671-
};
1672-
}
1673-
1674-
let c_pattern =
1675-
CString::new(pattern.clone()).map_err(|_| MoreError::StringParse(pattern.clone()))?;
1676-
let mut regex = unsafe { std::mem::zeroed::<regex_t>() };
1677-
1678-
if unsafe { regcomp(&mut regex, c_pattern.as_ptr(), cflags) } == 0 {
1679-
Ok(regex)
1641+
let flags = if ignore_case {
1642+
RegexFlags::bre().ignore_case()
16801643
} else {
1681-
Err(MoreError::StringParse(pattern))
1682-
}
1644+
RegexFlags::bre()
1645+
};
1646+
1647+
// plib::regex handles macOS empty pattern workaround internally
1648+
Regex::new(&pattern, flags).map_err(|_| MoreError::StringParse(pattern))
16831649
}
16841650

16851651
/// More state

0 commit comments

Comments
 (0)