Skip to content

Commit a4805b2

Browse files
make regex.scan's behavior consistent across targets (#423)
1 parent a37d5dd commit a4805b2

File tree

5 files changed

+102
-3
lines changed

5 files changed

+102
-3
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Unreleased
44

5+
- `regex.scan` now behaves consistently across both targets when a capture group does not capture anything.
56
- The `Map` type was rewritten as a persistent immutable data structure. This
67
results in drastically improved performance when constructing or updating
78
maps, especially with large maps.

src/gleam/regex.gleam

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ if javascript {
160160
/// ## Examples
161161
///
162162
/// ```gleam
163-
/// > assert Ok(re) = from_string("[oi]n a (\\w+)")
163+
/// > let assert Ok(re) = from_string("[oi]n a (\\w+)")
164164
/// > scan(with: re, content: "I am on a boat in a lake.")
165165
/// [
166166
/// Match(
@@ -174,6 +174,50 @@ if javascript {
174174
/// ]
175175
/// ```
176176
///
177+
/// ```gleam
178+
/// > let assert Ok(re) = regex.from_string("([+|\\-])?(\\d+)(\\w+)?")
179+
/// > scan(with: re, content: "-36")
180+
/// [
181+
/// Match(
182+
/// content: "-36",
183+
/// submatches: [Some("-"), Some("36")]
184+
/// )
185+
/// ]
186+
///
187+
/// > scan(with: re, content: "36")
188+
/// [
189+
/// Match(
190+
/// content: "36",
191+
/// submatches: [None, Some("36")]
192+
/// )
193+
/// ]
194+
/// ```
195+
///
196+
/// ```gleam
197+
/// > let assert Ok(re) = regex.from_string("var\\s*(\\w+)\\s*(int|string)?\\s*=\\s*(.*)")
198+
/// > scan(with: re, content: "var age = 32")
199+
/// [
200+
/// Match(
201+
/// content: "var age = 32",
202+
/// submatches: [Some("age"), None, Some("32")]
203+
/// )
204+
/// ]
205+
/// ```
206+
///
207+
/// ```gleam
208+
/// > let assert Ok(re) = regex.from_string("let (\\w+) = (\\w+)")
209+
/// > scan(with: re, content: "let age = 32")
210+
/// [
211+
/// Match(
212+
/// content: "let age = 32",
213+
/// submatches: [Some("age"), Some("32")]
214+
/// )
215+
/// ]
216+
///
217+
/// > scan(with: re, content: "const age = 32")
218+
/// []
219+
/// ```
220+
///
177221
pub fn scan(with regex: Regex, content string: String) -> List(Match) {
178222
do_scan(regex, string)
179223
}

src/gleam_stdlib.erl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ regex_check(Regex, String) ->
205205
regex_split(Regex, String) ->
206206
re:split(String, Regex).
207207

208+
regex_submatches(_, {-1, 0}) -> none;
208209
regex_submatches(String, {Start, Length}) ->
209210
BinarySlice = binary:part(String, {Start, Length}),
210211
case string:is_empty(binary_to_list(BinarySlice)) of

src/gleam_stdlib.mjs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,17 @@ export function compile_regex(pattern, options) {
388388

389389
export function regex_scan(regex, string) {
390390
let matches = Array.from(string.matchAll(regex)).map((match) => {
391-
let content = match.shift();
392-
let submatches = match.map((x) => (x ? new Some(x) : new None()));
391+
const content = match[0];
392+
const submatches = [];
393+
for (let n = match.length - 1; n > 0; n--) {
394+
if (match[n]) {
395+
submatches[n-1] = new Some(match[n])
396+
continue
397+
}
398+
if(submatches.length > 0) {
399+
submatches[n-1] = new None()
400+
}
401+
}
393402
return new RegexMatch(content, List.fromArray(submatches));
394403
});
395404
return List.fromArray(matches);

test/gleam/regex_test.gleam

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,48 @@ pub fn scan_test() {
8484

8585
regex.scan(re, "你好 42 世界")
8686
|> should.equal([Match(content: "42", submatches: [Some("42")])])
87+
88+
let assert Ok(re) = regex.from_string("([+|\\-])?(\\d+)(\\w+)?")
89+
regex.scan(re, "+36kg")
90+
|> should.equal([
91+
Match(content: "+36kg", submatches: [Some("+"), Some("36"), Some("kg")]),
92+
])
93+
94+
regex.scan(re, "36kg")
95+
|> should.equal([
96+
Match(content: "36kg", submatches: [None, Some("36"), Some("kg")]),
97+
])
98+
99+
regex.scan(re, "36")
100+
|> should.equal([Match(content: "36", submatches: [None, Some("36")])])
101+
102+
regex.scan(re, "-36")
103+
|> should.equal([Match(content: "-36", submatches: [Some("-"), Some("36")])])
104+
105+
regex.scan(re, "-kg")
106+
|> should.equal([])
107+
108+
let assert Ok(re) =
109+
regex.from_string("var\\s*(\\w+)\\s*(int|string)?\\s*=\\s*(.*)")
110+
regex.scan(re, "var age int = 32")
111+
|> should.equal([
112+
Match(
113+
content: "var age int = 32",
114+
submatches: [Some("age"), Some("int"), Some("32")],
115+
),
116+
])
117+
118+
regex.scan(re, "var age = 32")
119+
|> should.equal([
120+
Match(content: "var age = 32", submatches: [Some("age"), None, Some("32")]),
121+
])
122+
123+
let assert Ok(re) = regex.from_string("let (\\w+) = (\\w+)")
124+
regex.scan(re, "let age = 32")
125+
|> should.equal([
126+
Match(content: "let age = 32", submatches: [Some("age"), Some("32")]),
127+
])
128+
129+
regex.scan(re, "const age = 32")
130+
|> should.equal([])
87131
}

0 commit comments

Comments
 (0)