Skip to content

Commit f6f4f81

Browse files
committed
Improve coverage of malformed sequences in String
1 parent c2426ce · commit f6f4f81

File tree

2 files changed: +75 -10 lines changed

lib/elixir/lib/string.ex

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -524,11 +524,10 @@ defmodule String do
524524
defp parts_to_index(n) when is_integer(n) and n > 0, do: n
525525

526526
defp split_empty("", true, 1), do: []
527-
defp split_empty(string, _, 1), do: [string]
527+
defp split_empty(string, _, 1) when is_binary(string), do: [IO.iodata_to_binary(string)]
528528

529529
defp split_empty(string, trim, count) do
530530
case :unicode_util.gc(string) do
531-
[gc] -> [grapheme_to_binary(gc) | split_empty(<<>>, trim, count - 1)]
532531
[gc | rest] -> [grapheme_to_binary(gc) | split_empty(rest, trim, count - 1)]
533532
[] -> split_empty("", trim, 1)
534533
{:error, <<byte, rest::bits>>} -> [<<byte>> | split_empty(rest, trim, count - 1)]

lib/elixir/test/elixir/string_test.exs

Lines changed: 74 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,17 @@ defmodule StringTest do
107107
assert String.split("x-x-", pattern, parts: 3, trim: true) == ["x", "x"]
108108
end
109109

110+
test "split/2,3 with malformed" do
111+
assert String.split(<<225, 158, 128, 225, 158, 185, 225>>, "", parts: 1) ==
112+
[<<225, 158, 128, 225, 158, 185, 225>>]
113+
114+
assert String.split(<<225, 158, 128, 225, 158, 185, 225>>, "", parts: 2) ==
115+
["", <<225, 158, 128, 225, 158, 185, 225>>]
116+
117+
assert String.split(<<225, 158, 128, 225, 158, 185, 225>>, "", parts: 3) ==
118+
["", "កឹ", <<225>>, ""]
119+
end
120+
110121
test "splitter/2,3" do
111122
assert String.splitter("a,b,c", ",") |> Enum.to_list() == ["a", "b", "c"]
112123
assert String.splitter("a,b", ".") |> Enum.to_list() == ["a,b"]
@@ -155,9 +166,18 @@ defmodule StringTest do
155166
end
156167
end
157168

158-
test "split_at/2 with invalid guard" do
169+
test "split_at/2 with malformed" do
159170
assert String.split_at(<<?a, 195, 10, ?a>>, 2) == {<<?a, 195>>, <<10, ?a>>}
160171
assert String.split_at(<<107, 205, 135, 184>>, 1) == {<<107, 205, 135>>, <<184>>}
172+
173+
assert String.split_at(<<225, 158, 128, 225, 158, 185, 225>>, 0) ==
174+
{"", <<225, 158, 128, 225, 158, 185, 225>>}
175+
176+
assert String.split_at(<<225, 158, 128, 225, 158, 185, 225>>, 1) ==
177+
{"កឹ", <<225>>}
178+
179+
assert String.split_at(<<225, 158, 128, 225, 158, 185, 225>>, 2) ==
180+
{<<225, 158, 128, 225, 158, 185, 225>>, ""}
161181
end
162182

163183
test "upcase/1" do
@@ -457,6 +477,10 @@ defmodule StringTest do
457477
end
458478
end
459479

480+
test "with empty string and string replacement with malformed" do
481+
assert String.replace(<<225, 158, 128, 225, 158, 185, 225>>, "", ".") == ".កឹ.\xE1."
482+
end
483+
460484
test "with empty pattern list" do
461485
assert String.replace("elixir", [], "anything") == "elixir"
462486
end
@@ -585,16 +609,58 @@ defmodule StringTest do
585609
test "next_grapheme/1" do
586610
assert String.next_grapheme("Ā̀stute") == {"Ā̀", "stute"}
587611
assert String.next_grapheme("") == nil
612+
end
613+
614+
describe "randomized" do
615+
test "next_grapheme" do
616+
for _ <- 1..10 do
617+
bin = :crypto.strong_rand_bytes(20)
618+
619+
try do
620+
bin |> Stream.unfold(&String.next_grapheme/1) |> Enum.to_list()
621+
rescue
622+
# Ignore malformed pictographic sequences
623+
_ -> :ok
624+
else
625+
list ->
626+
assert Enum.all?(list, &is_binary/1), "cannot build graphemes for #{inspect(bin)}"
627+
end
628+
end
629+
end
630+
631+
test "split empty" do
632+
for _ <- 1..10 do
633+
bin = :crypto.strong_rand_bytes(20)
634+
635+
try do
636+
String.split(bin, "")
637+
rescue
638+
# Ignore malformed pictographic sequences
639+
_ -> :ok
640+
else
641+
split ->
642+
assert Enum.all?(split, &is_binary/1), "cannot split #{inspect(bin)}"
643+
assert IO.iodata_to_binary(split) == bin
644+
end
645+
end
646+
end
588647

589-
for _ <- 1..10 do
590-
try do
648+
test "graphemes" do
649+
for _ <- 1..10 do
591650
bin = :crypto.strong_rand_bytes(20)
592651

593-
assert bin |> Stream.unfold(&String.next_grapheme/1) |> Enum.all?(&is_binary/1),
594-
"cannot build graphemes for #{inspect(bin)}"
595-
rescue
596-
# Ignore malformed pictographic sequences
597-
_ -> :ok
652+
try do
653+
String.graphemes(bin)
654+
rescue
655+
# Ignore malformed pictographic sequences
656+
_ -> :ok
657+
else
658+
graphemes ->
659+
assert Enum.all?(graphemes, &is_binary/1),
660+
"cannot build graphemes for #{inspect(bin)}"
661+
662+
assert IO.iodata_to_binary(graphemes) == bin
663+
end
598664
end
599665
end
600666
end

0 commit comments

Comments
 (0)