Skip to content

Commit c7640d3

Browse files
alco authored and José Valim committed
Do not traverse the string twice in String.slice(<str>, <range>)
Signed-off-by: José Valim <[email protected]>
1 parent 3992402 commit c7640d3

File tree

2 files changed

+83
-15
lines changed

2 files changed

+83
-15
lines changed

lib/elixir/lib/string.ex

Lines changed: 82 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,14 +1088,13 @@ defmodule String do
10881088
iex> String.slice("a", 1..1500)
10891089
""
10901090
1091-
iex> String.slice("a", 2..1500)
1092-
""
1093-
10941091
"""
10951092
@spec slice(t, Range.t) :: t
10961093

10971094
def slice(string, range)
10981095

1096+
# An empty string yields an empty slice for any range.
def slice("", _first.._last) do
  ""
end
1097+
10991098
def slice(string, first..-1) when first >= 0 do
11001099
nbytes = count_bytes_until(string, first)
11011100
if nbytes >= 0 do
@@ -1109,21 +1108,22 @@ defmodule String do
11091108
do_slice(next_grapheme(string), string, first, last, 0, 0, 0)
11101109
end
11111110

1112-
def slice(string, first..last) do
1113-
total = length(string)
1114-
1115-
if first < 0 do
1116-
first = total + first
1117-
end
1111+
# Left bound is non-negative. NOTE(review): presumably only reached with a
# negative `last`, because earlier clauses handle the other cases -- confirm
# against the clause order in the full file.
#
# `count` is the distance of the right bound from the end of the string; the
# string is traversed once by do_slice_neg_lb/6 starting at position 0.
def slice(string, first..last) when first >= 0 do
  count = abs(last)
  # init_bytes/0 must be called with parentheses; a bare `init_bytes` is
  # treated as a variable reference by current Elixir compilers.
  do_slice_neg_lb(next_grapheme(string), string, first, 0, count, init_bytes())
end
11181115

1119-
if last < 0 do
1120-
last = total + last
1121-
end
1116+
# Right bound is non-negative. NOTE(review): presumably only reached with a
# negative `first`, because an earlier clause is guarded by `first >= 0` --
# confirm against the clause order in the full file.
#
# `count` is the distance of the left bound from the end of the string; the
# string is traversed once by do_slice_neg/7 starting at position 0.
def slice(string, first..last) when last >= 0 do
  count = abs(first)
  # init_bytes/0 must be called with parentheses; a bare `init_bytes` is
  # treated as a variable reference by current Elixir compilers.
  do_slice_neg(next_grapheme(string), string, first, last, count, 0, init_bytes())
end
11221120

1123-
if first >= 0 do
1124-
do_slice(next_grapheme(string), string, first, last, 0, 0, 0)
1125-
else
1121+
# Both bounds are negative (counted from the end of the string).
# An inverted range such as -2..-4 is empty; otherwise the string is
# traversed once by do_slice_neg/7, with `count` being the distance of the
# left bound from the end.
def slice(string, first..last) when first < 0 and last < 0 do
  if first > last do
    ""
  else
    count = abs(first)
    # init_bytes/0 must be called with parentheses; a bare `init_bytes` is
    # treated as a variable reference by current Elixir compilers.
    do_slice_neg(next_grapheme(string), string, first, last, count, 0, init_bytes())
  end
end
11291129

@@ -1143,6 +1143,73 @@ defmodule String do
11431143
nbytes
11441144
end
11451145

1146+
# "lb" stands for known lower bound
1147+
# Single-pass slice for a range with a known left bound `first` and a right
# bound that lies `count` graphemes from the end of the string.
#
# While graphemes remain, each one is folded into the accumulator with
# update_bytes/5 and `pos` is advanced by one.
defp do_slice_neg_lb({grapheme, tail}, str, first, pos, count, bytes) do
  acc = update_bytes(bytes, grapheme, first, pos, count)
  do_slice_neg_lb(next_grapheme(tail), str, first, pos + 1, count, acc)
end

# No graphemes left: `pos` now holds how many graphemes were visited.
defp do_slice_neg_lb(nil, str, first, pos, count, bytes) do
  if pos <= first or pos - count < first do
    # either the starting position or the negative right bound is out of bounds
    ""
  else
    {sizes, _, offset} = bytes
    # byte size of the graphemes that come after the right bound
    trailing = sum_bytes(sizes, count - 1)
    binary_part(str, offset, byte_size(str) - offset - trailing)
  end
end
1166+
1167+
# Single-pass slice for a range whose left bound is negative; the right bound
# may be negative or non-negative. `count` is abs(first) as passed by the
# callers, and `pos` counts the graphemes visited so far.
defp do_slice_neg(nil, str, first, last, count, pos, bytes) do
  # Get positive bounds. The result of each `if` is bound explicitly because
  # a rebinding made inside an `if` body is not visible after the `if`.
  first = if first < 0, do: pos + first, else: first
  last = if last < 0, do: pos + last, else: last

  if first < 0 or first > last do
    # negative left bound is out of bounds, or the bounds are inverted
    ""
  else
    # `bytes` holds the byte size of every grapheme, most recent first
    {bytes, _, _} = bytes
    str_bytes = byte_size(str)
    # the slice starts `count` graphemes before the end of the string
    start_bytes = str_bytes - sum_bytes(bytes, count)
    # clamp a right bound that points past the last grapheme
    last = min(last, pos - 1)

    len_bytes =
      bytes |> drop_bytes(pos - last - 1) |> sum_bytes(last - first + 1)

    binary_part(str, start_bytes, min(len_bytes, str_bytes - start_bytes))
  end
end

defp do_slice_neg({char, rest}, str, first, last, count, pos, bytes) do
  # `first` and `pos` are passed as 0 so that update_bytes/5 never advances
  # the start offset; only the per-grapheme byte sizes are needed here.
  bytes = update_bytes(bytes, char, 0, 0, count)
  do_slice_neg(next_grapheme(rest), str, first, last, count, pos + 1, bytes)
end
1191+
1192+
# Initial accumulator for update_bytes/5:
# {grapheme byte sizes (most recent first), counter, start byte offset}.
defp init_bytes do
  {[], 0, 0}
end
1193+
1194+
# Adds up the first `count` entries of the byte-size list `bytes`.
defp sum_bytes(bytes, count) do
  leading = Enum.take(bytes, count)
  Enum.sum(leading)
end
1197+
1198+
# Discards the first `count` entries of the byte-size list `bytes`.
defp drop_bytes(bytes, count), do: Enum.drop(bytes, count)
1201+
1202+
# Folds one grapheme into the byte-tracking accumulator `{bytes, n, start_bytes}`:
#
#   * `bytes` - byte sizes of the graphemes seen so far, most recent first
#   * `n` - counter bumped once per grapheme until it reaches `cnt`
#   * `start_bytes` - summed byte size of the graphemes at positions below `first`
#
# Returns the updated accumulator tuple.
#
# The result of each `if` is bound explicitly: a rebinding made inside an
# `if` body is not visible after the `if`, so the updates would be lost.
defp update_bytes({bytes, n, start_bytes}, char, first, pos, cnt) do
  char_bytes = byte_size(char)
  start_bytes = if pos < first, do: start_bytes + char_bytes, else: start_bytes
  n = if n < cnt, do: n + 1, else: n
  {[char_bytes | bytes], n, start_bytes}
end
1212+
11461213
# A start position beyond the last position describes an empty slice.
defp do_slice(_, _, start_pos, last_pos, _, _, _) when start_pos > last_pos, do: ""

lib/elixir/test/elixir/string_test.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,7 @@ defmodule StringTest do
345345
assert String.slice("あいうえお", -2..-4) == ""
346346
assert String.slice("あいうえお", -10..-15) == ""
347347
assert String.slice("hello あいうえお unicode", 8..-1) == "うえお unicode"
348+
assert String.slice("abc", -1..14) == "c"
348349
end
349350

350351
test :valid? do

0 commit comments

Comments
 (0)