Skip to content

Commit 7ea91d2

Browse files
author
José Valim
committed
Clean up String.slice/2/3 impls
1 parent c7640d3 commit 7ea91d2

File tree

1 file changed

+38
-127
lines changed

1 file changed

+38
-127
lines changed

lib/elixir/lib/string.ex

Lines changed: 38 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -1030,17 +1030,29 @@ defmodule String do
10301030
end
10311031

10321032
def slice(string, start, len) when start >= 0 and len >= 0 do
1033-
do_slice(next_grapheme(string), string, start, start + len - 1, 0, 0, 0)
1033+
case do_count_bytes(next_grapheme(string), start, 0) do
1034+
{nil, _} -> ""
1035+
{next, start_bytes} ->
1036+
{_, len_bytes} = do_count_bytes(next, len, 0)
1037+
binary_part(string, start_bytes, len_bytes)
1038+
end
10341039
end
10351040

10361041
def slice(string, start, len) when start < 0 and len >= 0 do
1037-
real_start_pos = length(string) - abs(start)
1038-
case real_start_pos >= 0 do
1039-
true -> do_slice(next_grapheme(string), string, real_start_pos, real_start_pos + len - 1, 0, 0, 0)
1042+
start = length(string) + start
1043+
case start >= 0 do
1044+
true -> slice(string, start, len)
10401045
false -> ""
10411046
end
10421047
end
10431048

1049+
defp do_count_bytes(next, 0, acc), do: {next, acc}
1050+
defp do_count_bytes(nil, _, acc), do: {nil, acc}
1051+
1052+
defp do_count_bytes({char, rest}, counter, acc) do
1053+
do_count_bytes(next_grapheme(rest), counter - 1, acc + byte_size(char))
1054+
end
1055+
10441056
@doc """
10451057
Returns a substring from the offset given by the start of the
10461058
range to the offset given by the end of the range.
@@ -1096,144 +1108,43 @@ defmodule String do
10961108
def slice("", _.._), do: ""
10971109

10981110
def slice(string, first..-1) when first >= 0 do
1099-
nbytes = count_bytes_until(string, first)
1100-
if nbytes >= 0 do
1101-
binary_part(string, nbytes, byte_size(string) - nbytes)
1102-
else
1103-
""
1111+
case do_count_bytes(next_grapheme(string), first, 0) do
1112+
{nil, _} -> ""
1113+
{_, start_bytes} -> binary_part(string, start_bytes, byte_size(string) - start_bytes)
11041114
end
11051115
end
11061116

11071117
def slice(string, first..last) when first >= 0 and last >= 0 do
1108-
do_slice(next_grapheme(string), string, first, last, 0, 0, 0)
1109-
end
1110-
1111-
def slice(string, first..last) when first >= 0 do
1112-
count = abs(last)
1113-
do_slice_neg_lb(next_grapheme(string), string, first, 0, count, init_bytes)
1114-
end
1115-
1116-
def slice(string, first..last) when last >= 0 do
1117-
count = abs(first)
1118-
do_slice_neg(next_grapheme(string), string, first, last, count, 0, init_bytes)
1119-
end
1120-
1121-
def slice(string, first..last) when first < 0 and last < 0 do
1122-
if first > last do
1123-
""
1118+
if last >= first do
1119+
slice(string, first, last - first + 1)
11241120
else
1125-
count = abs(first)
1126-
do_slice_neg(next_grapheme(string), string, first, last, count, 0, init_bytes)
1127-
end
1128-
end
1129-
1130-
defp count_bytes_until(string, index) do
1131-
count_bytes_until(next_grapheme(string), index, 0, 0)
1132-
end
1133-
1134-
defp count_bytes_until({char, rest}, index, pos, nbytes) when pos < index do
1135-
count_bytes_until(next_grapheme(rest), index, pos+1, nbytes+byte_size(char))
1136-
end
1137-
1138-
defp count_bytes_until(nil, _, _, _) do
1139-
-1
1140-
end
1141-
1142-
defp count_bytes_until({_, _}, index, index, nbytes) do
1143-
nbytes
1144-
end
1145-
1146-
# "lb" stands for known lower bound
1147-
defp do_slice_neg_lb(nil, str, first, pos, count, bytes) do
1148-
cond do
1149-
pos < first+1 ->
1150-
# starting position is out of bounds
1151-
""
1152-
pos-count < first ->
1153-
# the negative right bound is out of bounds
1154-
""
1155-
true ->
1156-
{bytes, _, start_bytes} = bytes
1157-
len_bytes = sum_bytes(bytes, count-1)
1158-
binary_part(str, start_bytes, byte_size(str)-start_bytes-len_bytes)
1159-
end
1160-
end
1161-
1162-
defp do_slice_neg_lb({char, rest}, str, first, pos, count, bytes) do
1163-
bytes = update_bytes(bytes, char, first, pos, count)
1164-
do_slice_neg_lb(next_grapheme(rest), str, first, pos+1, count, bytes)
1165-
end
1166-
1167-
# both bounds are negative
1168-
defp do_slice_neg(nil, str, first, last, count, pos, bytes) do
1169-
# get positive bounds
1170-
if first < 0, do: first = pos + first
1171-
if last < 0, do: last = pos + last
1172-
cond do
1173-
first < 0 or first > last ->
1174-
# negative left bound is out of bounds
1175-
""
1176-
true ->
1177-
{bytes, _, _} = bytes
1178-
str_bytes = byte_size(str)
1179-
start_bytes = str_bytes - sum_bytes(bytes, count)
1180-
last = min(last, pos-1)
1181-
len_bytes =
1182-
bytes |> drop_bytes(pos-last-1) |> sum_bytes(last-first+1)
1183-
binary_part(str, start_bytes, min(len_bytes, str_bytes-start_bytes))
1121+
""
11841122
end
11851123
end
11861124

1187-
defp do_slice_neg({char, rest}, str, first, last, count, pos, bytes) do
1188-
bytes = update_bytes(bytes, char, 0, 0, count)
1189-
do_slice_neg(next_grapheme(rest), str, first, last, count, pos+1, bytes)
1190-
end
1191-
1192-
defp init_bytes(), do: {[], 0, 0}
1125+
def slice(string, first..last) do
1126+
{bytes, length} = do_acc_bytes(next_grapheme(string), [], 0)
11931127

1194-
defp sum_bytes(bytes, count) do
1195-
bytes |> Enum.take(count) |> Enum.sum
1196-
end
1128+
if first < 0, do: first = length + first
1129+
if last < 0, do: last = length + last
11971130

1198-
defp drop_bytes(bytes, count) do
1199-
Enum.drop(bytes, count)
1200-
end
1201-
1202-
defp update_bytes({bytes, n, start_bytes}, char, first, pos, cnt) do
1203-
char_bytes = byte_size(char)
1204-
if pos < first do
1205-
start_bytes = start_bytes + char_bytes
1206-
end
1207-
if n < cnt do
1208-
n = n + 1
1131+
if first < 0 or first > last or first > length do
1132+
""
1133+
else
1134+
last = min(last + 1, length)
1135+
bytes = Enum.drop(bytes, length - last)
1136+
first = last - first
1137+
{length_bytes, start_bytes} = Enum.split(bytes, first)
1138+
binary_part(string, Enum.sum(start_bytes), Enum.sum(length_bytes))
12091139
end
1210-
{[char_bytes|bytes], n, start_bytes}
1211-
end
1212-
1213-
defp do_slice(_, _, start_pos, last_pos, _, _, _) when start_pos > last_pos do
1214-
""
1215-
end
1216-
1217-
defp do_slice({char, rest}, str, start_pos, last_pos, current_pos, start_bytes, len_bytes)
1218-
when current_pos < start_pos
1219-
do
1220-
do_slice(next_grapheme(rest), str, start_pos, last_pos, current_pos + 1, start_bytes+byte_size(char), len_bytes)
1221-
end
1222-
1223-
defp do_slice({char, rest}, str, start_pos, last_pos, current_pos, start_bytes, len_bytes)
1224-
when current_pos >= start_pos and current_pos < last_pos
1225-
do
1226-
do_slice(next_grapheme(rest), str, start_pos, last_pos, current_pos + 1, start_bytes, len_bytes+byte_size(char))
12271140
end
12281141

1229-
defp do_slice({char, _}, str, start_pos, last_pos, current_pos, start_bytes, len_bytes)
1230-
when current_pos >= start_pos and current_pos == last_pos
1231-
do
1232-
binary_part(str, start_bytes, len_bytes+byte_size(char))
1142+
defp do_acc_bytes({char, rest}, bytes, length) do
1143+
do_acc_bytes(next_grapheme(rest), [byte_size(char)|bytes], length + 1)
12331144
end
12341145

1235-
defp do_slice(nil, str, _, _, _, start_bytes, len_bytes) do
1236-
binary_part(str, start_bytes, len_bytes)
1146+
defp do_acc_bytes(nil, bytes, length) do
1147+
{bytes, length}
12371148
end
12381149

12391150
@doc """

0 commit comments

Comments
 (0)