Skip to content

Commit e65827a

Browse files
committed
Refactor formatter, extract string to separate function, limit state
1 parent d0c869d commit e65827a

File tree

3 files changed

+147
-161
lines changed

3 files changed

+147
-161
lines changed

formatter_test_suite/simple-object.min.json~

Lines changed: 0 additions & 1 deletion
This file was deleted.

lib/formatter.ex

Lines changed: 113 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ defmodule Jason.Formatter do
5252
"""
5353
@spec pretty_print(iodata, opts) :: binary
5454
def pretty_print(iodata, opts \\ []) do
55-
pretty_print_to_iodata(iodata, opts)
55+
iodata
56+
|> pretty_print_to_iodata(opts)
5657
|> IO.iodata_to_binary()
5758
end
5859

@@ -67,13 +68,10 @@ defmodule Jason.Formatter do
6768
opts = parse_opts(opts, opts(indent: " ", line: "\n", record: nil, colon: " "))
6869
opts = opts(opts, record: opts(opts, :record) || opts(opts, :line))
6970

70-
depth = 0
71-
in_str = false
72-
in_bs = false
71+
depth = :first
7372
empty = false
74-
first = true
7573

76-
{output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts)
74+
{output, _state} = pp_iodata(iodata, [], depth, empty, opts)
7775

7876
output
7977
end
@@ -96,7 +94,8 @@ defmodule Jason.Formatter do
9694
"""
9795
@spec minimize(iodata, opts) :: binary
9896
def minimize(iodata, opts \\ []) do
99-
minimize_to_iodata(iodata, opts)
97+
iodata
98+
|> minimize_to_iodata(opts)
10099
|> IO.iodata_to_binary()
101100
end
102101

@@ -108,212 +107,166 @@ defmodule Jason.Formatter do
108107
"""
109108
@spec minimize_to_iodata(iodata, opts) :: iodata
110109
def minimize_to_iodata(iodata, opts) do
111-
opts = parse_opts(opts, opts(indent: [], line: [], record: "\n", colon: []))
110+
opts = parse_opts(opts, opts(indent: "", line: "", record: "\n", colon: ""))
112111

113-
depth = 0
114-
in_str = false
115-
in_bs = false
112+
depth = :first
116113
empty = false
117-
first = true
118114

119-
{output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts)
115+
{output, _state} = pp_iodata(iodata, [], depth, empty, opts)
120116

121117
output
122118
end
123119

124120
defp parse_opts(opts, defaults) do
125121
Enum.reduce(opts, defaults, fn
126-
{:indent, indent}, opts -> opts(opts, indent: indent)
127-
{:line_separator, line}, opts -> opts(opts, line: line, record: opts(opts, :record) || line)
128-
{:record_separator, record}, opts -> opts(opts, record: record)
129-
{:after_colon, colon}, opts -> opts(opts, colon: colon)
122+
{:indent, indent}, opts ->
123+
opts(opts, indent: IO.iodata_to_binary(indent))
124+
125+
{:line_separator, line}, opts ->
126+
line = IO.iodata_to_binary(line)
127+
opts(opts, line: line, record: opts(opts, :record) || line)
128+
129+
{:record_separator, record}, opts ->
130+
opts(opts, record: IO.iodata_to_binary(record))
131+
132+
{:after_colon, colon}, opts ->
133+
opts(opts, colon: IO.iodata_to_binary(colon))
130134
end)
131135
end
132136

137+
@spec tab(String.t(), non_neg_integer) :: iodata()
133138
## Returns an iolist containing `depth` instances of `opts[:indent]`
134139
for depth <- 1..16 do
135140
defp tab(" ", unquote(depth)), do: unquote(String.duplicate(" ", depth))
136141
end
137142

138-
defp tab([], _), do: ""
143+
defp tab("", _), do: ""
139144
defp tab(indent, depth), do: List.duplicate(indent, depth)
140145

141-
@typep pp_state :: {
142-
## depth -- current nesting depth
143-
non_neg_integer,
144-
## in_str -- is the current byte in a string?
145-
boolean,
146-
## in_bs -- does the current byte follow a backslash in a string?
147-
boolean,
148-
## empty -- is the current object or array empty?
149-
boolean,
150-
## first -- is this the first object or array in the input?
151-
boolean
152-
}
153-
154-
@spec pp_iodata(
155-
## input -- input data
156-
iodata,
157-
## output_acc -- output iolist (built in reverse order)
158-
iodata,
159-
## depth -- current nesting depth
160-
non_neg_integer,
161-
## in_str -- is the current byte in a string?
162-
boolean,
163-
## in_bs -- does the current byte follow a backslash in a string?
164-
boolean,
165-
## empty -- is the current object or array empty?
166-
boolean,
167-
## first -- is this the first object or array in the input?
168-
boolean,
169-
opts
170-
) :: {iodata, pp_state}
171-
defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts)
172-
173-
defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do
174-
{output_acc, {depth, in_str, in_bs, empty, first, opts}}
175-
end
176-
177-
defp pp_iodata([], output_acc, depth, in_str, in_bs, empty, first, opts) do
178-
{output_acc, {depth, in_str, in_bs, empty, first, opts}}
146+
# Input binary exhausted: return the output accumulated so far together with
# a continuation that resumes formatting with the same depth/empty/opts once
# more input and an output accumulator are supplied.
defp pp_iodata(<<>>, output_acc, depth, empty, opts) do
147+
{output_acc, &pp_iodata(&1, &2, depth, empty, opts)}
179148
end
180149

181-
defp pp_iodata(
182-
<<byte::size(8), rest::binary>>,
183-
output_acc,
184-
depth,
185-
in_str,
186-
in_bs,
187-
empty,
188-
first,
189-
opts
190-
) do
191-
pp_byte(byte, rest, output_acc, depth, in_str, in_bs, empty, first, opts)
150+
# Non-empty binary: peel off the first byte and let pp_byte/6 decide how to
# emit it; `rest` is the remainder of this same binary.
defp pp_iodata(<<byte, rest::binary>>, output_acc, depth, empty, opts) do
151+
pp_byte(byte, rest, output_acc, depth, empty, opts)
192152
end
193153

194-
defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts)
195-
when is_integer(byte) do
196-
pp_byte(byte, [], output_acc, depth, in_str, in_bs, empty, first, opts)
154+
# Input list exhausted: same contract as the empty-binary clause -- return the
# output so far plus a continuation closing over depth/empty/opts.
defp pp_iodata([], output_acc, depth, empty, opts) do
155+
{output_acc, &pp_iodata(&1, &2, depth, empty, opts)}
197156
end
198157

199-
defp pp_iodata(list, output_acc, depth, in_str, in_bs, empty, first, opts) when is_list(list) do
200-
starting_state = {depth, in_str, in_bs, empty, first, opts}
201-
202-
{list_output, end_state} =
203-
Enum.reduce(list, {[], starting_state}, fn item, {output_acc, state} ->
204-
{depth, in_str, in_bs, empty, first, opts} = state
205-
{item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts)
206-
{[output_acc, item_output], new_state}
207-
end)
208-
209-
{[output_acc, list_output], end_state}
158+
# List whose head is a bare integer (a single byte inside an iolist): hand the
# byte to pp_byte/6 with the list tail as the remaining input.
defp pp_iodata([byte | rest], output_acc, depth, empty, opts) when is_integer(byte) do
159+
pp_byte(byte, rest, output_acc, depth, empty, opts)
210160
end
211161

212-
@spec pp_byte(
213-
## byte -- current byte
214-
byte,
215-
## rest -- rest of input data
216-
iodata,
217-
## output -- output iolist (built in reverse order)
218-
iodata,
219-
## depth -- current nesting depth
220-
non_neg_integer,
221-
## in_str -- is the current byte in a string?
222-
boolean,
223-
## in_bs -- does the current byte follow a backslash in a string?
224-
boolean,
225-
## empty -- is the current object or array empty?
226-
boolean,
227-
## first -- is this the first object or array in the input?
228-
boolean,
229-
opts
230-
) :: {iodata, pp_state}
231-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts)
232-
233-
## in string, following backslash
234-
defp pp_byte(byte, rest, output, depth, true = in_str, true = _in_bs, empty, first, opts) do
235-
in_bs = false
236-
pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts)
162+
# List whose head is not an integer (the integer case is matched by the
# preceding clause): format `head` first. That call returns the updated output
# and a continuation carrying whatever state `head` ended in; the continuation
# is then resumed on `tail`, so state flows across iolist element boundaries.
defp pp_iodata([head | tail], output_acc, depth, empty, opts) do
163+
{output_acc, cont} = pp_iodata(head, output_acc, depth, empty, opts)
164+
cont.(tail, output_acc)
237165
end
238166

239-
## in string, backslash
240-
defp pp_byte(byte, rest, output, depth, true = in_str, _in_bs, empty, first, opts)
241-
when byte in '\\' do
242-
in_bs = true
243-
pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts)
167+
defp pp_byte(byte, rest, output, depth, empty, opts) when byte in ' \n\r\t' do
168+
pp_iodata(rest, output, depth, empty, opts)
244169
end
245170

246-
## in string, end quote
247-
defp pp_byte(byte, rest, output, depth, true = _in_str, in_bs, empty, first, opts)
248-
when byte in '"' do
249-
in_str = false
250-
pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts)
251-
end
252-
253-
## in string, other character
254-
defp pp_byte(byte, rest, output, depth, true = in_str, in_bs, empty, first, opts) do
255-
pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts)
256-
end
257-
258-
## out of string, whitespace
259-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts)
260-
when byte in ' \n\r\t' do
261-
pp_iodata(rest, output, depth, in_str, in_bs, empty, first, opts)
262-
end
263-
264-
## out of string, start block
265-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts)
266-
when byte in '{[' do
267-
out =
171+
defp pp_byte(byte, rest, output, depth, empty, opts) when byte in '{[' do
172+
{out, depth} =
268173
cond do
269-
first -> byte
270-
empty -> [opts(opts, :line), tab(opts(opts, :indent), depth), byte]
271-
depth == 0 -> [opts(opts, :record), byte]
272-
true -> byte
174+
depth == :first -> {byte, 1}
175+
depth == 0 -> {[opts(opts, :record), byte], 1}
176+
empty -> {[opts(opts, :line), tab(opts(opts, :indent), depth), byte], depth + 1}
177+
true -> {byte, depth + 1}
273178
end
274179

275-
first = false
276180
empty = true
277-
depth = depth + 1
278-
pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts)
181+
pp_iodata(rest, [output, out], depth, empty, opts)
279182
end
280183

281-
## out of string, end empty block
282-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, true = _empty, first, opts)
283-
when byte in '}]' do
184+
defp pp_byte(byte, rest, output, depth, true = _empty, opts) when byte in '}]' do
284185
empty = false
285186
depth = depth - 1
286-
pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts)
187+
pp_iodata(rest, [output, byte], depth, empty, opts)
287188
end
288189

289-
## out of string, end non-empty block
290-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, false = empty, first, opts)
291-
when byte in '}]' do
190+
defp pp_byte(byte, rest, output, depth, false = empty, opts) when byte in '}]' do
292191
depth = depth - 1
293192
out = [opts(opts, :line), tab(opts(opts, :indent), depth), byte]
294-
pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts)
193+
pp_iodata(rest, [output, out], depth, empty, opts)
295194
end
296195

297-
## out of string, comma
298-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, _empty, first, opts)
299-
when byte in ',' do
196+
defp pp_byte(byte, rest, output, depth, _empty, opts) when byte in ',' do
300197
empty = false
301198
out = [byte, opts(opts, :line), tab(opts(opts, :indent), depth)]
302-
pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts)
199+
pp_iodata(rest, [output, out], depth, empty, opts)
303200
end
304201

305-
## out of string, colon
306-
defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts)
307-
when byte in ':' do
202+
defp pp_byte(byte, rest, output, depth, empty, opts) when byte in ':' do
308203
out = [byte, opts(opts, :colon)]
309-
pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts)
204+
pp_iodata(rest, [output, out], depth, empty, opts)
310205
end
311206

312-
## out of string, other character (maybe start quote)
313-
defp pp_byte(byte, rest, output, depth, _in_str, in_bs, empty, first, opts) do
207+
defp pp_byte(byte, rest, output, depth, empty, opts) do
314208
out = if empty, do: [opts(opts, :line), tab(opts(opts, :indent), depth), byte], else: byte
315-
in_str = byte in '"'
316209
empty = false
317-
pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts)
210+
211+
if byte == ?" do
212+
pp_string(rest, [output, out], _in_bs = false, &pp_iodata(&1, &2, depth, empty, opts))
213+
else
214+
pp_iodata(rest, [output, out], depth, empty, opts)
215+
end
216+
end
217+
218+
# pp_string/4 copies the body of a JSON string verbatim to the output. It is
# entered from pp_byte/6 right after an opening `"` has been emitted.
#
#   input      -- remaining iodata (binary, list, or nested list)
#   output_acc -- output iolist built so far
#   in_bs      -- true when the previous byte was a backslash that escapes the
#                 next byte
#   cont       -- 2-arity function (rest_input, output) used to resume normal
#                 formatting once the closing quote has been copied
#
# Returns whatever `cont` returns when the closing quote is found, otherwise
# `{output, continuation}` where the continuation re-enters pp_string/4 with
# the remembered `in_bs` and `cont`.

# Binary exhausted while still inside the string: suspend.
defp pp_string(<<>>, output_acc, in_bs, cont) do
219+
{output_acc, &pp_string(&1, &2, in_bs, cont)}
220+
end
221+
222+
# Quote directly after an escaping backslash: it is part of the string.
defp pp_string(<<?", rest::binary>>, output_acc, true = _in_bs, cont) do
223+
pp_string(rest, [output_acc, ?"], false, cont)
224+
end
225+
226+
# Unescaped quote at the front of the binary: the string ends here.
defp pp_string(<<?", rest::binary>>, output_acc, false = _in_bs, cont) do
227+
cont.(rest, [output_acc, ?"])
228+
end
229+
230+
# Exactly one byte left (not a quote -- those are matched above). A backslash
# only starts an escape when it is not itself escaped.
defp pp_string(<<byte>>, output_acc, in_bs, cont) do
231+
in_bs = not in_bs and byte == ?\\
232+
{[output_acc, byte], &pp_string(&1, &2, in_bs, cont)}
233+
end
234+
235+
# Binary of two or more bytes that does not start with a quote (shorter and
# quote-leading binaries are handled by the clauses above, so `size - 2` and
# `pos - 1` below are never negative).
#
# NOTE(review): the incoming `in_bs` is discarded here. If the previous chunk
# ended with an escaping backslash and this chunk starts with a backslash, that
# first byte is still used as the "previous byte" for a following quote.
defp pp_string(binary, output_acc, _in_bs, cont) when is_binary(binary) do
236+
size = byte_size(binary)
237+
238+
case :binary.match(binary, "\"") do
239+
:nomatch ->
240+
skip = size - 2
241+
<<_::binary-size(skip), prev, last>> = binary
242+
# NOTE(review): this expression is true whenever `last` is not a backslash,
# so a chunk such as "abc" leaves in_bs = true and a closing quote arriving
# at the start of the next chunk is taken as escaped. The intent looks like
# `last == ?\\ and prev != ?\\` -- confirm and fix.
in_bs = not (prev == ?\\ and last == ?\\) or last == ?\\
243+
{[output_acc | binary], &pp_string(&1, &2, in_bs, cont)}
244+
245+
{pos, 1} ->
246+
# Copy everything up to and including the first quote in one step.
{leading, tail} = :erlang.split_binary(binary, pos + 1)
247+
output = [output_acc | leading]
248+
249+
# NOTE(review): only the single byte before the quote is inspected, so an
# escaped backslash followed by the closing quote (`\\"`) inside one binary
# is read as an escaped quote and the string is not terminated.
case :binary.at(binary, pos - 1) do
250+
?\\ -> pp_string(tail, output, false, cont)
251+
_ -> cont.(tail, output)
252+
end
253+
end
254+
end
255+
256+
# List exhausted while still inside the string: suspend.
defp pp_string([], output_acc, in_bs, cont) do
257+
{output_acc, &pp_string(&1, &2, in_bs, cont)}
258+
end
259+
260+
# Single byte from an iolist: an escaped byte is copied as-is, an unescaped
# quote ends the string, anything else is copied and a backslash arms in_bs.
defp pp_string([byte | rest], output_acc, in_bs, cont) when is_integer(byte) do
261+
cond do
262+
in_bs -> pp_string(rest, [output_acc, byte], false, cont)
263+
byte == ?" -> cont.(rest, [output_acc, byte])
264+
true -> pp_string(rest, [output_acc, byte], byte == ?\\, cont)
265+
end
266+
end
267+
268+
# Non-integer list head: process it, then resume on the tail with the
# continuation it returned. That continuation is either still pp_string/4 (the
# string is unfinished) or the outer formatter (the string closed inside `head`).
defp pp_string([head | tail], output_acc, in_bs, cont) do
269+
{output_acc, cont} = pp_string(head, output_acc, in_bs, cont)
270+
cont.(tail, output_acc)
318271
end
319272
end

test/formatter_test.exs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,38 @@ defmodule Jason.FormatterTest do
6464
output = ~s|{\n\t"a": {\n\t\t"b": [\n\t\t\ttrue,\n\t\t\tfalse\n\t\t]\n\t}\n}|
6565
assert(pretty_print(input, indent: "\t") == output)
6666
end
67+
68+
test "proper string escaping" do
69+
input = ["\"abc\\\\", "\""]
70+
output = ~S|"abc\\"|
71+
assert(minimize(input) == output)
72+
73+
input = ["\"abc\\\\", ?"]
74+
output = ~S|"abc\\"|
75+
assert(minimize(input) == output)
76+
77+
input = ["\"abc\\\"", "\""]
78+
output = ~S|"abc\""|
79+
assert(minimize(input) == output)
80+
81+
input = ["\"abc\\\"", ?"]
82+
output = ~S|"abc\""|
83+
assert(minimize(input) == output)
84+
85+
input = ["\"abc\\", "\"\""]
86+
output = ~S|"abc\""|
87+
assert(minimize(input) == output)
88+
89+
input = ["\"abc\\", ?", ?"]
90+
output = ~S|"abc\""|
91+
assert(minimize(input) == output)
92+
93+
input = ["\"abc", "\\", ?", ?"]
94+
output = ~S|"abc\""|
95+
assert(minimize(input) == output)
96+
97+
input = ["\"abc\\", "\\", ?"]
98+
output = ~S|"abc\\"|
99+
assert(minimize(input) == output)
100+
end
67101
end

0 commit comments

Comments
 (0)