Skip to content

Commit 770055e

Browse files
authored
Fix DecompressResponse middleware for multiple encodings and keep updated content-length header (#809)
* fix: stop decompressing response on first unknown codec If multiple Content-Encodings were applied restoring the original message requires undoing all steps in reverse order. Thus we cannot continue when encountering an unsupported codec. Just skipping over one step will most likely just lead to decoding errors in the next supported step or incorrect results otherwise. * fix: keep original content length and encoding headers for HEAD requests HEAD requests can be used to check the size of remote content to decide ahead of time whether it is worth fetching. Of course the size after decompression likely differs from the transfer size indicated in the content-length header, but depending on use case only the transfer size might be relevant. This obsoletes the empty-body special case in decompress_body previously added in 5bc9b82 since HEAD requests are now handled earlier. If we get an invalid empty body in a non-HEAD request we want to fail loudly. * fix: update existing content-length header after decompression Depending on context presence of this header is mandatory or at least strongly encouraged in HTTP/1.0 and HTTP/1.1 and some later processing steps might rely on or profit from its presence
1 parent 2599dc2 commit 770055e

File tree

2 files changed

+90
-19
lines changed

2 files changed

+90
-19
lines changed

lib/tesla/middleware/compression.ex

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,14 @@ defmodule Tesla.Middleware.Compression do
6767
def decompress({:ok, env}), do: {:ok, decompress(env)}
6868
def decompress({:error, reason}), do: {:error, reason}
6969

70+
# HEAD requests may be used to obtain information on the transfer size and properties
71+
# and their empty bodies are not actually valid for the possibly indicated encodings
72+
# thus we want to preserve them unchanged.
73+
def decompress(%Tesla.Env{method: :head} = env), do: env
74+
7075
def decompress(env) do
7176
codecs = compression_algorithms(Tesla.get_header(env, "content-encoding"))
72-
{decompressed_body, unknown_codecs} = decompress_body(codecs, env.body, [])
77+
{decompressed_body, unknown_codecs} = decompress_body(codecs, env.body)
7378

7479
env
7580
|> put_decompressed_body(decompressed_body)
@@ -84,28 +89,24 @@ defmodule Tesla.Middleware.Compression do
8489
Tesla.put_header(env, "content-encoding", Enum.join(unknown_codecs, ", "))
8590
end
8691

87-
defp decompress_body(_codecs, "" = body, acc) do
88-
{body, acc}
89-
end
90-
91-
defp decompress_body([gzip | rest], body, acc) when gzip in ["gzip", "x-gzip"] do
92-
decompress_body(rest, :zlib.gunzip(body), acc)
92+
defp decompress_body([gzip | rest], body) when gzip in ["gzip", "x-gzip"] do
93+
decompress_body(rest, :zlib.gunzip(body))
9394
end
9495

95-
defp decompress_body(["deflate" | rest], body, acc) do
96-
decompress_body(rest, :zlib.unzip(body), acc)
96+
defp decompress_body(["deflate" | rest], body) do
97+
decompress_body(rest, :zlib.unzip(body))
9798
end
9899

99-
defp decompress_body(["identity" | rest], body, acc) do
100-
decompress_body(rest, body, acc)
100+
defp decompress_body(["identity" | rest], body) do
101+
decompress_body(rest, body)
101102
end
102103

103-
defp decompress_body([codec | rest], body, acc) do
104-
decompress_body(rest, body, [codec | acc])
104+
defp decompress_body([codec | rest], body) do
105+
{body, Enum.reverse([codec | rest])}
105106
end
106107

107-
defp decompress_body([], body, acc) do
108-
{body, acc}
108+
defp decompress_body([], body) do
109+
{body, []}
109110
end
110111

111112
defp compression_algorithms(nil) do
@@ -123,7 +124,27 @@ defmodule Tesla.Middleware.Compression do
123124
defp put_decompressed_body(env, body) do
124125
env
125126
|> Tesla.put_body(body)
126-
|> Tesla.delete_header("content-length")
127+
|> update_content_length(body)
128+
end
129+
130+
# The value of the content-length header will be inaccurate after decompression.
131+
# But setting it is mandatory or strongly encouraged in HTTP/1.0 and HTTP/1.1.
132+
# Except, when transfer-encoding is used defining content-length is invalid.
133+
# Thus we can neither just drop it nor indiscriminately add it, but will update it if it already exists.
134+
# Furthermore, content-length is technically allowed to be specified multiple times if all values match;
135+
# to ensure consistency we must therefore make sure to drop any duplicate definitions while updating.
136+
defp update_content_length(env, body) when is_binary(body) do
137+
if Tesla.get_header(env, "content-length") != nil do
138+
env
139+
|> Tesla.delete_header("content-length")
140+
|> Tesla.put_header("content-length", "#{byte_size(body)}")
141+
else
142+
env
143+
end
144+
end
145+
146+
defp update_content_length(env, _) do
147+
env
127148
end
128149
end
129150

test/tesla/middleware/compression_test.exs

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,33 @@ defmodule Tesla.Middleware.CompressionTest do
5959
{200, [{"content-type", "text/plain"}, {"content-encoding", "deflate"}],
6060
:zlib.zip("decompressed deflate")}
6161

62+
"/multiple-encodings" ->
63+
{200, [{"content-type", "text/plain"}, {"content-encoding", "gzip, zstd, gzip"}],
64+
:zlib.gzip("decompressed gzip")}
65+
6266
"/response-identity" ->
6367
{200, [{"content-type", "text/plain"}, {"content-encoding", "identity"}], "unchanged"}
6468

6569
"/response-empty" ->
6670
{200, [{"content-type", "text/plain"}, {"content-encoding", "gzip"}], ""}
71+
72+
"/response-with-content-length" ->
73+
body = :zlib.gzip("decompressed gzip")
74+
75+
{200,
76+
[
77+
{"content-type", "text/plain"},
78+
{"content-encoding", "gzip"},
79+
{"content-length", "#{byte_size(body)}"}
80+
], body}
81+
82+
"/response-empty-with-content-length" ->
83+
{200,
84+
[
85+
{"content-type", "text/plain"},
86+
{"content-encoding", "gzip"},
87+
{"content-length", "4194304"}
88+
], ""}
6789
end
6890

6991
{:ok, %{env | status: status, headers: headers, body: body}}
@@ -81,16 +103,44 @@ defmodule Tesla.Middleware.CompressionTest do
81103
assert env.body == "decompressed deflate"
82104
end
83105

106+
test "stops decompressing on first unsupported content-encoding" do
107+
assert {:ok, env} = CompressionResponseClient.get("/multiple-encodings")
108+
assert env.body == "decompressed gzip"
109+
assert env.headers == [{"content-type", "text/plain"}, {"content-encoding", "gzip, zstd"}]
110+
end
111+
84112
test "return unchanged response for unsupported content-encoding" do
85113
assert {:ok, env} = CompressionResponseClient.get("/response-identity")
86114
assert env.body == "unchanged"
87115
assert env.headers == [{"content-type", "text/plain"}]
88116
end
89117

90-
test "return unchanged response for empty body (gzip)" do
91-
assert {:ok, env} = CompressionResponseClient.get("/response-empty")
118+
test "raises on invalid empty-body response (gzip)" do
119+
assert_raise(ErlangError, "Erlang error: :data_error", fn ->
120+
CompressionResponseClient.get("/response-empty")
121+
end)
122+
end
123+
124+
test "updates existing content-length header" do
125+
expected_body = "decompressed gzip"
126+
assert {:ok, env} = CompressionResponseClient.get("/response-with-content-length")
127+
assert env.body == expected_body
128+
129+
assert env.headers == [
130+
{"content-type", "text/plain"},
131+
{"content-length", "#{byte_size(expected_body)}"}
132+
]
133+
end
134+
135+
test "preserves compression headers for HEAD requests" do
136+
assert {:ok, env} = CompressionResponseClient.head("/response-empty-with-content-length")
92137
assert env.body == ""
93-
assert env.headers == [{"content-type", "text/plain"}]
138+
139+
assert env.headers == [
140+
{"content-type", "text/plain"},
141+
{"content-encoding", "gzip"},
142+
{"content-length", "4194304"}
143+
]
94144
end
95145

96146
defmodule CompressRequestDecompressResponseClient do

0 commit comments

Comments
 (0)