Skip to content

Commit e22a5e9

Browse files
committed
[k2] add support multipart/form-data to HTTP server
1 parent 62014ae commit e22a5e9

File tree

5 files changed

+365
-1
lines changed

5 files changed

+365
-1
lines changed

runtime-light/server/http/http-server-state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ inline constexpr std::string_view CONTENT_LENGTH = "content-length";
4646
inline constexpr std::string_view AUTHORIZATION = "authorization";
4747
inline constexpr std::string_view ACCEPT_ENCODING = "accept-encoding";
4848
inline constexpr std::string_view CONTENT_ENCODING = "content-encoding";
49+
inline constexpr std::string_view CONTENT_DISPOSITION = "content-disposition";
4950

5051
} // namespace headers
5152

runtime-light/server/http/init-functions.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "runtime-light/core/globals/php-script-globals.h"
2727
#include "runtime-light/k2-platform/k2-api.h"
2828
#include "runtime-light/server/http/http-server-state.h"
29+
#include "runtime-light/server/http/multipart.h"
2930
#include "runtime-light/state/instance-state.h"
3031
#include "runtime-light/stdlib/component/component-api.h"
3132
#include "runtime-light/stdlib/diagnostics/logs.h"
@@ -320,7 +321,10 @@ void init_server(kphp::component::stream&& request_stream, kphp::stl::vector<std
320321
if (content_type == CONTENT_TYPE_APP_FORM_URLENCODED) {
321322
f$parse_str(body_str, superglobals.v$_POST);
322323
} else if (content_type.starts_with(CONTENT_TYPE_MULTIPART_FORM_DATA)) {
323-
kphp::log::error("unsupported content-type: {}", CONTENT_TYPE_MULTIPART_FORM_DATA);
324+
std::string_view boundary{parse_boundary(content_type)};
325+
if (!boundary.empty()) {
326+
kphp::http::parse_multipart({body_str.c_str(), body_str.size()}, boundary, superglobals.v$_POST, superglobals.v$_FILES);
327+
}
324328
} else {
325329
http_server_instance_st.opt_raw_post_data.emplace(std::move(body_str));
326330
}
Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
// Compiler for PHP (aka KPHP)
2+
// Copyright (c) 2024 LLC «V Kontakte»
3+
// Distributed under the GPL v3 License, see LICENSE.notice.txt
4+
5+
#include "runtime-light/server/http/multipart.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string_view>

#include "common/algorithms/string-algorithms.h"
#include "runtime-common/core/runtime-core.h"
#include "runtime-light/server/http/http-server-state.h"
#include "runtime-light/stdlib/diagnostics/logs.h"
#include "runtime-light/stdlib/file/resource.h"
15+
16+
namespace {
17+
18+
constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA = "form-data;";
19+
constexpr std::string_view MULTIPART_BOUNDARY_EQ = "boundary=";
20+
21+
// A single "Name: value" header line of a multipart part.
//
// `header_view` is the raw line, `name` is the text before the first ':' and
// `value` is the text after it with surrounding spaces/tabs removed.
// When the line has no ':' both `name` and `value` stay empty.
struct header {
  std::string_view header_view, name, value;

  header() = default;

  // Splits `header_str_` at the first ':'; a line without ':' yields an invalid header.
  explicit header(const std::string_view header_str_)
    : header_view{header_str_} {
    const size_t colon{header_view.find(':')};
    if (colon == std::string_view::npos) {
      return;
    }
    name = header_view.substr(0, colon);

    // The whitespace after ':' is optional, so trim it instead of blindly
    // dropping one character (which used to eat the first value character
    // of "Name:value").
    const std::string_view raw_value{header_view.substr(colon + 1)};
    const size_t first{raw_value.find_first_not_of(" \t")};
    if (first == std::string_view::npos) {
      return; // nothing but whitespace after ':'
    }
    const size_t last{raw_value.find_last_not_of(" \t")};
    value = raw_value.substr(first, last - first + 1);
  }

  // A header is usable only when both its name and its value are non-empty.
  bool is_valid() const {
    return !name.empty() && !value.empty();
  }

  // Case-insensitive (ASCII) comparison of the whole header name with `s`.
  // Only the header name is lowercased, so `s` is expected to be lowercase already.
  // The lengths must match: a plain prefix match would make
  // "Content-Type-Options" look like "content-type".
  bool name_is(const std::string_view s) const {
    const auto ascii_lower{[](const char c) noexcept { return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c; }};
    return name.size() == s.size() &&
           std::equal(name.begin(), name.end(), s.begin(), [&ascii_lower](const char lhs, const char rhs) noexcept { return ascii_lower(lhs) == rhs; });
  }
};
46+
47+
// Represents one attribute from Content-Disposition header.
48+
// For example, a typical file field will have two attributes:
49+
// 1) attr = "name", value = "avatar"
50+
// 2) attr = "filename", value = "my_avatar.png"
51+
// One `attr=value` pair taken from a Content-Disposition header.
// A default-constructed object has both views empty.
struct partAttr {
  std::string_view attr{};
  std::string_view value{};

  partAttr() = default;

  partAttr(const std::string_view attr_, const std::string_view value_)
    : attr{attr_}
    , value{value_} {}
};
57+
58+
// Represents a parser of Content-Disposition header string.
59+
struct attrParser {
60+
private:
61+
std::string_view header;
62+
size_t pos{0};
63+
64+
public:
65+
attrParser(const std::string_view header_) : header{header_} {}
66+
partAttr next_attr();
67+
bool end() {
68+
return pos >= header.size();
69+
}
70+
71+
private:
72+
void markEnd() {
73+
pos = header.size();
74+
}
75+
};
76+
77+
// Returns the next `attr=value` pair of the Content-Disposition value.
//
// On the first call the value must start with "form-data;", otherwise the
// parser is marked as finished and an empty attribute is returned.
// An empty attribute is also returned when the cursor is already at the end.
// Surrounding double quotes are removed from the attribute value.
partAttr attrParser::next_attr() {
  if (pos == 0) {
    if (!header.starts_with(HEADER_CONTENT_DISPOSITION_FORM_DATA)) {
      markEnd();
      return {};
    }
    pos = HEADER_CONTENT_DISPOSITION_FORM_DATA.size();
  }

  if (pos >= header.size()) {
    return {};
  }

  // Look for the terminating ';' but ignore separators that appear inside a
  // quoted value, e.g. filename="a;b.txt" must stay a single attribute.
  size_t end{pos};
  bool in_quotes{false};
  for (; end < header.size(); ++end) {
    const char c{header[end]};
    if (c == '"') {
      in_quotes = !in_quotes;
    } else if (c == ';' && !in_quotes) {
      break;
    }
  }

  std::string_view part_view{vk::trim(header.substr(pos, end - pos))};
  auto [name_view, value_view]{vk::split_string_view(part_view, '=')};
  if (value_view.size() >= 2 && value_view.starts_with('"') && value_view.ends_with('"')) {
    value_view = value_view.substr(1, value_view.size() - 2);
  }
  pos = end + 1; // step over ';' (or past the end when there was none)

  return {name_view, value_view};
}
104+
105+
// Represents one part of multipart content
106+
// Views describing one part of a multipart body: the form field name, the
// uploaded file name, the value of the part's Content-Type header and the
// payload bytes. All views are empty by default.
struct part {
  std::string_view name;
  std::string_view filename;
  std::string_view content_type;
  std::string_view data;
};
109+
110+
struct MultipartBody {
111+
private:
112+
113+
std::string_view body, boundary;
114+
size_t pos;
115+
116+
part next_part();
117+
void addPost(const part &part, mixed &v$_POST);
118+
void addFile(const part &part, mixed &v$_FILES);
119+
120+
header next_header();
121+
std::string_view parse_data();
122+
123+
// Returns true if current pos refers to one of \r, \n, \r\n
124+
bool is_crlf() {
125+
return body[pos] == '\r' || body[pos] == '\n' || (body[pos] == '\r' && body[pos+1] == '\n');
126+
}
127+
128+
void skip_crlf() {
129+
if (body[pos] == '\r') {
130+
pos++;
131+
}
132+
if (body[pos] == '\n') {
133+
pos++;
134+
}
135+
}
136+
137+
void skip_boundary() {
138+
if (pos == 0) {
139+
pos += 2;
140+
}
141+
pos += boundary.size();
142+
if (body[pos] == '-' && body[pos+1] == '-') {
143+
pos += 2;
144+
}
145+
}
146+
147+
bool end() {
148+
return pos >= body.size();
149+
}
150+
151+
void markEnd() {
152+
pos = body.size();
153+
}
154+
155+
public:
156+
157+
MultipartBody(const std::string_view body_, const std::string_view boundary_)
158+
: body{body_}, boundary{boundary_}, pos{0} {}
159+
160+
void parse_into(mixed &v$_POST, mixed &v$_FILES);
161+
};
162+
163+
// Reads one part starting at the current cursor position.
//
// Headers are consumed one per line until the cursor stands on a line break;
// `name` / `filename` come from Content-Disposition, `content_type` from
// Content-Type. After the headers the payload is read and the following
// boundary and line break are skipped.
// On an invalid header or an empty attribute name the cursor is moved to the
// end of the body and an empty part is returned.
part MultipartBody::next_part() {
  part current{};

  // Only at the very beginning of the body: step over the opening boundary line.
  const bool at_body_start{pos == 0};
  if (at_body_start) {
    skip_boundary();
    skip_crlf();
  }

  bool headers_done{false};
  while (!headers_done) {
    header hdr{next_header()};
    if (!hdr.is_valid()) {
      markEnd();
      return {};
    }

    if (hdr.name_is(kphp::http::headers::CONTENT_DISPOSITION)) {
      for (attrParser attrs{hdr.value}; !attrs.end();) {
        const partAttr attribute{attrs.next_attr()};
        if (attribute.attr.empty()) {
          markEnd();
          return {};
        }

        if (attribute.attr == "name") {
          current.name = attribute.value;
        } else if (attribute.attr == "filename") {
          current.filename = attribute.value;
        }
      }
    } else if (hdr.name_is(kphp::http::headers::CONTENT_TYPE)) {
      current.content_type = hdr.value;
    }

    // At least one header is always read before this check.
    headers_done = is_crlf();
  }

  skip_crlf();
  current.data = parse_data();
  skip_boundary();
  skip_crlf();
  return current;
}
203+
204+
// Reads the header line that starts at the cursor and moves the cursor just
// past its '\n'. A trailing '\r' is not part of the returned line.
// Returns a default (invalid) header, leaving the cursor untouched, when no
// '\n' is found at or after the cursor.
header MultipartBody::next_header() {
  if (pos >= body.size()) {
    return {};
  }

  const size_t lf{body.find('\n', pos)};
  if (lf == std::string_view::npos) {
    return {};
  }

  // Work on the [pos, lf) slice so that no index arithmetic can underflow,
  // even for an empty line.
  std::string_view line{body.substr(pos, lf - pos)};
  if (!line.empty() && line.back() == '\r') {
    line.remove_suffix(1);
  }

  pos = lf + 1;
  return header{line};
}
220+
221+
std::string_view MultipartBody::parse_data() {
222+
size_t data_start{pos};
223+
size_t data_end{body.find(boundary, data_start)};
224+
pos = data_end;
225+
226+
if (pos == std::string_view::npos) {
227+
return {};
228+
}
229+
230+
if (body[data_end-1] != '-' || body[data_end-2] != '-') {
231+
return {};
232+
}
233+
data_end -= 2;
234+
if (body[data_end] == '\n') {
235+
data_end--;
236+
}
237+
if (body[data_end] == '\r') {
238+
data_end--;
239+
}
240+
241+
if (data_end > data_start) {
242+
return body.substr(data_start, data_end-data_start-1);
243+
}
244+
245+
return {};
246+
247+
}
248+
249+
// Walks over all parts of the body. A part with a non-empty file name is
// passed to addFile together with v$_FILES, any other part goes to addPost
// together with v$_POST. Parsing stops at the end of the body or at the first
// part without a name.
void MultipartBody::parse_into(mixed& v$_POST, mixed& v$_FILES) {
  for (; !end();) {
    const part current{next_part()};
    if (current.name.empty()) {
      break;
    }

    const bool is_plain_field{current.filename.empty()};
    if (is_plain_field) {
      addPost(current, v$_POST);
    } else {
      addFile(current, v$_FILES);
    }
  }
}
263+
264+
void MultipartBody::addPost(const part &part, mixed &v$_POST) {
265+
string name{part.name.data(), static_cast<string::size_type>(part.name.size())};
266+
v$_POST.set_value(name, string(part.data.data(), part.data.size()));
267+
}
268+
269+
void MultipartBody::addFile(const part &part, mixed &v$_FILES) {
270+
std::string_view tmp_name{std::tmpnam(nullptr)};
271+
auto file{kphp::fs::file::open(tmp_name, "w")};
272+
if (!file) [[unlikely]] {
273+
kphp::log::warning("error opening tmp file: error code -> {}", file.error());
274+
return;
275+
}
276+
277+
int file_size{0};
278+
auto file_size_result = (*file).write({reinterpret_cast<const std::byte*>(part.data.data()), part.data.size()});
279+
if (file_size_result.has_value()) {
280+
file_size = file_size_result.value();
281+
if (file_size < part.data.size()) {
282+
kphp::log::warning("error write to tmp file: wrote {} bytes insted of {}", file_size, part.data.size());
283+
return;
284+
}
285+
} else {
286+
kphp::log::warning("error write to tmp file: errcode {}", file_size_result.error());
287+
}
288+
289+
string name{part.name.data(), static_cast<string::size_type>(part.name.size())};
290+
291+
if (part.name.ends_with("[]")) {
292+
mixed& file = v$_FILES[name.substr(0, name.size() - 2)];
293+
if (file_size == part.data.size()) {
294+
file[string("name")].push_back(string(part.filename.data(), part.filename.size()));
295+
file[string("type")].push_back(string(part.content_type.data(), part.content_type.size()));
296+
file[string("size")].push_back(file_size);
297+
file[string("tmp_name")].push_back(string(tmp_name.data(), tmp_name.size()));
298+
file[string("error")].push_back(0);
299+
} else {
300+
file[string("name")].push_back(string());
301+
file[string("type")].push_back(string());
302+
file[string("size")].push_back(0);
303+
file[string("tmp_name")].push_back(string());
304+
file[string("error")].push_back(-file_size);
305+
}
306+
} else {
307+
mixed& file = v$_FILES[name];
308+
if (file_size == part.data.size()) {
309+
file.set_value(string("name"), string(part.filename.data(), part.filename.size()));
310+
file.set_value(string("type"), string(part.content_type.data(), part.content_type.size()));
311+
file.set_value(string("size"), file_size);
312+
file.set_value(string("tmp_name"), string(tmp_name.data(), tmp_name.size()));
313+
file.set_value(string("error"), 0);
314+
} else {
315+
file.set_value(string("size"), 0);
316+
file.set_value(string("tmp_name"), string());
317+
file.set_value(string("error"), -file_size);
318+
}
319+
}
320+
}
321+
322+
} // namespace
323+
324+
namespace kphp::http {
325+
326+
void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) {
327+
MultipartBody mb{body, boundary};
328+
mb.parse_into(v$_POST, v$_FILES);
329+
}
330+
331+
std::string_view parse_boundary(const std::string_view content_type) {
332+
size_t pos{content_type.find(MULTIPART_BOUNDARY_EQ)};
333+
if (pos == std::string_view::npos) {
334+
return {};
335+
}
336+
std::string_view res = content_type.substr(pos + MULTIPART_BOUNDARY_EQ.size());
337+
if (res.size() >= 2 && res.starts_with('"') && res.ends_with('"')) {
338+
res = res.substr(1, res.size()-2);
339+
}
340+
return res;
341+
}
342+
343+
} // namespace kphp::http
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Compiler for PHP (aka KPHP)
2+
// Copyright (c) 2024 LLC «V Kontakte»
3+
// Distributed under the GPL v3 License, see LICENSE.notice.txt
4+
5+
#pragma once

#include <string_view>

#include "runtime-common/core/runtime-core.h"

namespace kphp::http {

// Parses `body` as multipart content whose parts are separated by `boundary`.
// Parts that carry a file name are described in `v$_FILES`, all other parts
// are stored in `v$_POST` under their field name.
void parse_multipart(const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES);

// Extracts the value of the `boundary=` parameter from a Content-Type header
// value. Returns an empty view when the parameter is missing.
std::string_view parse_boundary(const std::string_view content_type);

} // namespace kphp::http

runtime-light/server/server.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ prepend(
33
server/
44
cli/cli-instance-state.cpp
55
http/init-functions.cpp
6+
http/multipart.cpp
67
http/http-server-state.cpp
78
job-worker/job-worker-server-state.cpp
89
rpc/init-functions.cpp

0 commit comments

Comments
 (0)