1+ // Compiler for PHP (aka KPHP)
2+ // Copyright (c) 2024 LLC «V Kontakte»
3+ // Distributed under the GPL v3 License, see LICENSE.notice.txt
4+
#include "runtime-light/server/http/multipart.h"
#include "runtime-light/stdlib/file/resource.h"
#include "runtime-light/stdlib/diagnostics/logs.h"
#include "runtime-light/server/http/http-server-state.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string_view>

#include "runtime-common/core/runtime-core.h"
#include "common/algorithms/string-algorithms.h"
15+
16+ namespace {
17+
// Prefix that the value of a Content-Disposition header must start with
// for the attribute parser below to accept it.
constexpr std::string_view HEADER_CONTENT_DISPOSITION_FORM_DATA{"form-data;"};
// Key searched for in a Content-Type value to locate the multipart boundary.
constexpr std::string_view MULTIPART_BOUNDARY_EQ{"boundary="};
20+
// One "Name: value" header line of a multipart part.
// All members are views into the string passed to the constructor; the struct
// owns nothing, so that string must outlive it.
struct header {
  std::string_view header_view, name, value;

  header() = default;

  // Splits header_str_ at the first ':'.
  // When there is no ':' both name and value stay empty, so is_valid() is false.
  explicit header(const std::string_view header_str_)
    : header_view{header_str_} {
    const size_t colon{header_view.find(':')};
    if (colon == std::string_view::npos) [[unlikely]] {
      return;
    }
    name = header_view.substr(0, colon);

    // The whitespace after ':' is optional, so strip spaces/tabs instead of
    // blindly dropping one character (which would eat the first value byte of "Name:value").
    const std::string_view raw_value{header_view.substr(colon + 1)};
    const size_t value_start{raw_value.find_first_not_of(" \t")};
    if (value_start != std::string_view::npos) {
      value = raw_value.substr(value_start);
    }
  }

  // A header is usable only when both its name and its value are non-empty.
  bool is_valid() const {
    return !name.empty() && !value.empty();
  }

  // Case-insensitive (ASCII) comparison of the whole header name with s.
  // s is compared as is, so callers pass it in lowercase.
  bool name_is(const std::string_view s) const {
    // Lengths must match: comparing only a prefix would accept "content-type-x" for "content-type".
    if (name.size() != s.size()) {
      return false;
    }
    return std::equal(name.begin(), name.end(), s.begin(), [](const char name_char, const char expected) noexcept {
      const char lowered{(name_char >= 'A' && name_char <= 'Z') ? static_cast<char>(name_char - 'A' + 'a') : name_char};
      return lowered == expected;
    });
  }
};
46+
// A single attribute of a Content-Disposition header, stored as two views.
// For example, a typical file field carries two attributes:
// 1) attr = "name", value = "avatar"
// 2) attr = "filename", value = "my_avatar.png"
struct partAttr {
  std::string_view attr;
  std::string_view value;

  // Both views are empty.
  partAttr() = default;

  // Stores the given attribute name and value views as is.
  partAttr(const std::string_view attr_, const std::string_view value_)
    : attr(attr_)
    , value(value_) {}
};
57+
58+ // Represents a parser of Content-Disposition header string.
59+ struct attrParser {
60+ private:
61+ std::string_view header;
62+ size_t pos{0 };
63+
64+ public:
65+ attrParser (const std::string_view header_) : header{header_} {}
66+ partAttr next_attr ();
67+ bool end () {
68+ return pos >= header.size ();
69+ }
70+
71+ private:
72+ void markEnd () {
73+ pos = header.size ();
74+ }
75+ };
76+
77+ partAttr attrParser::next_attr () {
78+ if (pos == 0 ) {
79+ if (header.find (HEADER_CONTENT_DISPOSITION_FORM_DATA) != pos) {
80+ markEnd ();
81+ return {};
82+ }
83+ pos += HEADER_CONTENT_DISPOSITION_FORM_DATA.size ();
84+ }
85+
86+ if (pos >= header.size ()) {
87+ return {};
88+ }
89+
90+ size_t end{header.find (' ;' , pos)};
91+ if (end == std::string_view::npos) {
92+ end = header.size ();
93+ }
94+
95+ std::string_view part_view{vk::trim (header.substr (pos, end-pos))};
96+ auto [name_view, value_view]{vk::split_string_view (part_view, ' =' )};
97+ if (value_view.size () >= 2 && value_view.starts_with (' "' ) && value_view.ends_with (' "' )) {
98+ value_view = value_view.substr (1 , value_view.size ()-2 );
99+ }
100+ pos = end + 1 ;
101+
102+ return {name_view, value_view};
103+ }
104+
// A single part of multipart content. Every field is a non-owning view.
struct part {
  std::string_view name;         // "name" attribute of Content-Disposition
  std::string_view filename;     // "filename" attribute of Content-Disposition
  std::string_view content_type; // value of the part's Content-Type header
  std::string_view data;         // payload of the part
};
109+
110+ struct MultipartBody {
111+ private:
112+
113+ std::string_view body, boundary;
114+ size_t pos;
115+
116+ part next_part ();
117+ void addPost (const part &part, mixed &v$_POST);
118+ void addFile (const part &part, mixed &v$_FILES);
119+
120+ header next_header ();
121+ std::string_view parse_data ();
122+
123+ // Returns true if current pos refers to one of \r, \n, \r\n
124+ bool is_crlf () {
125+ return body[pos] == ' \r ' || body[pos] == ' \n ' || (body[pos] == ' \r ' && body[pos+1 ] == ' \n ' );
126+ }
127+
128+ void skip_crlf () {
129+ if (body[pos] == ' \r ' ) {
130+ pos++;
131+ }
132+ if (body[pos] == ' \n ' ) {
133+ pos++;
134+ }
135+ }
136+
137+ void skip_boundary () {
138+ if (pos == 0 ) {
139+ pos += 2 ;
140+ }
141+ pos += boundary.size ();
142+ if (body[pos] == ' -' && body[pos+1 ] == ' -' ) {
143+ pos += 2 ;
144+ }
145+ }
146+
147+ bool end () {
148+ return pos >= body.size ();
149+ }
150+
151+ void markEnd () {
152+ pos = body.size ();
153+ }
154+
155+ public:
156+
157+ MultipartBody (const std::string_view body_, const std::string_view boundary_)
158+ : body{body_}, boundary{boundary_}, pos{0 } {}
159+
160+ void parse_into (mixed &v$_POST, mixed &v$_FILES);
161+ };
162+
163+ part MultipartBody::next_part () {
164+ part part;
165+
166+ if (pos == 0 ) {
167+ skip_boundary ();
168+ skip_crlf ();
169+ }
170+
171+ do {
172+ header header{next_header ()};
173+ if (!header.is_valid ()) {
174+ markEnd ();
175+ return {};
176+ }
177+
178+ if (header.name_is (kphp::http::headers::CONTENT_DISPOSITION)) {
179+ attrParser parser{header.value };
180+ while (!parser.end ()) {
181+ partAttr pa{parser.next_attr ()};
182+ if (pa.attr .empty ()) {
183+ markEnd ();
184+ return {};
185+ }
186+ if (pa.attr == " name" ) {
187+ part.name = pa.value ;
188+ } else if (pa.attr == " filename" ) {
189+ part.filename = pa.value ;
190+ }
191+ }
192+ } else if (header.name_is (kphp::http::headers::CONTENT_TYPE)) {
193+ part.content_type = header.value ;
194+ }
195+ } while (!is_crlf ());
196+
197+ skip_crlf ();
198+ part.data = parse_data ();
199+ skip_boundary ();
200+ skip_crlf ();
201+ return part;
202+ }
203+
204+ header MultipartBody::next_header () {
205+ size_t lf{body.find (' \n ' , pos)};
206+ size_t header_end{lf-1 };
207+
208+ if (lf == std::string_view::npos) {
209+ return {};
210+ }
211+
212+ if (body[header_end] == ' \r ' ) {
213+ header_end--;
214+ }
215+
216+ header header{body.substr (pos, header_end-pos+1 )};
217+ pos = lf + 1 ;
218+ return header;
219+ }
220+
221+ std::string_view MultipartBody::parse_data () {
222+ size_t data_start{pos};
223+ size_t data_end{body.find (boundary, data_start)};
224+ pos = data_end;
225+
226+ if (pos == std::string_view::npos) {
227+ return {};
228+ }
229+
230+ if (body[data_end-1 ] != ' -' || body[data_end-2 ] != ' -' ) {
231+ return {};
232+ }
233+ data_end -= 2 ;
234+ if (body[data_end] == ' \n ' ) {
235+ data_end--;
236+ }
237+ if (body[data_end] == ' \r ' ) {
238+ data_end--;
239+ }
240+
241+ if (data_end > data_start) {
242+ return body.substr (data_start, data_end-data_start-1 );
243+ }
244+
245+ return {};
246+
247+ }
248+
249+ void MultipartBody::parse_into (mixed &v$_POST, mixed &v$_FILES) {
250+ while (!end ()) {
251+ part part{next_part ()};
252+ if (part.name .empty ()) {
253+ return ;
254+ }
255+
256+ if (!part.filename .empty ()) {
257+ addFile (part, v$_FILES);
258+ } else {
259+ addPost (part, v$_POST);
260+ }
261+ }
262+ }
263+
264+ void MultipartBody::addPost (const part &part, mixed &v$_POST) {
265+ string name{part.name .data (), static_cast <string::size_type>(part.name .size ())};
266+ v$_POST.set_value (name, string (part.data .data (), part.data .size ()));
267+ }
268+
269+ void MultipartBody::addFile (const part &part, mixed &v$_FILES) {
270+ std::string_view tmp_name{std::tmpnam (nullptr )};
271+ auto file{kphp::fs::file::open (tmp_name, " w" )};
272+ if (!file) [[unlikely]] {
273+ kphp::log::warning (" error opening tmp file: error code -> {}" , file.error ());
274+ return ;
275+ }
276+
277+ int file_size{0 };
278+ auto file_size_result = (*file).write ({reinterpret_cast <const std::byte*>(part.data .data ()), part.data .size ()});
279+ if (file_size_result.has_value ()) {
280+ file_size = file_size_result.value ();
281+ if (file_size < part.data .size ()) {
282+ kphp::log::warning (" error write to tmp file: wrote {} bytes insted of {}" , file_size, part.data .size ());
283+ return ;
284+ }
285+ } else {
286+ kphp::log::warning (" error write to tmp file: errcode {}" , file_size_result.error ());
287+ }
288+
289+ string name{part.name .data (), static_cast <string::size_type>(part.name .size ())};
290+
291+ if (part.name .ends_with (" []" )) {
292+ mixed& file = v$_FILES[name.substr (0 , name.size () - 2 )];
293+ if (file_size == part.data .size ()) {
294+ file[string (" name" )].push_back (string (part.filename .data (), part.filename .size ()));
295+ file[string (" type" )].push_back (string (part.content_type .data (), part.content_type .size ()));
296+ file[string (" size" )].push_back (file_size);
297+ file[string (" tmp_name" )].push_back (string (tmp_name.data (), tmp_name.size ()));
298+ file[string (" error" )].push_back (0 );
299+ } else {
300+ file[string (" name" )].push_back (string ());
301+ file[string (" type" )].push_back (string ());
302+ file[string (" size" )].push_back (0 );
303+ file[string (" tmp_name" )].push_back (string ());
304+ file[string (" error" )].push_back (-file_size);
305+ }
306+ } else {
307+ mixed& file = v$_FILES[name];
308+ if (file_size == part.data .size ()) {
309+ file.set_value (string (" name" ), string (part.filename .data (), part.filename .size ()));
310+ file.set_value (string (" type" ), string (part.content_type .data (), part.content_type .size ()));
311+ file.set_value (string (" size" ), file_size);
312+ file.set_value (string (" tmp_name" ), string (tmp_name.data (), tmp_name.size ()));
313+ file.set_value (string (" error" ), 0 );
314+ } else {
315+ file.set_value (string (" size" ), 0 );
316+ file.set_value (string (" tmp_name" ), string ());
317+ file.set_value (string (" error" ), -file_size);
318+ }
319+ }
320+ }
321+
322+ } // namespace
323+
324+ namespace kphp ::http {
325+
326+ void parse_multipart (const std::string_view body, const std::string_view boundary, mixed &v$_POST, mixed &v$_FILES) {
327+ MultipartBody mb{body, boundary};
328+ mb.parse_into (v$_POST, v$_FILES);
329+ }
330+
331+ std::string_view parse_boundary (const std::string_view content_type) {
332+ size_t pos{content_type.find (MULTIPART_BOUNDARY_EQ)};
333+ if (pos == std::string_view::npos) {
334+ return {};
335+ }
336+ std::string_view res = content_type.substr (pos + MULTIPART_BOUNDARY_EQ.size ());
337+ if (res.size () >= 2 && res.starts_with (' "' ) && res.ends_with (' "' )) {
338+ res = res.substr (1 , res.size ()-2 );
339+ }
340+ return res;
341+ }
342+
343+ } // namespace kphp::http
0 commit comments