From 07f25d28334b45730fb4b6f79f36ba1f3bb5bb82 Mon Sep 17 00:00:00 2001 From: yhirose Date: Sat, 5 Jul 2025 18:56:22 -0400 Subject: [PATCH] Fix #2082 --- README.md | 47 ++++++++++++++ httplib.h | 161 ++++++++++++++++++++++++++++++++++------------ test/test.cc | 177 +++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 334 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index cd933beb73..9f5a893d7a 100644 --- a/README.md +++ b/README.md @@ -881,6 +881,26 @@ res->status; // 200 cli.set_interface("eth0"); // Interface name, IP address or host name ``` +### Automatic Path Encoding + +The client automatically encodes special characters in URL paths by default: + +```cpp +httplib::Client cli("https://example.com"); + +// Automatic path encoding (default behavior) +cli.set_path_encode(true); +auto res = cli.Get("/path with spaces/file.txt"); // Automatically encodes spaces + +// Disable automatic path encoding +cli.set_path_encode(false); +auto res = cli.Get("/already%20encoded/path"); // Use pre-encoded paths +``` + +- `set_path_encode(bool on)` - Controls automatic encoding of special characters in URL paths + - `true` (default): Automatically encodes spaces, plus signs, newlines, and other special characters + - `false`: Sends paths as-is without encoding (useful for pre-encoded URLs) + Compression ----------- @@ -957,6 +977,33 @@ cli.set_address_family(AF_UNIX); "my-socket.sock" can be a relative path or an absolute path. Your application must have the appropriate permissions for the path. You can also use an abstract socket address on Linux. To use an abstract socket address, prepend a null byte ('\x00') to the path. 
+URI Encoding/Decoding Utilities +------------------------------- + +cpp-httplib provides utility functions for URI encoding and decoding: + +```cpp +#include <httplib.h> + +std::string url = "https://example.com/search?q=hello world"; +std::string encoded = httplib::encode_uri(url); +std::string decoded = httplib::decode_uri(encoded); + +std::string param = "hello world"; +std::string encoded_component = httplib::encode_uri_component(param); +std::string decoded_component = httplib::decode_uri_component(encoded_component); +``` + +### Functions + +- `encode_uri(const std::string &value)` - Encodes a full URI, preserving reserved characters like `://`, `?`, `&`, `=` +- `decode_uri(const std::string &value)` - Decodes a URI-encoded string +- `encode_uri_component(const std::string &value)` - Encodes a URI component (query parameter, path segment), encoding all reserved characters +- `decode_uri_component(const std::string &value)` - Decodes a URI component + +Use `encode_uri()` for full URLs and `encode_uri_component()` for individual query parameters or path segments. 
+ + Split httplib.h into .h and .cc ------------------------------- diff --git a/httplib.h b/httplib.h index c2d986082c..412b7a7270 100644 --- a/httplib.h +++ b/httplib.h @@ -1442,7 +1442,7 @@ class ClientImpl { void set_keep_alive(bool on); void set_follow_location(bool on); - void set_url_encode(bool on); + void set_path_encode(bool on); void set_compress(bool on); @@ -1554,7 +1554,7 @@ class ClientImpl { bool keep_alive_ = false; bool follow_location_ = false; - bool url_encode_ = true; + bool path_encode_ = true; int address_family_ = AF_UNSPEC; bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY; @@ -1792,6 +1792,7 @@ class Client { void set_keep_alive(bool on); void set_follow_location(bool on); + void set_path_encode(bool on); void set_url_encode(bool on); void set_compress(bool on); @@ -2246,6 +2247,16 @@ std::string hosted_at(const std::string &hostname); void hosted_at(const std::string &hostname, std::vector<std::string> &addrs); +std::string encode_uri_component(const std::string &value); + +std::string encode_uri(const std::string &value); + +std::string decode_uri_component(const std::string &value); + +std::string decode_uri(const std::string &value); + +std::string encode_query_param(const std::string &value); + std::string append_query_params(const std::string &path, const Params &params); std::pair<std::string, std::string> make_range_header(const Ranges &ranges); @@ -2287,9 +2298,7 @@ struct FileStat { int ret_ = -1; }; -std::string encode_query_param(const std::string &value); - -std::string decode_url(const std::string &s, bool convert_plus_to_space); +std::string decode_path(const std::string &s, bool convert_plus_to_space); std::string trim_copy(const std::string &s); @@ -2759,28 +2768,7 @@ inline bool FileStat::is_dir() const { return ret_ >= 0 && S_ISDIR(st_.st_mode); } -inline std::string encode_query_param(const std::string &value) { - std::ostringstream escaped; - escaped.fill('0'); - escaped << std::hex; - - for (auto c : value) { - if (std::isalnum(static_cast<uint8_t>(c)) || c == '-' || c == 
'_' || - c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || - c == ')') { - escaped << c; - } else { - escaped << std::uppercase; - escaped << '%' << std::setw(2) - << static_cast<int>(static_cast<unsigned char>(c)); - escaped << std::nouppercase; - } - } - - return escaped.str(); -} - -inline std::string encode_url(const std::string &s) { +inline std::string encode_path(const std::string &s) { std::string result; result.reserve(s.size()); @@ -2812,8 +2800,8 @@ inline std::string encode_url(const std::string &s) { return result; } -inline std::string decode_url(const std::string &s, - bool convert_plus_to_space) { +inline std::string decode_path(const std::string &s, + bool convert_plus_to_space) { std::string result; for (size_t i = 0; i < s.size(); i++) { @@ -4537,7 +4525,7 @@ inline bool parse_header(const char *beg, const char *end, T fn) { case_ignore::equal(key, "Referer")) { fn(key, val); } else { - fn(key, decode_url(val, false)); + fn(key, decode_path(val, false)); } return true; @@ -5102,7 +5090,7 @@ inline std::string params_to_query_str(const Params &params) { if (it != params.begin()) { query += "&"; } query += it->first; query += "="; - query += encode_query_param(it->second); + query += httplib::encode_uri_component(it->second); } return query; } @@ -5125,7 +5113,7 @@ inline void parse_query_text(const char *data, std::size_t size, }); if (!key.empty()) { - params.emplace(decode_url(key, true), decode_url(val, true)); + params.emplace(decode_path(key, true), decode_path(val, true)); } }); } @@ -5435,7 +5423,7 @@ class MultipartFormDataParser { std::smatch m2; if (std::regex_match(it->second, m2, re_rfc5987_encoding)) { - file_.filename = decode_url(m2[1], false); // override... + file_.filename = decode_path(m2[1], false); // override... 
} else { is_valid_ = false; return false; @@ -6258,6 +6246,94 @@ inline void hosted_at(const std::string &hostname, } } +inline std::string encode_uri_component(const std::string &value) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (auto c : value) { + if (std::isalnum(static_cast<uint8_t>(c)) || c == '-' || c == '_' || + c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || + c == ')') { + escaped << c; + } else { + escaped << std::uppercase; + escaped << '%' << std::setw(2) + << static_cast<int>(static_cast<unsigned char>(c)); + escaped << std::nouppercase; + } + } + + return escaped.str(); +} + +inline std::string encode_uri(const std::string &value) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (auto c : value) { + if (std::isalnum(static_cast<uint8_t>(c)) || c == '-' || c == '_' || + c == '.' || c == '!' || c == '~' || c == '*' || c == '\'' || c == '(' || + c == ')' || c == ';' || c == '/' || c == '?' || c == ':' || c == '@' || + c == '&' || c == '=' || c == '+' || c == '$' || c == ',' || c == '#') { + escaped << c; + } else { + escaped << std::uppercase; + escaped << '%' << std::setw(2) + << static_cast<int>(static_cast<unsigned char>(c)); + escaped << std::nouppercase; + } + } + + return escaped.str(); +} + +inline std::string decode_uri_component(const std::string &value) { + std::string result; + + for (size_t i = 0; i < value.size(); i++) { + if (value[i] == '%' && i + 2 < value.size()) { + auto val = 0; + if (detail::from_hex_to_i(value, i + 1, 2, val)) { + result += static_cast<char>(val); + i += 2; + } else { + result += value[i]; + } + } else { + result += value[i]; + } + } + + return result; +} + +inline std::string decode_uri(const std::string &value) { + std::string result; + + for (size_t i = 0; i < value.size(); i++) { + if (value[i] == '%' && i + 2 < value.size()) { + auto val = 0; + if (detail::from_hex_to_i(value, i + 1, 2, val)) { + result += static_cast<char>(val); + i += 2; + } else { + result += value[i]; + } + } 
else { + result += value[i]; + } + } + + return result; +} + +[[deprecated("Use encode_uri_component instead")]] +inline std::string encode_query_param(const std::string &value) { + return encode_uri_component(value); +} + inline std::string append_query_params(const std::string &path, const Params &params) { std::string path_with_query = path; @@ -7063,7 +7139,7 @@ inline bool Server::parse_request_line(const char *s, Request &req) const { detail::divide(req.target, '?', [&](const char *lhs_data, std::size_t lhs_size, const char *rhs_data, std::size_t rhs_size) { - req.path = detail::decode_url( + req.path = detail::decode_path( std::string(lhs_data, lhs_size), false); detail::parse_query_text(rhs_data, rhs_size, req.params); }); @@ -7958,7 +8034,7 @@ inline void ClientImpl::copy_settings(const ClientImpl &rhs) { #endif keep_alive_ = rhs.keep_alive_; follow_location_ = rhs.follow_location_; - url_encode_ = rhs.url_encode_; + path_encode_ = rhs.path_encode_; address_family_ = rhs.address_family_; tcp_nodelay_ = rhs.tcp_nodelay_; ipv6_v6only_ = rhs.ipv6_v6only_; @@ -8323,7 +8399,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { if (next_host.empty()) { next_host = host_; } if (next_path.empty()) { next_path = "/"; } - auto path = detail::decode_url(next_path, true) + next_query; + auto path = detail::decode_path(next_path, true) + next_query; // Same host redirect - use current client if (next_scheme == scheme && next_host == host_ && next_port == port_) { @@ -8418,7 +8494,7 @@ inline void ClientImpl::setup_redirect_client(ClientType &client) { client.set_keep_alive(keep_alive_); client.set_follow_location( true); // Enable redirects to handle multi-step redirects - client.set_url_encode(url_encode_); + client.set_path_encode(path_encode_); client.set_compress(compress_); client.set_decompress(decompress_); @@ -8612,7 +8688,7 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req, : append_query_params(req.path, req.params); 
const auto &path = - url_encode_ ? detail::encode_url(path_with_query) : path_with_query; + path_encode_ ? detail::encode_path(path_with_query) : path_with_query; detail::write_request_line(bstrm, req.method, path); @@ -9658,7 +9734,7 @@ inline void ClientImpl::set_keep_alive(bool on) { keep_alive_ = on; } inline void ClientImpl::set_follow_location(bool on) { follow_location_ = on; } -inline void ClientImpl::set_url_encode(bool on) { url_encode_ = on; } +inline void ClientImpl::set_path_encode(bool on) { path_encode_ = on; } inline void ClientImpl::set_hostname_addr_map(std::map<std::string, std::string> addr_map) { @@ -11134,7 +11210,12 @@ inline void Client::set_follow_location(bool on) { cli_->set_follow_location(on); } -inline void Client::set_url_encode(bool on) { cli_->set_url_encode(on); } +inline void Client::set_path_encode(bool on) { cli_->set_path_encode(on); } + +[[deprecated("Use set_path_encode instead")]] +inline void Client::set_url_encode(bool on) { + cli_->set_path_encode(on); +} inline void Client::set_compress(bool on) { cli_->set_compress(on); } diff --git a/test/test.cc b/test/test.cc index 34a875d390..ad2280907f 100644 --- a/test/test.cc +++ b/test/test.cc @@ -258,33 +258,33 @@ TEST(StartupTest, WSAStartup) { } #endif -TEST(DecodeURLTest, PercentCharacter) { +TEST(DecodePathTest, PercentCharacter) { EXPECT_EQ( - detail::decode_url( + detail::decode_path( R"(descrip=Gastos%20%C3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%B1%C3%91%206)", false), u8"descrip=Gastos áéíóúñÑ 6"); } -TEST(DecodeURLTest, PercentCharacterNUL) { +TEST(DecodePathTest, PercentCharacterNUL) { string expected; expected.push_back('x'); expected.push_back('\0'); expected.push_back('x'); - EXPECT_EQ(detail::decode_url("x%00x", false), expected); + EXPECT_EQ(detail::decode_path("x%00x", false), expected); } TEST(EncodeQueryParamTest, ParseUnescapedChararactersTest) { string unescapedCharacters = "-_.!~*'()"; - EXPECT_EQ(detail::encode_query_param(unescapedCharacters), "-_.!~*'()"); + 
EXPECT_EQ(httplib::encode_uri_component(unescapedCharacters), "-_.!~*'()"); } TEST(EncodeQueryParamTest, ParseReservedCharactersTest) { string reservedCharacters = ";,/?:@&=+$"; - EXPECT_EQ(detail::encode_query_param(reservedCharacters), + EXPECT_EQ(httplib::encode_uri_component(reservedCharacters), "%3B%2C%2F%3F%3A%40%26%3D%2B%24"); } @@ -293,13 +293,168 @@ TEST(EncodeQueryParamTest, TestUTF8Characters) { string russianCharacters = u8"дом"; string brazilianCharacters = u8"óculos"; - EXPECT_EQ(detail::encode_query_param(chineseCharacters), + EXPECT_EQ(httplib::encode_uri_component(chineseCharacters), "%E4%B8%AD%E5%9B%BD%E8%AA%9E"); - EXPECT_EQ(detail::encode_query_param(russianCharacters), + EXPECT_EQ(httplib::encode_uri_component(russianCharacters), "%D0%B4%D0%BE%D0%BC"); - EXPECT_EQ(detail::encode_query_param(brazilianCharacters), "%C3%B3culos"); + EXPECT_EQ(httplib::encode_uri_component(brazilianCharacters), "%C3%B3culos"); +} + +TEST(EncodeUriComponentTest, ParseUnescapedChararactersTest) { + string unescapedCharacters = "-_.!~*'()"; + + EXPECT_EQ(httplib::encode_uri_component(unescapedCharacters), "-_.!~*'()"); +} + +TEST(EncodeUriComponentTest, ParseReservedCharactersTest) { + string reservedCharacters = ";,/?:@&=+$"; + + EXPECT_EQ(httplib::encode_uri_component(reservedCharacters), + "%3B%2C%2F%3F%3A%40%26%3D%2B%24"); +} + +TEST(EncodeUriComponentTest, TestUTF8Characters) { + string chineseCharacters = u8"中国語"; + string russianCharacters = u8"дом"; + string brazilianCharacters = u8"óculos"; + + EXPECT_EQ(httplib::encode_uri_component(chineseCharacters), + "%E4%B8%AD%E5%9B%BD%E8%AA%9E"); + + EXPECT_EQ(httplib::encode_uri_component(russianCharacters), + "%D0%B4%D0%BE%D0%BC"); + + EXPECT_EQ(httplib::encode_uri_component(brazilianCharacters), "%C3%B3culos"); +} + +TEST(EncodeUriComponentTest, TestPathComponentEncoding) { + // Issue #2082 use case: encoding path component with ampersand + string pathWithAmpersand = "Piri Tommy Villiers - on & on"; + + 
EXPECT_EQ(httplib::encode_uri_component(pathWithAmpersand), + "Piri%20Tommy%20Villiers%20-%20on%20%26%20on"); +} + +TEST(EncodeUriTest, ParseUnescapedChararactersTest) { + string unescapedCharacters = "-_.!~*'()"; + + EXPECT_EQ(httplib::encode_uri(unescapedCharacters), "-_.!~*'()"); +} + +TEST(EncodeUriTest, ParseReservedCharactersTest) { + string reservedCharacters = ";,/?:@&=+$#"; + + EXPECT_EQ(httplib::encode_uri(reservedCharacters), ";,/?:@&=+$#"); +} + +TEST(EncodeUriTest, TestUTF8Characters) { + string chineseCharacters = u8"中国語"; + string russianCharacters = u8"дом"; + string brazilianCharacters = u8"óculos"; + + EXPECT_EQ(httplib::encode_uri(chineseCharacters), + "%E4%B8%AD%E5%9B%BD%E8%AA%9E"); + + EXPECT_EQ(httplib::encode_uri(russianCharacters), "%D0%B4%D0%BE%D0%BC"); + + EXPECT_EQ(httplib::encode_uri(brazilianCharacters), "%C3%B3culos"); +} + +TEST(EncodeUriTest, TestCompleteUri) { + string uri = + "https://example.com/path/to/resource?query=value&param=test#fragment"; + + EXPECT_EQ( + httplib::encode_uri(uri), + "https://example.com/path/to/resource?query=value&param=test#fragment"); +} + +TEST(EncodeUriTest, TestUriWithSpacesAndSpecialChars) { + string uri = + "https://example.com/path with spaces/file name.html?q=hello world"; + + EXPECT_EQ(httplib::encode_uri(uri), + "https://example.com/path%20with%20spaces/" + "file%20name.html?q=hello%20world"); +} + +TEST(DecodeUriComponentTest, ParseEncodedChararactersTest) { + string encodedString = "%3B%2C%2F%3F%3A%40%26%3D%2B%24"; + + EXPECT_EQ(httplib::decode_uri_component(encodedString), ";,/?:@&=+$"); +} + +TEST(DecodeUriComponentTest, ParseUnescapedChararactersTest) { + string unescapedCharacters = "-_.!~*'()"; + + EXPECT_EQ(httplib::decode_uri_component(unescapedCharacters), "-_.!~*'()"); +} + +TEST(DecodeUriComponentTest, TestUTF8Characters) { + string encodedChinese = "%E4%B8%AD%E5%9B%BD%E8%AA%9E"; + string encodedRussian = "%D0%B4%D0%BE%D0%BC"; + string encodedBrazilian = "%C3%B3culos"; + 
EXPECT_EQ(httplib::decode_uri_component(encodedChinese), u8"中国語"); + EXPECT_EQ(httplib::decode_uri_component(encodedRussian), u8"дом"); + EXPECT_EQ(httplib::decode_uri_component(encodedBrazilian), u8"óculos"); +} + +TEST(DecodeUriComponentTest, TestPathComponentDecoding) { + string encodedPath = "Piri%20Tommy%20Villiers%20-%20on%20%26%20on"; + + EXPECT_EQ(httplib::decode_uri_component(encodedPath), + "Piri Tommy Villiers - on & on"); +} + +TEST(DecodeUriTest, ParseEncodedChararactersTest) { + string encodedString = "%20%22%3C%3E%5C%5E%60%7B%7D%7C"; + + EXPECT_EQ(httplib::decode_uri(encodedString), " \"<>\\^`{}|"); +} + +TEST(DecodeUriTest, ParseUnescapedChararactersTest) { + string unescapedCharacters = "-_.!~*'();,/?:@&=+$#"; + + EXPECT_EQ(httplib::decode_uri(unescapedCharacters), "-_.!~*'();,/?:@&=+$#"); +} + +TEST(DecodeUriTest, TestUTF8Characters) { + string encodedChinese = "%E4%B8%AD%E5%9B%BD%E8%AA%9E"; + string encodedRussian = "%D0%B4%D0%BE%D0%BC"; + string encodedBrazilian = "%C3%B3culos"; + + EXPECT_EQ(httplib::decode_uri(encodedChinese), u8"中国語"); + EXPECT_EQ(httplib::decode_uri(encodedRussian), u8"дом"); + EXPECT_EQ(httplib::decode_uri(encodedBrazilian), u8"óculos"); +} + +TEST(DecodeUriTest, TestCompleteUri) { + string encodedUri = "https://example.com/path%20with%20spaces/" + "file%20name.html?q=hello%20world"; + + EXPECT_EQ( + httplib::decode_uri(encodedUri), + "https://example.com/path with spaces/file name.html?q=hello world"); +} + +TEST(DecodeUriTest, TestRoundTripWithEncodeUri) { + string original = + "https://example.com/path with spaces/file name.html?q=hello world"; + string encoded = httplib::encode_uri(original); + string decoded = httplib::decode_uri(encoded); + + EXPECT_EQ(decoded, original); +} + +TEST(DecodeUriComponentTest, TestRoundTripWithEncodeUriComponent) { + string original = "Piri Tommy Villiers - on & on"; + string encoded = httplib::encode_uri_component(original); + string decoded = httplib::decode_uri_component(encoded); + + 
EXPECT_EQ(decoded, original); } TEST(TrimTests, TrimStringTests) { @@ -2116,7 +2271,7 @@ TEST(PathUrlEncodeTest, PathUrlEncode) { { Client cli(HOST, PORT); - cli.set_url_encode(false); + cli.set_path_encode(false); auto res = cli.Get("/foo?a=explicitly+encoded"); ASSERT_TRUE(res); @@ -2146,7 +2301,7 @@ TEST(PathUrlEncodeTest, IncludePercentEncodingLF) { { Client cli(HOST, PORT); - cli.set_url_encode(false); + cli.set_path_encode(false); auto res = cli.Get("/?something=%0A"); ASSERT_TRUE(res);