Skip to content

Commit 4d095f0

Browse files
authored
Merge pull request #45 from euroelessar/fully-decode
Support full decoding of reserved expansions
2 parents b48d8aa + 8d0ff5f commit 4d095f0

File tree

2 files changed

+106
-28
lines changed

2 files changed

+106
-28
lines changed

src/include/grpc_transcoding/path_matcher.h

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ namespace transcoding {
3232
template <class Method>
3333
class PathMatcherBuilder; // required for PathMatcher constructor
3434

35+
enum class UrlUnescapeSpec {
36+
// URL path parameters will not decode RFC 6570 reserved characters.
37+
// This is the default behavior.
38+
kAllCharactersExceptReserved = 0,
39+
// URL path parameters will be fully URI-decoded except in
40+
// cases of single segment matches in reserved expansion, where "%2F" will be
41+
// left encoded.
42+
kAllCharactersExceptSlash,
43+
// URL path parameters will be fully URI-decoded.
44+
kAllCharacters,
45+
};
46+
3547
// The immutable, thread safe PathMatcher stores a mapping from a combination of
3648
// a service (host) name and a HTTP path to your method (MethodInfo*). It is
3749
// constructed with a PathMatcherBuilder and supports one operation: Lookup.
@@ -82,6 +94,7 @@ class PathMatcher {
8294
// The info associated with each method. The path matcher nodes
8395
// will hold pointers to MethodData objects in this vector.
8496
std::vector<std::unique_ptr<MethodData>> methods_;
97+
UrlUnescapeSpec unescape_spec_;
8598

8699
private:
87100
friend class PathMatcherBuilder<Method>;
@@ -113,6 +126,11 @@ class PathMatcherBuilder {
113126
bool Register(const std::string& http_method, const std::string& path,
114127
const std::string& body_field_path, Method method);
115128

129+
// Change unescaping behavior, see UrlUnescapeSpec for available options.
130+
void SetUrlUnescapeSpec(UrlUnescapeSpec unescape_spec) {
131+
unescape_spec_ = unescape_spec;
132+
}
133+
116134
// Returns a unique_ptr to a thread safe PathMatcher that contains all
117135
// registered path-WrapperGraph pairs. Note the PathMatchBuilder instance
118136
// will be moved so cannot use after invoking Build().
@@ -133,6 +151,8 @@ class PathMatcherBuilder {
133151
std::unordered_set<std::string> custom_verbs_;
134152
typedef typename PathMatcher<Method>::MethodData MethodData;
135153
std::vector<std::unique_ptr<MethodData>> methods_;
154+
UrlUnescapeSpec unescape_spec_ =
155+
UrlUnescapeSpec::kAllCharactersExceptReserved;
136156

137157
friend class PathMatcher<Method>;
138158
};
@@ -203,13 +223,24 @@ inline int hex_digit_to_int(char c) {
203223
// If the next three characters are an escaped character then this function will
204224
// also return what character is escaped.
205225
bool GetEscapedChar(const std::string& src, size_t i,
206-
bool unescape_reserved_chars, char* out) {
226+
UrlUnescapeSpec unescape_spec, char* out) {
207227
if (i + 2 < src.size() && src[i] == '%') {
208228
if (ascii_isxdigit(src[i + 1]) && ascii_isxdigit(src[i + 2])) {
209229
char c =
210230
(hex_digit_to_int(src[i + 1]) << 4) | hex_digit_to_int(src[i + 2]);
211-
if (!unescape_reserved_chars && IsReservedChar(c)) {
212-
return false;
231+
switch (unescape_spec) {
232+
case UrlUnescapeSpec::kAllCharactersExceptReserved:
233+
if (IsReservedChar(c)) {
234+
return false;
235+
}
236+
break;
237+
case UrlUnescapeSpec::kAllCharactersExceptSlash:
238+
if (c == '/') {
239+
return false;
240+
}
241+
break;
242+
case UrlUnescapeSpec::kAllCharacters:
243+
break;
213244
}
214245
*out = c;
215246
return true;
@@ -222,13 +253,13 @@ bool GetEscapedChar(const std::string& src, size_t i,
222253
// (as specified in RFC 6570) are not escaped if unescape_reserved_chars is
223254
// false.
224255
std::string UrlUnescapeString(const std::string& part,
225-
bool unescape_reserved_chars) {
256+
UrlUnescapeSpec unescape_spec) {
226257
std::string unescaped;
227258
// Check whether we need to escape at all.
228259
bool needs_unescaping = false;
229260
char ch = '\0';
230261
for (size_t i = 0; i < part.size(); ++i) {
231-
if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) {
262+
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
232263
needs_unescaping = true;
233264
break;
234265
}
@@ -244,7 +275,7 @@ std::string UrlUnescapeString(const std::string& part,
244275
char* p = begin;
245276

246277
for (size_t i = 0; i < part.size();) {
247-
if (GetEscapedChar(part, i, unescape_reserved_chars, &ch)) {
278+
if (GetEscapedChar(part, i, unescape_spec, &ch)) {
248279
*p++ = ch;
249280
i += 3;
250281
} else {
@@ -260,7 +291,8 @@ std::string UrlUnescapeString(const std::string& part,
260291
template <class VariableBinding>
261292
void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
262293
const std::vector<std::string>& parts,
263-
std::vector<VariableBinding>* bindings) {
294+
std::vector<VariableBinding>* bindings,
295+
UrlUnescapeSpec unescape_spec) {
264296
for (const auto& var : vars) {
265297
// Determine the subpath bound to the variable based on the
266298
// [start_segment, end_segment) segment range of the variable.
@@ -278,10 +310,13 @@ void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
278310
// multi-part match by checking if it->second.end_segment is negative.
279311
bool is_multipart =
280312
(end_segment - var.start_segment) > 1 || var.end_segment < 0;
313+
const UrlUnescapeSpec var_unescape_spec =
314+
is_multipart ? unescape_spec : UrlUnescapeSpec::kAllCharacters;
315+
281316
// Joins parts with "/" to form a path string.
282317
for (size_t i = var.start_segment; i < end_segment; ++i) {
283318
// For multipart matches only unescape non-reserved characters.
284-
binding.value += UrlUnescapeString(parts[i], !is_multipart);
319+
binding.value += UrlUnescapeString(parts[i], var_unescape_spec);
285320
if (i < end_segment - 1) {
286321
binding.value += "/";
287322
}
@@ -314,7 +349,8 @@ void ExtractBindingsFromQueryParameters(
314349
// in the request, e.g. `book.author.name`.
315350
VariableBinding binding;
316351
split(name, '.', binding.field_path);
317-
binding.value = UrlUnescapeString(param.substr(pos + 1), true);
352+
binding.value = UrlUnescapeString(param.substr(pos + 1),
353+
UrlUnescapeSpec::kAllCharacters);
318354
bindings->emplace_back(std::move(binding));
319355
}
320356
}
@@ -389,7 +425,8 @@ template <class Method>
389425
PathMatcher<Method>::PathMatcher(PathMatcherBuilder<Method>&& builder)
390426
: root_ptr_(std::move(builder.root_ptr_)),
391427
custom_verbs_(std::move(builder.custom_verbs_)),
392-
methods_(std::move(builder.methods_)) {}
428+
methods_(std::move(builder.methods_)),
429+
unescape_spec_(builder.unescape_spec_) {}
393430

394431
// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper
395432
// splits the request path into slash-separated path parts. Next, the method
@@ -424,7 +461,8 @@ Method PathMatcher<Method>::Lookup(
424461
MethodData* method_data = reinterpret_cast<MethodData*>(lookup_result.data);
425462
if (variable_bindings != nullptr) {
426463
variable_bindings->clear();
427-
ExtractBindingsFromPath(method_data->variables, parts, variable_bindings);
464+
ExtractBindingsFromPath(method_data->variables, parts, variable_bindings,
465+
unescape_spec_);
428466
ExtractBindingsFromQueryParameters(
429467
query_params, method_data->system_query_parameter_names,
430468
variable_bindings);

test/path_matcher_test.cc

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,6 @@ std::string FieldPathToString(const FieldPath& fp) {
6464
return s;
6565
}
6666

67-
} // namespace
68-
6967
std::ostream& operator<<(std::ostream& os, const Binding& b) {
7068
return os << "{ " << FieldPathToString(b.field_path) << "=" << b.value << "}";
7169
}
@@ -77,6 +75,8 @@ std::ostream& operator<<(std::ostream& os, const Bindings& bindings) {
7775
return os;
7876
}
7977

78+
} // namespace
79+
8080
namespace {
8181

8282
class PathMatcherTest : public ::testing::Test {
@@ -116,6 +116,10 @@ class PathMatcherTest : public ::testing::Test {
116116

117117
MethodInfo* AddGetPath(std::string path) { return AddPath("GET", path); }
118118

119+
void SetUrlUnescapeSpec(UrlUnescapeSpec unescape_spec) {
120+
builder_.SetUrlUnescapeSpec(unescape_spec);
121+
}
122+
119123
void Build() { matcher_ = builder_.Build(); }
120124

121125
MethodInfo* LookupWithBodyFieldPath(std::string method, std::string path,
@@ -146,6 +150,29 @@ class PathMatcherTest : public ::testing::Test {
146150
return result;
147151
}
148152

153+
void MultiSegmentMatchWithReservedCharactersBase(
154+
std::string expected_component) {
155+
MethodInfo* a__c = AddGetPath("/a/{x=*}/{y=**}/c");
156+
Build();
157+
158+
EXPECT_NE(nullptr, a__c);
159+
160+
Bindings bindings;
161+
EXPECT_EQ(
162+
Lookup("GET",
163+
"/a/%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D/"
164+
"%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D/c",
165+
&bindings),
166+
a__c);
167+
168+
EXPECT_EQ(
169+
Bindings({// Single-part component is always fully decoded.
170+
Binding{FieldPath{"x"}, "!#$&'()*+,/:;=?@[]"},
171+
// Multi-part component depends on the builder configuration.
172+
Binding{FieldPath{"y"}, expected_component}}),
173+
bindings);
174+
}
175+
149176
private:
150177
PathMatcherBuilder<MethodInfo*> builder_;
151178
PathMatcherPtr<MethodInfo*> matcher_;
@@ -354,24 +381,37 @@ TEST_F(PathMatcherTest, PercentEscapesNotUnescapedForMultiSegment2) {
354381
bindings);
355382
}
356383

357-
TEST_F(PathMatcherTest, OnlyUnreservedCharsAreUnescapedForMultiSegmentMatch) {
358-
MethodInfo* a__c = AddGetPath("/a/{x=**}/c");
359-
Build();
384+
TEST_F(
385+
PathMatcherTest,
386+
OnlyUnreservedCharsAreUnescapedForMultiSegmentMatchUnescapeAllExceptReservedImplicit) {
387+
// All %XX are reserved characters, they should be intact.
388+
MultiSegmentMatchWithReservedCharactersBase(
389+
"%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D");
390+
}
360391

361-
EXPECT_NE(nullptr, a__c);
392+
TEST_F(
393+
PathMatcherTest,
394+
OnlyUnreservedCharsAreUnescapedForMultiSegmentMatchUnescapeAllExceptReservedExplicit) {
395+
SetUrlUnescapeSpec(UrlUnescapeSpec::kAllCharactersExceptReserved);
396+
// Set default value explicitly.
397+
MultiSegmentMatchWithReservedCharactersBase(
398+
"%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D");
399+
}
362400

363-
Bindings bindings;
364-
EXPECT_EQ(
365-
Lookup("GET",
366-
"/a/%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D/c",
367-
&bindings),
368-
a__c);
401+
TEST_F(
402+
PathMatcherTest,
403+
OnlyUnreservedCharsAreUnescapedForMultiSegmentMatchUnescapeAllExceptSlash) {
404+
SetUrlUnescapeSpec(UrlUnescapeSpec::kAllCharactersExceptSlash);
405+
// All %XX are reserved characters, all of them should be decoded except
406+
// slash.
407+
MultiSegmentMatchWithReservedCharactersBase("!#$&'()*+,%2F:;=?@[]");
408+
}
369409

370-
// All %XX are reserved characters, they should be intact.
371-
EXPECT_EQ(Bindings({Binding{
372-
FieldPath{"x"},
373-
"%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D"}}),
374-
bindings);
410+
TEST_F(PathMatcherTest,
411+
OnlyUnreservedCharsAreUnescapedForMultiSegmentMatchUnescapeAll) {
412+
SetUrlUnescapeSpec(UrlUnescapeSpec::kAllCharacters);
413+
// All %XX are reserved characters, they should be decoded.
414+
MultiSegmentMatchWithReservedCharactersBase("!#$&'()*+,/:;=?@[]");
375415
}
376416

377417
TEST_F(PathMatcherTest, VariableBindingsWithCustomVerb) {

0 commit comments

Comments
 (0)