Skip to content

Commit 49a199b

Browse files
committed
torcontrol: Handle escapes in Tor QuotedStrings
https://trac.torproject.org/projects/tor/ticket/14999 is tracking an encoding bug with the Tor control protocol, where many of the QuotedString instances that Tor outputs are in fact CStrings, but it is not documented which ones are which. https://spec.torproject.org/control-spec section 2.1.1 provides a future-proofed rule for handing QuotedStrings, which this commit implements. This commit merges all six commits from zcash/zcash#2251
1 parent 0182a11 commit 49a199b

File tree

2 files changed

+87
-11
lines changed

2 files changed

+87
-11
lines changed

src/test/torcontrol_tests.cpp

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,29 +119,60 @@ BOOST_AUTO_TEST_CASE(util_ParseTorReplyMapping)
119119
{"Foo", "Bar Baz"},
120120
});
121121

122-
// Escapes (which are left escaped by the parser)
122+
// Escapes
123123
CheckParseTorReplyMapping(
124124
"Foo=\"Bar\\ Baz\"", {
125-
{"Foo", "Bar\\ Baz"},
125+
{"Foo", "Bar Baz"},
126126
});
127127
CheckParseTorReplyMapping(
128128
"Foo=\"Bar\\Baz\"", {
129-
{"Foo", "Bar\\Baz"},
129+
{"Foo", "BarBaz"},
130130
});
131131
CheckParseTorReplyMapping(
132132
"Foo=\"Bar\\@Baz\"", {
133-
{"Foo", "Bar\\@Baz"},
133+
{"Foo", "Bar@Baz"},
134134
});
135135
CheckParseTorReplyMapping(
136136
"Foo=\"Bar\\\"Baz\" Spam=\"\\\"Eggs\\\"\"", {
137-
{"Foo", "Bar\\\"Baz"},
138-
{"Spam", "\\\"Eggs\\\""},
137+
{"Foo", "Bar\"Baz"},
138+
{"Spam", "\"Eggs\""},
139139
});
140140
CheckParseTorReplyMapping(
141141
"Foo=\"Bar\\\\Baz\"", {
142-
{"Foo", "Bar\\\\Baz"},
142+
{"Foo", "Bar\\Baz"},
143143
});
144144

145+
// C escapes
146+
CheckParseTorReplyMapping(
147+
"Foo=\"Bar\\nBaz\\t\" Spam=\"\\rEggs\" Octals=\"\\1a\\11\\17\\18\\81\\377\\378\\400\\2222\" Final=Check", {
148+
{"Foo", "Bar\nBaz\t"},
149+
{"Spam", "\rEggs"},
150+
{"Octals", "\1a\11\17\1" "881\377\37" "8\40" "0\222" "2"},
151+
{"Final", "Check"},
152+
});
153+
CheckParseTorReplyMapping(
154+
"Valid=Mapping Escaped=\"Escape\\\\\"", {
155+
{"Valid", "Mapping"},
156+
{"Escaped", "Escape\\"},
157+
});
158+
CheckParseTorReplyMapping(
159+
"Valid=Mapping Bare=\"Escape\\\"", {});
160+
CheckParseTorReplyMapping(
161+
"OneOctal=\"OneEnd\\1\" TwoOctal=\"TwoEnd\\11\"", {
162+
{"OneOctal", "OneEnd\1"},
163+
{"TwoOctal", "TwoEnd\11"},
164+
});
165+
166+
// Special handling for null case
167+
// (needed because string comparison reads the null as end-of-string)
168+
BOOST_TEST_MESSAGE(std::string("CheckParseTorReplyMapping(Null=\"\\0\")"));
169+
auto ret = ParseTorReplyMapping("Null=\"\\0\"");
170+
BOOST_CHECK_EQUAL(ret.size(), 1);
171+
auto r_it = ret.begin();
172+
BOOST_CHECK_EQUAL(r_it->first, "Null");
173+
BOOST_CHECK_EQUAL(r_it->second.size(), 1);
174+
BOOST_CHECK_EQUAL(r_it->second[0], '\0');
175+
145176
// A more complex valid grammar. PROTOCOLINFO accepts a VersionLine that
146177
// takes a key=value pair followed by an OptArguments, making this valid.
147178
// Because an OptArguments contains no semantic data, there is no point in

src/torcontrol.cpp

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
// Copyright (c) 2015-2016 The Bitcoin Core developers
2+
// Copyright (c) 2017 The Zcash developers
23
// Distributed under the MIT software license, see the accompanying
34
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
45

@@ -291,17 +292,61 @@ static std::map<std::string,std::string> ParseTorReplyMapping(const std::string
291292
++ptr; // skip opening '"'
292293
bool escape_next = false;
293294
while (ptr < s.size() && (escape_next || s[ptr] != '"')) {
294-
escape_next = (s[ptr] == '\\');
295+
// Repeated backslashes must be interpreted as pairs
296+
escape_next = (s[ptr] == '\\' && !escape_next);
295297
value.push_back(s[ptr]);
296298
++ptr;
297299
}
298300
if (ptr == s.size()) // unexpected end of line
299301
return std::map<std::string,std::string>();
300302
++ptr; // skip closing '"'
301-
/* TODO: unescape value - according to the spec this depends on the
302-
* context, some strings use C-LogPrintf style escape codes, some
303-
* don't. So may be better handled at the call site.
303+
/**
304+
* Unescape value. Per https://spec.torproject.org/control-spec section 2.1.1:
305+
*
306+
* For future-proofing, controller implementors MAY use the following
307+
* rules to be compatible with buggy Tor implementations and with
308+
* future ones that implement the spec as intended:
309+
*
310+
* Read \n \t \r and \0 ... \377 as C escapes.
311+
* Treat a backslash followed by any other character as that character.
304312
*/
313+
std::string escaped_value;
314+
for (size_t i = 0; i < value.size(); ++i) {
315+
if (value[i] == '\\') {
316+
// This will always be valid, because if the QuotedString
317+
// ended in an odd number of backslashes, then the parser
318+
// would already have returned above, due to a missing
319+
// terminating double-quote.
320+
++i;
321+
if (value[i] == 'n') {
322+
escaped_value.push_back('\n');
323+
} else if (value[i] == 't') {
324+
escaped_value.push_back('\t');
325+
} else if (value[i] == 'r') {
326+
escaped_value.push_back('\r');
327+
} else if ('0' <= value[i] && value[i] <= '7') {
328+
size_t j;
329+
// Octal escape sequences have a limit of three octal digits,
330+
// but terminate at the first character that is not a valid
331+
// octal digit if encountered sooner.
332+
for (j = 1; j < 3 && (i+j) < value.size() && '0' <= value[i+j] && value[i+j] <= '7'; ++j) {}
333+
// Tor restricts first digit to 0-3 for three-digit octals.
334+
// A leading digit of 4-7 would therefore be interpreted as
335+
// a two-digit octal.
336+
if (j == 3 && value[i] > '3') {
337+
j--;
338+
}
339+
escaped_value.push_back(strtol(value.substr(i, j).c_str(), NULL, 8));
340+
// Account for automatic incrementing at loop end
341+
i += j - 1;
342+
} else {
343+
escaped_value.push_back(value[i]);
344+
}
345+
} else {
346+
escaped_value.push_back(value[i]);
347+
}
348+
}
349+
value = escaped_value;
305350
} else { // Unquoted value. Note that values can contain '=' at will, just no spaces
306351
while (ptr < s.size() && s[ptr] != ' ') {
307352
value.push_back(s[ptr]);

0 commit comments

Comments
 (0)