Skip to content

Commit 0afe6e4

Browse files
[libc] Scanf shouldn't match just "0x" for hex int (#112440)
Scanf parsing reads the longest possibly valid prefix for a given conversion. Then, it performs the conversion on that string. In the case of "0xZ" with a hex conversion (either "%x" or "%i") the longest possibly valid prefix is "0x", which makes it the "input item" (per the standard). The sequence "0x" is not a "matching sequence" for a hex conversion, meaning it results in a matching failure, and parsing ends. This is because to know that there's no valid digit after "0x" it reads the 'Z', but it can only put back one character (the 'Z') leaving it with consuming an invalid sequence. (inspired by a thread on the libc-coord mailing list: https://www.openwall.com/lists/libc-coord/2024/10/15/1, see 7.32.6.2 in the standard for more details.)
1 parent b35b583 commit 0afe6e4

File tree

2 files changed

+42
-14
lines changed

2 files changed

+42
-14
lines changed

libc/src/stdio/scanf_core/int_converter.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,24 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
124124

125125
if (to_lower(cur_char) == 'x') {
126126
// This is a valid hex prefix.
127+
128+
is_number = false;
129+
// A valid hex prefix is not necessarily a valid number. For the
130+
// conversion to be valid it needs to use all of the characters it
131+
// consumes. From the standard:
132+
// 7.23.6.2 paragraph 9: "An input item is defined as the longest
133+
// sequence of input characters which does not exceed any specified
134+
// field width and which is, or is a prefix of, a matching input
135+
// sequence."
136+
// 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
137+
// the execution of the directive fails: this condition is a matching
138+
// failure"
127139
base = 16;
128140
if (max_width > 1) {
129141
--max_width;
130142
cur_char = reader->getc();
131143
} else {
132-
write_int_with_length(0, to_conv);
133-
return READ_OK;
144+
return MATCHING_FAILURE;
134145
}
135146

136147
} else {
@@ -198,6 +209,9 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
198209
// last one back.
199210
reader->ungetc(cur_char);
200211

212+
if (!is_number)
213+
return MATCHING_FAILURE;
214+
201215
if (has_overflow) {
202216
write_int_with_length(MAX, to_conv);
203217
} else {
@@ -207,8 +221,6 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
207221
write_int_with_length(result, to_conv);
208222
}
209223

210-
if (!is_number)
211-
return MATCHING_FAILURE;
212224
return READ_OK;
213225
}
214226

libc/test/src/stdio/sscanf_test.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,25 @@ TEST(LlvmLibcSScanfTest, IntConvMaxLengthTests) {
177177
EXPECT_EQ(ret_val, 1);
178178
EXPECT_EQ(result, 0);
179179

180+
result = -999;
181+
182+
// 0x is a valid prefix, but not a valid number. This should be a matching
183+
// failure and should not modify the values.
180184
ret_val = LIBC_NAMESPACE::sscanf("0x1", "%2i", &result);
181-
EXPECT_EQ(ret_val, 1);
182-
EXPECT_EQ(result, 0);
185+
EXPECT_EQ(ret_val, 0);
186+
EXPECT_EQ(result, -999);
183187

184188
ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%3i", &result);
189+
EXPECT_EQ(ret_val, 0);
190+
EXPECT_EQ(result, -999);
191+
192+
ret_val = LIBC_NAMESPACE::sscanf("0x1", "%3i", &result);
185193
EXPECT_EQ(ret_val, 1);
186-
EXPECT_EQ(result, 0);
194+
EXPECT_EQ(result, 1);
195+
196+
ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%4i", &result);
197+
EXPECT_EQ(ret_val, 1);
198+
EXPECT_EQ(result, -1);
187199

188200
ret_val = LIBC_NAMESPACE::sscanf("-0x123", "%4i", &result);
189201
EXPECT_EQ(ret_val, 1);
@@ -212,7 +224,7 @@ TEST(LlvmLibcSScanfTest, IntConvNoWriteTests) {
212224
EXPECT_EQ(result, 0);
213225

214226
ret_val = LIBC_NAMESPACE::sscanf("0x1", "%*2i", &result);
215-
EXPECT_EQ(ret_val, 1);
227+
EXPECT_EQ(ret_val, 0);
216228
EXPECT_EQ(result, 0);
217229

218230
ret_val = LIBC_NAMESPACE::sscanf("a", "%*i", &result);
@@ -679,13 +691,17 @@ TEST(LlvmLibcSScanfTest, CombinedConv) {
679691
EXPECT_EQ(result, 123);
680692
ASSERT_STREQ(buffer, "abc");
681693

694+
result = -1;
695+
696+
// 0x is a valid prefix, but not a valid number. This should be a matching
697+
// failure and should not modify the values.
682698
ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%i%s", &result, buffer);
683-
EXPECT_EQ(ret_val, 2);
684-
EXPECT_EQ(result, 0);
685-
ASSERT_STREQ(buffer, "ZZZ");
699+
EXPECT_EQ(ret_val, 0);
700+
EXPECT_EQ(result, -1);
701+
ASSERT_STREQ(buffer, "abc");
686702

687703
ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%X%s", &result, buffer);
688-
EXPECT_EQ(ret_val, 2);
689-
EXPECT_EQ(result, 0);
690-
ASSERT_STREQ(buffer, "ZZZ");
704+
EXPECT_EQ(ret_val, 0);
705+
EXPECT_EQ(result, -1);
706+
ASSERT_STREQ(buffer, "abc");
691707
}

0 commit comments

Comments
 (0)