Skip to content

Commit c19c71b

Browse files
authored
[lldb][test] Split out libc++ std::string tests that check corrupted strings (#147252)
As a prerequisite to combining the libcxx and libstdcxx string formatter tests (see #146740), this patch splits out the libcxx-specific parts into a separate test. These are probably best tested with the libcxx-simulator tests, but for now I just moved them.
1 parent 912ab52 commit c19c71b

File tree

5 files changed

+153
-124
lines changed

5 files changed

+153
-124
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CXX_SOURCES := main.cpp
2+
3+
USE_LIBCPP := 1
4+
5+
include Makefile.rules
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""
2+
Test lldb behaves sanely when formatting corrupted `std::string`s.
3+
"""
4+
5+
import lldb
6+
from lldbsuite.test.decorators import *
7+
from lldbsuite.test.lldbtest import *
8+
from lldbsuite.test import lldbutil
9+
10+
11+
class LibcxxInvalidStringDataFormatterTestCase(TestBase):
    """Checks the summaries LLDB prints for deliberately corrupted libc++
    `std::string` objects built by this test's main.cpp."""

    @add_test_categories(["libc++"])
    @skipIf(oslist=[lldbplatformutil.getDarwinOSTriples()], archs=["arm", "aarch64"])
    def test(self):
        """Stop at the marker line in main.cpp and check each `garbageN` summary."""
        self.build()

        lldbutil.run_to_source_breakpoint(
            self, "Set break point at this line.", lldb.SBFileSpec("main.cpp")
        )

        # The hand-built string images in main.cpp use 64-bit fields, so the
        # expectations below only apply when addresses are 8 bytes wide.
        if self.process().GetAddressByteSize() != 8:
            self.skipTest("corrupted std::string layouts require a 64-bit process")

        # The test assumes that std::string is in its cap-size-data layout.
        expected_summaries = [
            ("garbage1", "Summary Unavailable"),
            ("garbage2", r'"\xfa\xfa\xfa\xfa"'),
            ("garbage3", r'"\xf0\xf0"'),
            ("garbage4", "Summary Unavailable"),
            ("garbage5", "Summary Unavailable"),
        ]
        for name, summary in expected_summaries:
            self.expect(
                "frame variable " + name, substrs=[name + " = " + summary]
            )
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <string>
5+
6+
// For more information about libc++'s std::string ABI, see:
7+
//
8+
// https://joellaity.com/2020/01/31/string.html
9+
10+
// Raw bytes shaped like a short-mode (SSO) libc++ std::string whose encoded
// length (116) is far larger than the 23-byte inline buffer.
static struct {
#if _LIBCPP_ABI_VERSION == 1
  // In short mode the size is stored as `size << 1`, so a length of 116 is
  // written as 232.
  unsigned char size = 116 << 1;

  // Inline character storage, zero-filled.
  char inline_buf[23] = {0};
#else  // _LIBCPP_ABI_VERSION == 1
  // Same idea, but the inline buffer comes first and the size shares its byte
  // with the long-mode flag via bitfields.
  char inline_buf[23] = {0};
  unsigned char size : 7 = 116;
  unsigned char is_long : 1 = 0;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_short_mode;
26+
27+
// A long-mode libcxx string image whose payload is four 0xFA bytes. As a utf8
// lead byte, 0xFA (250) announces a 5-byte sequence, which is inherently
// invalid, so LLDB should fall back to ASCII printing.
static unsigned char garbage_utf8_payload1[] = {0xFA, 0xFA, 0xFA, 0xFA};
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 4;
  unsigned char *data = garbage_utf8_payload1;
#else  // _LIBCPP_ABI_VERSION == 1
  unsigned char *data = garbage_utf8_payload1;
  uint64_t size = 4;
  uint64_t cap : 63 = 4;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode1;
45+
46+
// A long-mode libcxx string image whose payload is two 0xF0 bytes. As a utf8
// lead byte, 0xF0 (240) announces a 4-byte sequence, but the buffer is too
// small for that, so LLDB should fall back to ASCII printing.
static unsigned char garbage_utf8_payload2[] = {0xF0, 0xF0};
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 3;
  uint64_t size = 2;
  unsigned char *data = garbage_utf8_payload2;
#else  // _LIBCPP_ABI_VERSION == 1
  unsigned char *data = garbage_utf8_payload2;
  uint64_t size = 2;
  uint64_t cap : 63 = 3;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode2;
64+
65+
// A long-mode libcxx string image with an inconsistent header: the recorded
// size (7) is greater than the recorded capacity (5), while the data pointer
// refers to the literal "foo".
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 7;
  const char *data = "foo";
#else  // _LIBCPP_ABI_VERSION == 1
  const char *data = "foo";
  uint64_t size = 7;
  uint64_t cap : 63 = 5;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode3;
79+
80+
// A long-mode libcxx string image that would trigger a buffer overflow if it
// were trusted: the data "pointer" is 0xfffffffffffffffe, so reading `size`
// (2) bytes from it runs off the end of the 64-bit address range.
static struct {
#if _LIBCPP_ABI_VERSION == 1
  uint64_t cap = 5;
  uint64_t size = 2;
  uint64_t data = 0xfffffffffffffffeULL;
#else  // _LIBCPP_ABI_VERSION == 1
  uint64_t data = 0xfffffffffffffffeULL;
  uint64_t size = 2;
  uint64_t cap : 63 = 5;
  uint64_t is_long : 1 = 1;
#endif // #if _LIBCPP_ABI_VERSION == 1
} garbage_string_long_mode4;
94+
95+
int main() {
96+
std::string garbage1, garbage2, garbage3, garbage4, garbage5;
97+
if (sizeof(std::string) == sizeof(garbage_string_short_mode))
98+
memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
99+
if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
100+
memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
101+
if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
102+
memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
103+
if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
104+
memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
105+
if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
106+
memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
107+
108+
std::puts("// Set break point at this line.");
109+
return 0;
110+
}

lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ def cleanup():
4242
self.runCmd("type filter clear", check=False)
4343
self.runCmd("type synth clear", check=False)
4444

45-
is_64_bit = self.process().GetAddressByteSize() == 8
46-
4745
# Execute the cleanup function during test case tear down.
4846
self.addTearDownHook(cleanup)
4947

@@ -126,25 +124,6 @@ def cleanup():
126124
],
127125
)
128126

129-
# The test assumes that std::string is in its cap-size-data layout.
130-
is_alternate_layout = (
131-
"arm" in self.getArchitecture()
132-
) and self.platformIsDarwin()
133-
if is_64_bit and not is_alternate_layout:
134-
self.expect(
135-
"frame variable garbage1", substrs=["garbage1 = Summary Unavailable"]
136-
)
137-
self.expect(
138-
"frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"']
139-
)
140-
self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"'])
141-
self.expect(
142-
"frame variable garbage4", substrs=["garbage4 = Summary Unavailable"]
143-
)
144-
self.expect(
145-
"frame variable garbage5", substrs=["garbage5 = Summary Unavailable"]
146-
)
147-
148127
# Finally, make sure that if the string is not readable, we give an error:
149128
bkpt_2 = target.BreakpointCreateBySourceRegex(
150129
"Break here to look at bad string", self.main_spec

lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp

Lines changed: 0 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,6 @@
11
#include <string>
22
#include <stdint.h>
33

4-
// For more information about libc++'s std::string ABI, see:
5-
//
6-
// https://joellaity.com/2020/01/31/string.html
7-
8-
// A corrupt string which hits the SSO code path, but has an invalid size.
9-
static struct {
10-
#if _LIBCPP_ABI_VERSION == 1
11-
// Set the size of this short-mode string to 116. Note that in short mode,
12-
// the size is encoded as `size << 1`.
13-
unsigned char size = 232;
14-
15-
// 23 garbage bytes for the inline string payload.
16-
char inline_buf[23] = {0};
17-
#else // _LIBCPP_ABI_VERSION == 1
18-
// Like above, but data comes first, and use bitfields to indicate size.
19-
char inline_buf[23] = {0};
20-
unsigned char size : 7 = 116;
21-
unsigned char is_long : 1 = 0;
22-
#endif // #if _LIBCPP_ABI_VERSION == 1
23-
} garbage_string_short_mode;
24-
25-
// A corrupt libcxx string in long mode with a payload that contains a utf8
26-
// sequence that's inherently too long.
27-
static unsigned char garbage_utf8_payload1[] = {
28-
250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
29-
// should fall back to ASCII printing.
30-
250, 250, 250
31-
};
32-
static struct {
33-
#if _LIBCPP_ABI_VERSION == 1
34-
uint64_t cap = 5;
35-
uint64_t size = 4;
36-
unsigned char *data = &garbage_utf8_payload1[0];
37-
#else // _LIBCPP_ABI_VERSION == 1
38-
unsigned char *data = &garbage_utf8_payload1[0];
39-
uint64_t size = 4;
40-
uint64_t cap : 63 = 4;
41-
uint64_t is_long : 1 = 1;
42-
#endif // #if _LIBCPP_ABI_VERSION == 1
43-
} garbage_string_long_mode1;
44-
45-
// A corrupt libcxx string in long mode with a payload that contains a utf8
46-
// sequence that's too long to fit in the buffer.
47-
static unsigned char garbage_utf8_payload2[] = {
48-
240, // This means that we expect a 4-byte sequence, but the buffer is too
49-
// small for this. LLDB should fall back to ASCII printing.
50-
240
51-
};
52-
static struct {
53-
#if _LIBCPP_ABI_VERSION == 1
54-
uint64_t cap = 3;
55-
uint64_t size = 2;
56-
unsigned char *data = &garbage_utf8_payload2[0];
57-
#else // _LIBCPP_ABI_VERSION == 1
58-
unsigned char *data = &garbage_utf8_payload2[0];
59-
uint64_t size = 2;
60-
uint64_t cap : 63 = 3;
61-
uint64_t is_long : 1 = 1;
62-
#endif // #if _LIBCPP_ABI_VERSION == 1
63-
} garbage_string_long_mode2;
64-
65-
// A corrupt libcxx string which has an invalid size (i.e. a size greater than
66-
// the capacity of the string).
67-
static struct {
68-
#if _LIBCPP_ABI_VERSION == 1
69-
uint64_t cap = 5;
70-
uint64_t size = 7;
71-
const char *data = "foo";
72-
#else // _LIBCPP_ABI_VERSION == 1
73-
const char *data = "foo";
74-
uint64_t size = 7;
75-
uint64_t cap : 63 = 5;
76-
uint64_t is_long : 1 = 1;
77-
#endif // #if _LIBCPP_ABI_VERSION == 1
78-
} garbage_string_long_mode3;
79-
80-
// A corrupt libcxx string in long mode with a payload that would trigger a
81-
// buffer overflow.
82-
static struct {
83-
#if _LIBCPP_ABI_VERSION == 1
84-
uint64_t cap = 5;
85-
uint64_t size = 2;
86-
uint64_t data = 0xfffffffffffffffeULL;
87-
#else // _LIBCPP_ABI_VERSION == 1
88-
uint64_t data = 0xfffffffffffffffeULL;
89-
uint64_t size = 2;
90-
uint64_t cap : 63 = 5;
91-
uint64_t is_long : 1 = 1;
92-
#endif // #if _LIBCPP_ABI_VERSION == 1
93-
} garbage_string_long_mode4;
94-
954
size_t touch_string(std::string &in_str)
965
{
976
return in_str.size(); // Break here to look at bad string
@@ -115,18 +24,6 @@ int main()
11524
std::u32string u32_empty(U"");
11625
std::string *null_str = nullptr;
11726

118-
std::string garbage1, garbage2, garbage3, garbage4, garbage5;
119-
if (sizeof(std::string) == sizeof(garbage_string_short_mode))
120-
memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
121-
if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
122-
memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
123-
if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
124-
memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
125-
if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
126-
memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
127-
if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
128-
memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
129-
13027
S.assign(L"!!!!!"); // Set break point at this line.
13128
std::string *not_a_string = (std::string *) 0x0;
13229
touch_string(*not_a_string);

0 commit comments

Comments
 (0)