Skip to content

Commit 41ed82e

Browse files
authored
Add basic support for strings to core, check, and lower. (#5963)
Add a `Core.String` class to the prelude representing a string view. Rename the `String` keyword to `str` and make it evaluate to `Core.String`. `Core.String` is represented as a pair of a pointer to a character (actually, to the first character of a string, but we don't have a way of modeling that yet) and a size (which should be pointer-width, but is currently always a `u64` as we don't have a `usize` equivalent yet). `Core.String` values are generated directly by the toolchain for string literal expressions. This follows the direction established at the recent summit, but the design implemented here has not yet been through the proposal process.
1 parent 8d08e77 commit 41ed82e

39 files changed

+543
-516
lines changed

core/prelude/types.carbon

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ export import library "prelude/types/float_literal";
1111
export import library "prelude/types/int";
1212
export import library "prelude/types/int_literal";
1313
export import library "prelude/types/optional";
14+
export import library "prelude/types/string";
1415
export import library "prelude/types/uint";

core/prelude/types/string.carbon

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
2+
// Exceptions. See /LICENSE for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
package Core library "prelude/types/string";
6+
7+
import library "prelude/destroy";
8+
import library "prelude/types/char";
9+
import library "prelude/types/uint";
10+
11+
// A string view, stored as a pointer to character data plus a size.
class String {
12+
// Pointer to a `Char`.
// TODO: This should be an array iterator.
13+
private var ptr: Char*;
// The size of the string, currently stored as a `u64`.
14+
// TODO: This should be a word-sized integer.
15+
private var size: u64;
16+
}

toolchain/check/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ cc_library(
126126
"//toolchain/base:canonical_value_store",
127127
"//toolchain/base:index_base",
128128
"//toolchain/base:kind_switch",
129+
"//toolchain/base:value_ids",
129130
"//toolchain/base:value_store",
130131
"//toolchain/check:generic_region_stack",
131132
"//toolchain/check:scope_stack",

toolchain/check/handle_literal.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "toolchain/check/call.h"
88
#include "toolchain/check/context.h"
9+
#include "toolchain/check/convert.h"
910
#include "toolchain/check/handle.h"
1011
#include "toolchain/check/inst.h"
1112
#include "toolchain/check/literal.h"
@@ -66,11 +67,11 @@ auto HandleParseNode(Context& context, Parse::RealLiteralId node_id) -> bool {
6667
}
6768

6869
auto HandleParseNode(Context& context, Parse::StringLiteralId node_id) -> bool {
69-
AddInstAndPush<SemIR::StringLiteral>(
70-
context, node_id,
71-
{.type_id = GetSingletonType(context, SemIR::StringType::TypeInstId),
72-
.string_literal_id = context.tokens().GetStringLiteralValue(
73-
context.parse_tree().node_token(node_id))});
70+
auto str_literal_id =
71+
MakeStringLiteral(context, node_id,
72+
context.tokens().GetStringLiteralValue(
73+
context.parse_tree().node_token(node_id)));
74+
context.node_stack().Push(node_id, str_literal_id);
7475
return true;
7576
}
7677

@@ -128,7 +129,8 @@ auto HandleParseNode(Context& context, Parse::FloatTypeLiteralId node_id)
128129

129130
auto HandleParseNode(Context& context, Parse::StringTypeLiteralId node_id)
130131
-> bool {
131-
context.node_stack().Push(node_id, SemIR::StringType::TypeInstId);
132+
auto type_inst_id = MakeStringTypeLiteral(context, node_id);
133+
context.node_stack().Push(node_id, type_inst_id);
132134
return true;
133135
}
134136

toolchain/check/literal.cpp

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "toolchain/check/convert.h"
1010
#include "toolchain/check/name_lookup.h"
1111
#include "toolchain/check/type.h"
12+
#include "toolchain/check/type_completion.h"
13+
#include "toolchain/diagnostics/diagnostic.h"
1214
#include "toolchain/lex/token_info.h"
1315
#include "toolchain/sem_ir/ids.h"
1416

@@ -44,4 +46,128 @@ auto MakeFloatTypeLiteral(Context& context, Parse::NodeId node_id,
4446
return PerformCall(context, node_id, fn_inst_id, {width_id});
4547
}
4648

49+
namespace {
50+
// The extracted representation of the type `Core.String`.
// Produced by `GetStringLiteralRepr` and consumed when building a string
// literal value.
51+
struct StringRepr {
52+
// Type of the first field of the object representation (the pointer field).
SemIR::TypeId ptr_field_type_id;
53+
// Type of the second field of the object representation (the size field).
SemIR::TypeId size_field_type_id;
54+
// Integer type information (signedness and bit width) for the size field.
SemIR::TypeStore::IntTypeInfo size_field_type_info;
55+
};
56+
} // namespace
57+
58+
// Extracts information about the representation of the `Core.String` type
59+
// necessary for building a string literal.
//
// `type_id` is the string type to inspect; `loc_id` is the location passed to
// `TryToCompleteType` when completing the pointee type of the pointer field.
//
// Returns `std::nullopt` without emitting a diagnostic here if the object
// representation is not a struct with exactly two fields, where the first is
// a pointer whose pointee has 8-bit integer type info and the second has
// integer type info.
60+
static auto GetStringLiteralRepr(Context& context, SemIR::LocId loc_id,
61+
SemIR::TypeId type_id)
62+
-> std::optional<StringRepr> {
63+
// The object representation should be a struct type.
64+
auto object_repr_id = context.types().GetObjectRepr(type_id);
65+
auto struct_repr =
66+
context.types().TryGetAs<SemIR::StructType>(object_repr_id);
67+
if (!struct_repr) {
68+
return std::nullopt;
69+
}
70+
71+
// The struct should have two fields.
72+
auto fields = context.struct_type_fields().Get(struct_repr->fields_id);
73+
if (fields.size() != 2) {
74+
return std::nullopt;
75+
}
76+
77+
// The first field should be a pointer to 8-bit integers.
78+
auto ptr_type =
79+
context.insts().TryGetAs<SemIR::PointerType>(fields[0].type_inst_id);
80+
if (!ptr_type) {
81+
return std::nullopt;
82+
}
83+
auto pointee_type_id =
84+
context.types().GetTypeIdForTypeInstId(ptr_type->pointee_id);
85+
// Attempt to complete the pointee type before querying its integer type
// info; give up if completion fails.
if (!TryToCompleteType(context, pointee_type_id, loc_id)) {
86+
return std::nullopt;
87+
}
88+
auto elem_type_info = context.types().TryGetIntTypeInfo(pointee_type_id);
89+
if (!elem_type_info || context.ints().Get(elem_type_info->bit_width) != 8) {
90+
return std::nullopt;
91+
}
92+
93+
// The second field should be an integer type.
94+
auto size_field_type_id =
95+
context.types().GetTypeIdForTypeInstId(fields[1].type_inst_id);
96+
auto size_type_info = context.types().TryGetIntTypeInfo(size_field_type_id);
97+
if (!size_type_info) {
98+
return std::nullopt;
99+
}
100+
101+
// All checks passed: report the field types and the size field's integer
// type info.
return StringRepr{.ptr_field_type_id = context.types().GetTypeIdForTypeInstId(
102+
fields[0].type_inst_id),
103+
.size_field_type_id = size_field_type_id,
104+
.size_field_type_info = *size_type_info};
105+
}
106+
107+
// Builds the value of a string literal expression as a `StructValue` of the
// string type. Its elements are a `StringLiteral` instruction (typed as the
// pointer field) and an `IntValue` holding the literal's size (typed as the
// size field).
//
// Returns `SemIR::ErrorInst::InstId` if the string type is incomplete, if its
// representation is not the expected pointer/size pair, or if the literal's
// size does not fit in the size field.
auto MakeStringLiteral(Context& context, Parse::StringLiteralId node_id,
108+
StringLiteralValueId value_id) -> SemIR::InstId {
109+
auto str_type_id = MakeStringType(context, node_id);
110+
if (!RequireCompleteType(context, str_type_id, node_id, [&] {
111+
CARBON_DIAGNOSTIC(StringLiteralTypeIncomplete, Error,
112+
"type {0} is incomplete", SemIR::TypeId);
113+
return context.emitter().Build(node_id, StringLiteralTypeIncomplete,
114+
str_type_id);
115+
})) {
116+
return SemIR::ErrorInst::InstId;
117+
}
118+
119+
auto repr = GetStringLiteralRepr(context, node_id, str_type_id);
120+
if (!repr) {
121+
// Only diagnose when the string type is not itself the error type.
if (str_type_id != SemIR::ErrorInst::TypeId) {
122+
CARBON_DIAGNOSTIC(StringLiteralTypeUnexpected, Error,
123+
"unexpected representation for type {0}",
124+
SemIR::TypeId);
125+
context.emitter().Emit(node_id, StringLiteralTypeUnexpected, str_type_id);
126+
}
127+
return SemIR::ErrorInst::InstId;
128+
}
129+
130+
// The pointer field is a `StringLiteral` object.
131+
// TODO: Perhaps `StringLiteral` should instead produce a durable reference,
132+
// and we should take its address here?
133+
auto ptr_value_id = AddInst<SemIR::StringLiteral>(
134+
context, node_id,
135+
{.type_id = repr->ptr_field_type_id, .string_literal_id = value_id});
136+
137+
// The size field is an integer literal.
138+
auto size = context.string_literal_values().Get(value_id).size();
139+
// The fit check is performed only when the size field's bit width has a
// value; otherwise the size is used unchecked.
if (repr->size_field_type_info.bit_width.has_value()) {
140+
// Check that the size value fits in the size field.
141+
auto width = context.ints()
142+
.Get(repr->size_field_type_info.bit_width)
143+
.getLimitedValue();
144+
if (repr->size_field_type_info.is_signed ? !llvm::isIntN(width, size)
145+
: !llvm::isUIntN(width, size)) {
146+
CARBON_DIAGNOSTIC(StringLiteralTooLong, Error,
147+
"string literal is too long");
148+
context.emitter().Emit(node_id, StringLiteralTooLong);
149+
return SemIR::ErrorInst::InstId;
150+
}
151+
}
152+
auto size_value_id =
153+
AddInst<SemIR::IntValue>(context, node_id,
154+
{.type_id = repr->size_field_type_id,
155+
.int_id = context.ints().Add(size)});
156+
157+
// Build the representation struct.
158+
auto elements_id = context.inst_blocks().Add({ptr_value_id, size_value_id});
159+
return AddInst<SemIR::StructValue>(
160+
context, node_id, {.type_id = str_type_id, .elements_id = elements_id});
161+
}
162+
163+
// Returns the instruction produced by looking up the name `String` in `Core`,
// using `node_id` as the location for the lookup.
auto MakeStringTypeLiteral(Context& context, Parse::NodeId node_id)
164+
-> SemIR::InstId {
165+
return LookupNameInCore(context, node_id, "String");
166+
}
167+
168+
// Returns the string type: the result of `MakeStringTypeLiteral` converted to
// a type with `ExprAsType`.
auto MakeStringType(Context& context, Parse::NodeId node_id) -> SemIR::TypeId {
169+
auto type_inst_id = MakeStringTypeLiteral(context, node_id);
170+
return ExprAsType(context, node_id, type_inst_id).type_id;
171+
}
172+
47173
} // namespace Carbon::Check

toolchain/check/literal.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#ifndef CARBON_TOOLCHAIN_CHECK_LITERAL_H_
66
#define CARBON_TOOLCHAIN_CHECK_LITERAL_H_
77

8+
#include "toolchain/base/value_ids.h"
89
#include "toolchain/check/context.h"
910
#include "toolchain/lex/token_info.h"
1011
#include "toolchain/sem_ir/ids.h"
@@ -29,6 +30,17 @@ auto MakeIntType(Context& context, Parse::NodeId node_id,
2930
auto MakeFloatTypeLiteral(Context& context, Parse::NodeId node_id,
3031
IntId size_id) -> SemIR::InstId;
3132

33+
// Forms a string literal value instruction for a given string literal.
34+
auto MakeStringLiteral(Context& context, Parse::StringLiteralId node_id,
35+
StringLiteralValueId value_id) -> SemIR::InstId;
36+
37+
// Forms the type expression for a `str` type literal, which names `Core.String`.
38+
auto MakeStringTypeLiteral(Context& context, Parse::NodeId node_id)
39+
-> SemIR::InstId;
40+
41+
// Forms the string type, `Core.String`.
42+
auto MakeStringType(Context& context, Parse::NodeId node_id) -> SemIR::TypeId;
43+
3244
} // namespace Carbon::Check
3345

3446
#endif // CARBON_TOOLCHAIN_CHECK_LITERAL_H_

toolchain/check/testdata/array/element_mismatches.carbon

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ library "[[@TEST_NAME]]";
1616

1717
class C {}
1818

19-
// CHECK:STDERR: fail_arg_wrong_type.carbon:[[@LINE+7]]:22: error: cannot implicitly convert expression of type `String` to `C` [ConversionFailure]
20-
// CHECK:STDERR: var a: array(C, 3) = ({}, "Hello", "World");
21-
// CHECK:STDERR: ^~~~~~~~~~~~~~~~~~~~~~
22-
// CHECK:STDERR: fail_arg_wrong_type.carbon:[[@LINE+4]]:22: note: type `String` does not implement interface `Core.ImplicitAs(C)` [MissingImplInMemberAccessNote]
23-
// CHECK:STDERR: var a: array(C, 3) = ({}, "Hello", "World");
24-
// CHECK:STDERR: ^~~~~~~~~~~~~~~~~~~~~~
19+
// CHECK:STDERR: fail_arg_wrong_type.carbon:[[@LINE+7]]:22: error: cannot implicitly convert expression of type `()` to `C` [ConversionFailure]
20+
// CHECK:STDERR: var a: array(C, 3) = ({}, (), true);
21+
// CHECK:STDERR: ^~~~~~~~~~~~~~
22+
// CHECK:STDERR: fail_arg_wrong_type.carbon:[[@LINE+4]]:22: note: type `()` does not implement interface `Core.ImplicitAs(C)` [MissingImplInMemberAccessNote]
23+
// CHECK:STDERR: var a: array(C, 3) = ({}, (), true);
24+
// CHECK:STDERR: ^~~~~~~~~~~~~~
2525
// CHECK:STDERR:
26-
var a: array(C, 3) = ({}, "Hello", "World");
26+
var a: array(C, 3) = ({}, (), true);
2727

2828
// --- fail_var_wrong_type.carbon
2929

toolchain/check/testdata/for/actual.carbon

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,28 +222,28 @@ fn Read() {
222222
// CHECK:STDOUT: %Core.import_ref.1c9: %Iterate.assoc_type = import_ref Core//prelude/iterate, loc12_18, loaded [concrete = constants.%assoc0.724]
223223
// CHECK:STDOUT: %Core.import_ref.ed6: %Iterate.assoc_type = import_ref Core//prelude/iterate, loc13_17, loaded [concrete = constants.%assoc1.02e]
224224
// CHECK:STDOUT: %Core.import_ref.9e6: type = import_ref Core//prelude/iterate, loc13_17, loaded [concrete = %CursorType]
225-
// CHECK:STDOUT: %Core.import_ref.f49: @Optional.%Optional.None.type (%Optional.None.type.ef2) = import_ref Core//prelude/iterate, inst138 [indirect], loaded [symbolic = @Optional.%Optional.None (constants.%Optional.None.fd6)]
226-
// CHECK:STDOUT: %Core.import_ref.1a8: @Optional.%Optional.Some.type (%Optional.Some.type.b2c) = import_ref Core//prelude/iterate, inst139 [indirect], loaded [symbolic = @Optional.%Optional.Some (constants.%Optional.Some.d0d)]
227-
// CHECK:STDOUT: %Core.import_ref.36a9: @Optional.as.Destroy.impl.%Optional.as.Destroy.impl.Op.type (%Optional.as.Destroy.impl.Op.type.764) = import_ref Core//prelude/iterate, inst6889 [indirect], loaded [symbolic = @Optional.as.Destroy.impl.%Optional.as.Destroy.impl.Op (constants.%Optional.as.Destroy.impl.Op.bf8)]
225+
// CHECK:STDOUT: %Core.import_ref.f49: @Optional.%Optional.None.type (%Optional.None.type.ef2) = import_ref Core//prelude/iterate, inst139 [indirect], loaded [symbolic = @Optional.%Optional.None (constants.%Optional.None.fd6)]
226+
// CHECK:STDOUT: %Core.import_ref.1a8: @Optional.%Optional.Some.type (%Optional.Some.type.b2c) = import_ref Core//prelude/iterate, inst140 [indirect], loaded [symbolic = @Optional.%Optional.Some (constants.%Optional.Some.d0d)]
227+
// CHECK:STDOUT: %Core.import_ref.36a9: @Optional.as.Destroy.impl.%Optional.as.Destroy.impl.Op.type (%Optional.as.Destroy.impl.Op.type.764) = import_ref Core//prelude/iterate, inst6890 [indirect], loaded [symbolic = @Optional.as.Destroy.impl.%Optional.as.Destroy.impl.Op (constants.%Optional.as.Destroy.impl.Op.bf8)]
228228
// CHECK:STDOUT: %Destroy.impl_witness_table.2ff = impl_witness_table (%Core.import_ref.36a9), @Optional.as.Destroy.impl [concrete]
229-
// CHECK:STDOUT: %Core.import_ref.cf4: @Core.IntLiteral.as.ImplicitAs.impl.%Core.IntLiteral.as.ImplicitAs.impl.Convert.type (%Core.IntLiteral.as.ImplicitAs.impl.Convert.type.0f9) = import_ref Core//prelude/iterate, inst482 [indirect], loaded [symbolic = @Core.IntLiteral.as.ImplicitAs.impl.%Core.IntLiteral.as.ImplicitAs.impl.Convert (constants.%Core.IntLiteral.as.ImplicitAs.impl.Convert.f06)]
229+
// CHECK:STDOUT: %Core.import_ref.cf4: @Core.IntLiteral.as.ImplicitAs.impl.%Core.IntLiteral.as.ImplicitAs.impl.Convert.type (%Core.IntLiteral.as.ImplicitAs.impl.Convert.type.0f9) = import_ref Core//prelude/iterate, inst483 [indirect], loaded [symbolic = @Core.IntLiteral.as.ImplicitAs.impl.%Core.IntLiteral.as.ImplicitAs.impl.Convert (constants.%Core.IntLiteral.as.ImplicitAs.impl.Convert.f06)]
230230
// CHECK:STDOUT: %ImplicitAs.impl_witness_table.2b9 = impl_witness_table (%Core.import_ref.cf4), @Core.IntLiteral.as.ImplicitAs.impl [concrete]
231-
// CHECK:STDOUT: %Core.import_ref.741: @Int.as.Destroy.impl.%Int.as.Destroy.impl.Op.type (%Int.as.Destroy.impl.Op.type) = import_ref Core//prelude/iterate, inst450 [indirect], loaded [symbolic = @Int.as.Destroy.impl.%Int.as.Destroy.impl.Op (constants.%Int.as.Destroy.impl.Op)]
231+
// CHECK:STDOUT: %Core.import_ref.741: @Int.as.Destroy.impl.%Int.as.Destroy.impl.Op.type (%Int.as.Destroy.impl.Op.type) = import_ref Core//prelude/iterate, inst451 [indirect], loaded [symbolic = @Int.as.Destroy.impl.%Int.as.Destroy.impl.Op (constants.%Int.as.Destroy.impl.Op)]
232232
// CHECK:STDOUT: %Destroy.impl_witness_table.1b4 = impl_witness_table (%Core.import_ref.741), @Int.as.Destroy.impl [concrete]
233-
// CHECK:STDOUT: %Core.import_ref.19a: @OrderedWith.%OrderedWith.assoc_type (%OrderedWith.assoc_type.03c) = import_ref Core//prelude/iterate, inst862 [indirect], loaded [symbolic = @OrderedWith.%assoc0 (constants.%assoc0.5db)]
234-
// CHECK:STDOUT: %Core.import_ref.b2b: @Int.as.OrderedWith.impl.db3.%Int.as.OrderedWith.impl.Less.type (%Int.as.OrderedWith.impl.Less.type.2c7) = import_ref Core//prelude/iterate, inst951 [indirect], loaded [symbolic = @Int.as.OrderedWith.impl.db3.%Int.as.OrderedWith.impl.Less (constants.%Int.as.OrderedWith.impl.Less.a5a)]
235-
// CHECK:STDOUT: %Core.import_ref.ab6 = import_ref Core//prelude/iterate, inst952 [indirect], unloaded
236-
// CHECK:STDOUT: %Core.import_ref.875 = import_ref Core//prelude/iterate, inst953 [indirect], unloaded
237-
// CHECK:STDOUT: %Core.import_ref.82b = import_ref Core//prelude/iterate, inst954 [indirect], unloaded
233+
// CHECK:STDOUT: %Core.import_ref.19a: @OrderedWith.%OrderedWith.assoc_type (%OrderedWith.assoc_type.03c) = import_ref Core//prelude/iterate, inst863 [indirect], loaded [symbolic = @OrderedWith.%assoc0 (constants.%assoc0.5db)]
234+
// CHECK:STDOUT: %Core.import_ref.b2b: @Int.as.OrderedWith.impl.db3.%Int.as.OrderedWith.impl.Less.type (%Int.as.OrderedWith.impl.Less.type.2c7) = import_ref Core//prelude/iterate, inst952 [indirect], loaded [symbolic = @Int.as.OrderedWith.impl.db3.%Int.as.OrderedWith.impl.Less (constants.%Int.as.OrderedWith.impl.Less.a5a)]
235+
// CHECK:STDOUT: %Core.import_ref.ab6 = import_ref Core//prelude/iterate, inst953 [indirect], unloaded
236+
// CHECK:STDOUT: %Core.import_ref.875 = import_ref Core//prelude/iterate, inst954 [indirect], unloaded
237+
// CHECK:STDOUT: %Core.import_ref.82b = import_ref Core//prelude/iterate, inst955 [indirect], unloaded
238238
// CHECK:STDOUT: %OrderedWith.impl_witness_table.476 = impl_witness_table (%Core.import_ref.b2b, %Core.import_ref.ab6, %Core.import_ref.875, %Core.import_ref.82b), @Int.as.OrderedWith.impl.db3 [concrete]
239-
// CHECK:STDOUT: %Core.import_ref.13d: @OrderedWith.%OrderedWith.Less.type (%OrderedWith.Less.type.f19) = import_ref Core//prelude/iterate, inst1926 [indirect], loaded [symbolic = @OrderedWith.%OrderedWith.Less (constants.%OrderedWith.Less.02e)]
239+
// CHECK:STDOUT: %Core.import_ref.13d: @OrderedWith.%OrderedWith.Less.type (%OrderedWith.Less.type.f19) = import_ref Core//prelude/iterate, inst1927 [indirect], loaded [symbolic = @OrderedWith.%OrderedWith.Less (constants.%OrderedWith.Less.02e)]
240240
// CHECK:STDOUT: %CursorType: type = assoc_const_decl @CursorType [concrete] {}
241241
// CHECK:STDOUT: %Core.import_ref.4f9: type = import_ref Core//prelude/iterate, loc12_18, loaded [concrete = %ElementType]
242242
// CHECK:STDOUT: %ElementType: type = assoc_const_decl @ElementType [concrete] {}
243243
// CHECK:STDOUT: %Core.Optional: %Optional.type = import_ref Core//prelude/types/optional, Optional, loaded [concrete = constants.%Optional.generic]
244244
// CHECK:STDOUT: %Core.Destroy: type = import_ref Core//prelude/destroy, Destroy, loaded [concrete = constants.%Destroy.type]
245245
// CHECK:STDOUT: %Core.OrderedWith: %OrderedWith.type.270 = import_ref Core//prelude/operators/comparison, OrderedWith, loaded [concrete = constants.%OrderedWith.generic]
246-
// CHECK:STDOUT: %Core.import_ref.d49 = import_ref Core//prelude/iterate, inst6644 [indirect], unloaded
246+
// CHECK:STDOUT: %Core.import_ref.d49 = import_ref Core//prelude/iterate, inst6645 [indirect], unloaded
247247
// CHECK:STDOUT: %Core.Inc: type = import_ref Core//prelude/operators/arithmetic, Inc, loaded [concrete = constants.%Inc.type]
248248
// CHECK:STDOUT: %Core.ImplicitAs: %ImplicitAs.type.cc7 = import_ref Core//prelude/operators/as, ImplicitAs, loaded [concrete = constants.%ImplicitAs.generic]
249249
// CHECK:STDOUT: }

0 commit comments

Comments
 (0)