Skip to content

Commit ddd390b

Browse files
committed
Implemented string indexing for Core.String (#6270)
1 parent ce10970 commit ddd390b

File tree

6 files changed

+133
-0
lines changed

6 files changed

+133
-0
lines changed

core/prelude/types/string.carbon

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,13 @@ import library "prelude/copy";
88
import library "prelude/destroy";
99
import library "prelude/types/char";
1010
import library "prelude/types/uint";
11+
import library "prelude/operators/index";
12+
import library "prelude/types/int";
13+
14+
class String;
15+
16+
// Forward declaration for builtin function
17+
fn StringAt(s: String, index: i32) -> Char;
1118

1219
class String {
1320
fn Size[self: Self]() -> u64 { return self.size; }
@@ -16,8 +23,15 @@ class String {
1623
fn Op[self: Self]() -> Self { return {.ptr = self.ptr, .size = self.size}; }
1724
}
1825

26+
impl as IndexWith(i32) where .ElementType = Char {
27+
fn At[self: Self](subscript: i32) -> Char {
28+
return StringAt(self, subscript);
29+
}
30+
}
1931
// TODO: This should be an array iterator.
2032
private var ptr: Char*;
2133
// TODO: This should be a word-sized integer.
2234
private var size: u64;
2335
}
36+
37+
fn StringAt(s: String, index: i32) -> Char = "string.at";

toolchain/check/eval.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,76 @@ static auto MakeConstantForBuiltinCall(EvalContext& eval_context,
16891689

16901690
case SemIR::BuiltinFunctionKind::StringAt: {
  // Constant-evaluates `string.at`: when both the string and the index are
  // compile-time constants, produces the indexed character as an integer
  // constant, or diagnoses a negative / out-of-range index. In every other
  // situation the call is left non-constant.
  //
  // This case must not be reachable by fallthrough from other builtins; the
  // runtime-only `PrintChar` / `PrintInt` labels are placed after this block.
  Phase phase = Phase::Concrete;
  auto str_id = GetConstantValue(eval_context, arg_ids[0], &phase);
  auto index_id = GetConstantValue(eval_context, arg_ids[1], &phase);
  // If either operand isn't constant, we can't evaluate at compile time.
  if (!str_id.has_value() || !index_id.has_value()) {
    return MakeNonConstantResult(phase);
  }

  // The string constant is expected to be a two-element struct value whose
  // first element is the string literal. Anything else is not evaluated.
  auto str_struct = eval_context.insts().TryGetAs<SemIR::StructValue>(str_id);
  if (!str_struct) {
    return MakeNonConstantResult(phase);
  }

  auto elements = eval_context.inst_blocks().Get(str_struct->elements_id);
  if (elements.size() != 2) {
    return MakeNonConstantResult(phase);
  }

  auto ptr_const_id = eval_context.constant_values().Get(elements[0]);
  auto string_literal = eval_context.insts().TryGetAs<SemIR::StringLiteral>(
      eval_context.constant_values().GetInstId(ptr_const_id));
  if (!string_literal) {
    return MakeNonConstantResult(phase);
  }

  auto string_value = eval_context.sem_ir().string_literal_values().Get(
      string_literal->string_literal_id);

  // Get the index value.
  auto index_inst = eval_context.insts().TryGetAs<SemIR::IntValue>(index_id);
  if (!index_inst) {
    CARBON_CHECK(phase != Phase::Concrete, "Concrete constant integer should be a literal");
    return MakeNonConstantResult(phase);
  }

  const auto& index_val = eval_context.ints().Get(index_inst->int_id);

  // Reject negative indices.
  if (index_val.isNegative()) {
    CARBON_DIAGNOSTIC(ArrayIndexNegative, Error, "index `{0}` is negative",
                      TypedInt);
    eval_context.emitter().Emit(
        eval_context.GetDiagnosticLoc(index_id), ArrayIndexNegative,
        {.type = eval_context.insts().Get(index_id).type_id(),
         .value = index_val});
    return SemIR::ErrorInst::ConstantId;
  }

  // Reject indices past the end. The active-bits test runs first so that
  // `getZExtValue` is only called on values that fit in 64 bits.
  if (index_val.getActiveBits() > 64 ||
      index_val.getZExtValue() >= string_value.size()) {
    // NOTE(review): `ArrayIndexOutOfBounds` is a diagnostic kind that already
    // existed before this change; confirm that declaring it here with a
    // string-specific message is intended rather than adding a new kind.
    CARBON_DIAGNOSTIC(ArrayIndexOutOfBounds, Error,
                      "string index `{0}` is past the end of the string",
                      TypedInt);
    eval_context.emitter().Emit(
        eval_context.GetDiagnosticLoc(index_id), ArrayIndexOutOfBounds,
        {.type = eval_context.insts().Get(index_id).type_id(),
         .value = index_val});
    return SemIR::ErrorInst::ConstantId;
  }

  // Read the byte as unsigned so values above 0x7F are not sign-extended.
  auto char_value =
      static_cast<uint8_t>(string_value[index_val.getZExtValue()]);

  // NOTE(review): the result is built as a 32-bit integer regardless of the
  // bit width of `call.type_id` -- confirm this matches the return type.
  auto int_id = eval_context.ints().Add(
      llvm::APSInt(llvm::APInt(32, char_value), /*isUnsigned=*/false));
  return MakeConstantResult(
      eval_context.context(),
      SemIR::IntValue{.type_id = call.type_id, .int_id = int_id}, phase);
}

// `PrintChar` and `PrintInt` are listed after the `StringAt` block so that
// they share the handling of the case labels that follow them instead of
// falling through into the `string.at` evaluation above.
case SemIR::BuiltinFunctionKind::PrintChar:
case SemIR::BuiltinFunctionKind::PrintInt:
16921762
case SemIR::BuiltinFunctionKind::ReadChar:
16931763
case SemIR::BuiltinFunctionKind::FloatAddAssign:
16941764
case SemIR::BuiltinFunctionKind::FloatSubAssign:

toolchain/diagnostics/diagnostic_kind.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,7 @@ CARBON_DIAGNOSTIC_KIND(AddrOnNonSelfParam)
397397
CARBON_DIAGNOSTIC_KIND(AddrOnNonPointerType)
398398
CARBON_DIAGNOSTIC_KIND(ArrayBoundTooLarge)
399399
CARBON_DIAGNOSTIC_KIND(ArrayBoundNegative)
400+
CARBON_DIAGNOSTIC_KIND(ArrayIndexNegative)
400401
CARBON_DIAGNOSTIC_KIND(ArrayIndexOutOfBounds)
401402
CARBON_DIAGNOSTIC_KIND(ArrayInitFromLiteralArgCountMismatch)
402403
CARBON_DIAGNOSTIC_KIND(ArrayInitFromExprArgCountMismatch)

toolchain/lower/handle_call.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,49 @@ static auto HandleBuiltinCall(FunctionContext& context, SemIR::InstId inst_id,
318318
return;
319319
}
320320

321+
case SemIR::BuiltinFunctionKind::StringAt: {
  // Lowers `string.at`: reads one byte at `arg_ids[1]` from the string's
  // character pointer and zero-extends it to the call's result type. No
  // bounds check is emitted here.
  auto string_arg_id = arg_ids[0];
  llvm::Value* string_struct = context.GetValue(string_arg_id);

  // The lowered argument may be either the String struct itself or a pointer
  // to it; in the pointer case, load the struct through the pointer first.
  if (string_struct->getType()->isPointerTy()) {
    auto struct_type_id = context.GetTypeIdOfInst(string_arg_id);
    auto* struct_type = context.GetType(struct_type_id);
    string_struct = context.builder().CreateLoad(struct_type, string_struct,
                                                 "string.load");
  }

  // Field 0 of the struct is taken as the character pointer.
  auto* data_ptr =
      context.builder().CreateExtractValue(string_struct, {0}, "string.ptr");

  auto* index = context.GetValue(arg_ids[1]);

  // Address and load the indexed element as an i8.
  auto* i8_type = llvm::Type::getInt8Ty(context.llvm_context());
  auto* element_addr = context.builder().CreateInBoundsGEP(
      i8_type, data_ptr, index, "string.char_ptr");
  auto* loaded_byte =
      context.builder().CreateLoad(i8_type, element_addr, "string.char");

  // Zero-extend the loaded byte to the type of the call instruction.
  auto* widened = context.builder().CreateZExt(
      loaded_byte, context.GetTypeOfInst(inst_id), "string.char.zext");
  context.SetLocal(inst_id, widened);
  return;
}
363+
321364
case SemIR::BuiltinFunctionKind::TypeAnd: {
322365
context.SetLocal(inst_id, context.GetTypeAsValue());
323366
return;

toolchain/sem_ir/builtin_function_kind.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,10 @@ constexpr BuiltinInfo PrintInt = {
322322
constexpr BuiltinInfo ReadChar = {"read.char",
323323
ValidateSignature<auto()->AnySizedInt>};
324324

325+
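// Gets the character of a string at the given index. Signature validation
// only requires `(any type, sized integer) -> sized integer`; the first
// parameter is not checked to be a string type here.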
326+
constexpr BuiltinInfo StringAt = {
327+
"string.at", ValidateSignature<auto(AnyType, AnySizedInt)->AnySizedInt>};
328+
325329
// Returns the `Core.CharLiteral` type.
326330
constexpr BuiltinInfo CharLiteralMakeType = {"char_literal.make_type",
327331
ValidateSignature<auto()->Type>};

toolchain/sem_ir/builtin_function_kind.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(PrimitiveCopy)
3030
CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(PrintChar)
3131
CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(PrintInt)
3232
CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(ReadChar)
33+
CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(StringAt)
3334

3435
// Type factories.
3536
CARBON_SEM_IR_BUILTIN_FUNCTION_KIND(CharLiteralMakeType)

0 commit comments

Comments
 (0)