-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Added string indexing #6329
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Added string indexing #6329
Conversation
toolchain/check/eval.cpp
Outdated
| case SemIR::BuiltinFunctionKind::StringAt:{ | ||
| //We need both the string and index to be constant |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| case SemIR::BuiltinFunctionKind::StringAt:{ | |
| //We need both the string and index to be constant | |
| case SemIR::BuiltinFunctionKind::StringAt: { | |
| // We need both the string and index to be constant |
toolchain/check/eval.cpp
Outdated
| Phase phase = Phase::Concrete; | ||
| auto str_id = GetConstantValue(eval_context, arg_ids[0], &phase); | ||
| auto index_id = GetConstantValue(eval_context, arg_ids[1], &phase); | ||
| //If either isnt constant, we can't evaluate it at compile time |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| //If either isnt constant, we can't evaluate it at compile time | |
| // If either isnt constant, we can't evaluate it at compile time |
toolchain/check/eval.cpp
Outdated
| auto str_struct = eval_context.insts().TryGetAs<SemIR::StructValue>(str_id); | ||
| if(!str_struct) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| auto str_struct = eval_context.insts().TryGetAs<SemIR::StructValue>(str_id); | |
| if(!str_struct) { | |
| auto str_struct = | |
| eval_context.insts().TryGetAs<SemIR::StructValue>(str_id); | |
| if (!str_struct) { |
toolchain/check/eval.cpp
Outdated
| eval_context.constant_values().GetInstId(ptr_const_id)); | ||
| if (!string_literal){ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| eval_context.constant_values().GetInstId(ptr_const_id)); | |
| if (!string_literal){ | |
| eval_context.constant_values().GetInstId(ptr_const_id)); | |
| if (!string_literal) { |
toolchain/check/eval.cpp
Outdated
| if (!string_literal){ | ||
| return MakeNonConstantResult(phase); | ||
| } | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
toolchain/lower/handle_call.cpp
Outdated
| auto* string_ptr_field = context.builder().CreateExtractValue( | ||
| string_value, {0}, "string.ptr"); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| auto* string_ptr_field = context.builder().CreateExtractValue( | |
| string_value, {0}, "string.ptr"); | |
| auto* string_ptr_field = | |
| context.builder().CreateExtractValue(string_value, {0}, "string.ptr"); | |
toolchain/lower/handle_call.cpp
Outdated
|
|
||
| // Get the index value | ||
| auto* index_value = context.GetValue(arg_ids[1]); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
toolchain/lower/handle_call.cpp
Outdated
| llvm::Type::getInt8Ty(context.llvm_context()), | ||
| string_ptr_field, index_value, "string.char_ptr"); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| llvm::Type::getInt8Ty(context.llvm_context()), | |
| string_ptr_field, index_value, "string.char_ptr"); | |
| llvm::Type::getInt8Ty(context.llvm_context()), string_ptr_field, | |
| index_value, "string.char_ptr"); | |
toolchain/lower/handle_call.cpp
Outdated
| llvm::Type::getInt8Ty(context.llvm_context()), | ||
| char_ptr, "string.char"); | ||
|
|
||
| // Extend to i32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| llvm::Type::getInt8Ty(context.llvm_context()), | |
| char_ptr, "string.char"); | |
| // Extend to i32 | |
| llvm::Type::getInt8Ty(context.llvm_context()), char_ptr, | |
| "string.char"); | |
| // Extend to i32 |
toolchain/lower/handle_call.cpp
Outdated
|
|
||
| // Extend to i32 | ||
| context.SetLocal(inst_id, context.builder().CreateZExt( | ||
| char_i8, context.GetTypeOfInst(inst_id), "string.char.zext")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[diff] reported by reviewdog 🐶
| char_i8, context.GetTypeOfInst(inst_id), "string.char.zext")); | |
| char_i8, context.GetTypeOfInst(inst_id), | |
| "string.char.zext")); |
danakj
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like you might want to set up pre-commit so you can check that it will pass that before uploading: https://github.com/carbon-language/carbon-lang/blob/trunk/docs/project/contribution_tools.md#running-pre-commit
| fn At[self: Self](subscript: i32) -> Char { | ||
| return StringAt(self, subscript); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| } | |
| } | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some unresolved comments still, like this one
toolchain/check/eval.cpp
Outdated
|
|
||
| case SemIR::BuiltinFunctionKind::PrintChar: | ||
| case SemIR::BuiltinFunctionKind::PrintInt: | ||
| case SemIR::BuiltinFunctionKind::StringAt:{ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has changed the behaviour of the other builtin functions that it's below in this switch, no?
Did you try running file_test?
Also, this change should come with some new tests (probably in check/ for compile-time use and in lower/ for run-time use).
toolchain/check/eval.cpp
Outdated
| Phase phase = Phase::Concrete; | ||
| auto str_id = GetConstantValue(eval_context, arg_ids[0], &phase); | ||
| auto index_id = GetConstantValue(eval_context, arg_ids[1], &phase); | ||
| //If either isnt constant, we can't evaluate it at compile time |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| //If either isnt constant, we can't evaluate it at compile time | |
| // If either has no constant value, we can't evaluate it at compile time. |
toolchain/check/eval.cpp
Outdated
|
|
||
| auto string_value = eval_context.sem_ir().string_literal_values().Get( | ||
| string_literal->string_literal_id); | ||
| //Get index value |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| //Get index value |
This comment just says what the code says
toolchain/lower/handle_call.cpp
Outdated
| } | ||
|
|
||
| case SemIR::BuiltinFunctionKind::StringAt: { | ||
| // Get the string argument |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Get the string argument |
This just says what the code says
toolchain/lower/handle_call.cpp
Outdated
| auto string_inst_id = arg_ids[0]; | ||
| auto* string_arg = context.GetValue(string_inst_id); | ||
|
|
||
| // Check if this is a pointer to a String or a String value |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Check if this is a pointer to a String or a String value | |
| // Check if this is a pointer to a String or a String value. |
All comments should be complete sentences, I won't keep commenting on them all here
|
|
||
| // Gets a character from a string at the given index. | ||
| constexpr BuiltinInfo StringAt = { | ||
| "string.at", ValidateSignature<auto(AnyType, AnySizedInt)->AnySizedInt>}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we validate that the first parameter is a ClassType of Core.String? And we could write a test with an incorrect type that fails?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will update the signature validation to require the first parameter to be a ClassType of Core.String instead of AnyType. I can also add a test that attempts to declare StringAt with an incorrect first parameter type. Regarding the test, is there an existing pattern I should follow for validating class types in built-in signatures?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Check method has a TypeId, which has the information you need. The type instruction would be a ClassType, which has a ClassId that is pointing to a Class.
toolchain/lower/handle_call.cpp
Outdated
| llvm::Value* string_value; | ||
| if (string_arg->getType()->isPointerTy()) { | ||
| auto string_type_id = context.GetTypeIdOfInst(string_inst_id); | ||
| auto* string_type = context.GetType(string_type_id); | ||
| string_value = context.builder().CreateLoad( | ||
| string_type, string_arg, "string.load"); | ||
| } else { | ||
| // Already have the struct value | ||
| string_value = string_arg; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eval expects the argument to be a StructValue, why are we checking for pointers here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was not sure whether the String argument would be passed by value or by reference at this point in lowering. Is there a helper I should use to determine the representation? Instead of checking the LLVM type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm I am not very familiar lower right now, but it should at least be consistent. It's being received as a "value", and classes have a pointer value-representation by default, so maybe this is always a pointer. Running the test should show you what happens, and I would expect it to be consistent.
toolchain/lower/handle_call.cpp
Outdated
| // Extend to i32 | ||
| context.SetLocal(inst_id, context.builder().CreateZExt( | ||
| char_i8, context.GetTypeOfInst(inst_id), "string.char.zext")); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this extending to i32? Isn't it going to whatever the return type was? Maybe drop the comment?
ee719f1 to
98be5b6
Compare
|
I have addressed most of the feedback.
Another issue that I came across was somehow being able to use |
I think string literals should be able to convert to |
|
|
I think this is ready to be reviewed again if possible |
|
Note, please try to avoid rebase/force push in between review iterations, as it removes the ability to see "changes since the last review" and detaches comments from their places. |
| fn At[self: Self](subscript: i32) -> Char { | ||
| return StringAt(self, subscript); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are some unresolved comments still, like this one
|
|
||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| library "[[@TEST_NAME]]"; | |
| // Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
| // Exceptions. See /LICENSE for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // INCLUDE-FILE: toolchain/testing/testdata/min_prelude/full.carbon |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // INCLUDE-FILE: toolchain/testing/testdata/min_prelude/full.carbon | |
| // | |
| // INCLUDE-FILE: toolchain/testing/testdata/min_prelude/full.carbon | |
| // |
|
|
||
| fn PrintStr(msg: str) { | ||
| for (i: i32 in Core.Range(msg.Size() as i32)) { | ||
| Core.PrintChar(msg[i]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to print things to test indexing? I think we could just make a string, and then do an index into it and wrap that in //@dump-sem-ir-begin and //@dump-sem-ir-end so that we can see what semir comes out of it?
| fn Run() { | ||
| PrintStr("Hello World!\n"); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no Run in tests
| case CARBON_KIND(SemIR::ClassType class_type): { | ||
| if (IsStringType(context, class_type)) { | ||
| auto index_const_id = context.constant_values().Get(index_inst_id); | ||
| if (index_const_id.is_constant()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given these bounds checks are only done for constant values, I am wondering why they are happening here instead of inside eval, more closely matching ArrayIndex behaviour? I think then we don't need the IsStringType check in here at all?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The bounds checks happen here because string indexing goes through the IndexWith interface, not primitive array indexing. I thought to do this to catch constant index errors earlier during semantic analysis before constant evaluation.
I noticed this during tests as well that when I was checking bounds that it only threw errors for fully constant expressions "Test"[4], but it failed to throw for variable bindings let s: str = "Test"; let c: char = s[4]; Even though both the string value and index are compile-time constants. By checking here, the implementation can check bounds for both direct literal indexing and variable-bound strings with constant indices.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I fear I was not clear. I am not wondering why it's using the ArrayIndex implementation. I am wondering if the checking can be done in the implementation of StringAt in eval: https://github.com/carbon-language/carbon-lang/pull/6329/files/153ff8269fd4a57553db0ec1a000d98ebb3279a2#diff-89653437c0b9dd4925413361e1a840632f5f4dd03dc63fee83ce2d04a1586bf1R1716 we have all the information we need here to check and return an Error instead of duplicating a lot of the code here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I attempted to move the bounds checking to eval, but the StringAt builtin is never being invoked during the check phase for non-constant strings.
// --- fail_negative_index.carbon
library "[[@TEST_NAME]]";
fn TestNegativeIndex() {
let s: str = "Test";
//@dump-sem-ir-begin
let c: char = s[-1];
//@dump-sem-ir-end
}
For example, the SemIR dump from this test case
%str.as.IndexWith.impl.At.call: init %char = call %bound_method.loc6_21.2(%s.ref, %.loc6_19.2)
This call does not have the the [concrete] annotation, implying it is not being evaluated as a constant.
I suppose the issue lies in MakeConstantForCall, more specifically
// TODO: Some builtin calls might allow some operands to be non-constant.
if (!has_constant_operands) {
if (builtin_kind.IsCompTimeOnly(
eval_context.sem_ir(), eval_context.inst_blocks().Get(call.args_id),
call.type_id)) {
CARBON_DIAGNOSTIC(NonConstantCallToCompTimeOnlyFunction, Error,
"non-constant call to compile-time-only function");
CARBON_DIAGNOSTIC(CompTimeOnlyFunctionHere, Note,
"compile-time-only function declared here");
const auto& function = eval_context.functions().Get(
std::get<SemIR::CalleeFunction>(callee).function_id);
eval_context.emitter()
.Build(inst_id, NonConstantCallToCompTimeOnlyFunction)
.Note(function.latest_decl_id(), CompTimeOnlyFunctionHere)
.Emit();
}
return SemIR::ConstantId::NotConstant;
}
Since the string variable s in my test is not constant, the call returns NotConstant before ever reaching MakeConstantForBuiltinCall where the StringAt builtin is. I suppose this limitation is known based on the comment in MakeConstantForCall, however would it be worthwhile to modify MakeConstantForCall to allow StringAt (and potentially other builtins) to be invoked even when operands are not constant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eval is constant evaluation. Each instruction has a constant value that is either constant (an instruction representing the constant result) or that is "runtime" (aka NotConstant). See ConstantId::is_constant.
If all the operands of (inputs to) the instruction are constant, then the resulting value can also be constant. That's when we execute StringAt in eval, and produce a constant value as a result. And since we have constant inputs, we can do bounds checks there.
If any of the operands of the instruction are runtime, as in the example you wrote here, then we can not evaluate to a constant value during compile (in eval). Instead, we the call instruction goes through to lower, and we generate runtime code for it.
Does that make sense?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using the following test case
// --- test_string_indexing.carbon
library "[[@TEST_NAME]]";
fn TestStringIndexing() {
let s: str = "Test";
//@dump-sem-ir-begin
letc: char = s[0];
//@dump-sem-ir-end
}
The semir shows:
%s.ref: ref %str.ee0ae5.1 = name_ref s, %s
%int_0: Core.IntLiteral = int_value 0 [concrete = constants.%int_0]
...
%str.as.IndexWith.impl.At.call: init %char = call %bound_method.loc6_20.2(%.loc6_17, %int_0)
The NameRef %s.ref does not have the [concrete] annotation, and neither does the resulting Call instruction. The index
is marked [concrete], but the call is not being evaluated as constant.
Even trying a more literal case, where the string itself is concrete
// --- fail_literal_negative_index.carbon
library "[[@TEST_NAME]]";
fn TestLiteralNegativeIndex() {
//@dump-sem-ir-begin
let c: char = "Test"[-1];
//@dump-sem-ir-end
}
Generates this semir
%str: %ptr.fb0 = string_literal "Test" [concrete = constants.%str.0a6]
...
%String.val: %str.ee0ae5.1 = struct_value (%str, %int_4) [concrete = constants.%String.val]
...
str.as.IndexWith.impl.At.call: init %char = call %bound_method.loc9_26.2(%String.val, %.loc9_24.2)
The call itself still is not being evaluated as constant. Is there something preventing interface method calls from being constant evaluated?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In your first example:
let s: str = "Test";
This is a runtime value. a : denotes a runtime binding.
Is there something preventing interface method calls from being constant evaluated?
Here's another example of an interface with builtin methods:
carbon-lang/core/prelude/types/int.carbon
Lines 52 to 55 in 315b0ac
| final impl forall [N:! IntLiteral(), M:! IntLiteral()] Int(N) as EqWith(Int(M)) { | |
| fn Equal[self: Self](other: Int(M)) -> bool = "int.eq"; | |
| fn NotEqual[self: Self](other: Int(M)) -> bool = "int.neq"; | |
| } |
Here is us calling that to generate a compile-time constant value: https://carbon.godbolt.org/z/jnfTcc4ee (the semir gets truncated)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this might help:
The [concrete = on the RHS of the textual semir is the constant value that was output from eval. The call instruction is going into eval, and it is not returning a concrete constant value. It looks like it's returning ConstantId::NotConstant, and you'll probably want to run things in a debugger to figure out what exactly is going on.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Heres what I found, note output from lldb
TEST: toolchain/check/testdata/operators/overloaded/string_indexing.carbon (lldb) p builtin_kind
(Carbon::SemIR::BuiltinFunctionKind) $0 = {
Carbon::Internal::EnumBase<Carbon::SemIR::BuiltinFunctionKind, Carbon::SemIR::Internal::BuiltinFunctionKindData::RawEnum, const llvm::StringLiteral *> = (value_ = None)
}
The test attempts to evaluate s[0], which would resolve to the current implementation of At within the IndexWith interface. However, when the evaluator reaches eval, we hit this check
if (builtin_kind == SemIR::BuiltinFunctionKind::None) {
// TODO: Eventually we'll want to treat some kinds of non-builtin
// functions as producing constants.
return SemIR::ConstantId::NotConstant;
}
At is not a builtin, so we get back NotConstant thereby not hitting StringAt all.
Is there a way to make the constant evaluator "see through" the At wrapper to reach the StringAt builtin?
(Apologies for all the questions on this PR I really appreciate your patience!)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, right. Note the different way that EqWith is written:
fn At[self: Self](subscript: T) -> Char {
return StringAt(self, subscript);
} fn Equal[self: Self](other: UInt(M)) -> bool = "int.eq";Instead of writing a function that calls a builtin internally, we should make the At function into the builtin. Sorry I overlooked that this whole time, I didn't forsee that issue.
| SemIR::InstId index_inst_id, | ||
| const llvm::APInt& index_int) -> void { | ||
| if (index_int.isNegative()) { | ||
| CARBON_DIAGNOSTIC(ArrayIndexNegative, Error, "index `{0}` is negative.", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| CARBON_DIAGNOSTIC(ArrayIndexNegative, Error, "index `{0}` is negative.", | |
| CARBON_DIAGNOSTIC(StringIndexNegative, Error, "index `{0}` is negative.", |
| return true; | ||
| } | ||
| }; | ||
| // Constraint that checks if a type is Core.String. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Constraint that checks if a type is Core.String. | |
| // Constraint that checks if a type is Core.String. |
| auto type_inst_id = sem_ir.types().GetInstId(type_id); | ||
| auto class_type = sem_ir.insts().TryGetAs<ClassType>(type_inst_id); | ||
| if (!class_type) { | ||
| // Not a string. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Not a string. |
| } | ||
|
|
||
| const auto& class_info = sem_ir.classes().Get(class_type->class_id); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented string indexing for Core.string
Handles references or struct values. No runtime checks as per https://discord.com/channels/655572317891461132/655578254970716160/1431015866270748682
Part of #6270